chenych / Painter_pytorch
Commit 106580f9, authored Dec 29, 2023 by chenych
First commit

Showing 20 changed files with 3166 additions and 0 deletions (+3166 -0)
Changed files:

eval/mmpose_custom/configs/coco_256x192_gendata.py  +154 -0
eval/mmpose_custom/configs/coco_256x192_gendata_test.py  +153 -0
eval/mmpose_custom/configs/coco_256x192_gendata_testflip.py  +153 -0
eval/mmpose_custom/configs/coco_256x192_test_offline.py  +161 -0
eval/mmpose_custom/data/pipelines/custom_transform.py  +128 -0
eval/mmpose_custom/data/pipelines/top_down_transform.py  +182 -0
eval/mmpose_custom/data/topdown_coco_dataset.py  +315 -0
eval/mmpose_custom/gen_json_coco_pose.py  +65 -0
eval/mmpose_custom/model/top_down.py  +258 -0
eval/mmpose_custom/painter_inference_pose.py  +167 -0
eval/mmpose_custom/tools/dist_test.sh  +23 -0
eval/mmpose_custom/tools/dist_train.sh  +20 -0
eval/mmpose_custom/tools/test.py  +189 -0
eval/mmpose_custom/tools/train.py  +203 -0
eval/nyuv2_depth/eval.sh  +20 -0
eval/nyuv2_depth/eval_with_pngs.py  +228 -0
eval/nyuv2_depth/painter_inference_depth.py  +154 -0
eval/sidd/eval_sidd.m  +26 -0
eval/sidd/painter_inference_sidd.py  +170 -0
main_train.py  +397 -0
eval/mmpose_custom/configs/coco_256x192_gendata.py (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = True
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]
aug_idx = 0
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0),
    # dict(
    #     type='TopDownHalfBodyTransform',
    #     num_joints_half_body=8,
    #     prob_half_body=0.3),
    # dict(
    #     type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug{}'.format(aug_idx),
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='val_256x192',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
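A minimal sketch (not part of this commit) of how a config like the one above can be loaded and inspected with mmcv; it assumes the referenced _base_ files (./_base_/default_runtime.py, ./_base_/coco.py) exist alongside it so that the {{_base_.dataset_info}} substitution can resolve.

from mmcv import Config

cfg = Config.fromfile('eval/mmpose_custom/configs/coco_256x192_gendata.py')
print(cfg.data_cfg['image_size'])      # [192, 256]
print(cfg.train_pipeline[-2]['type'])  # 'TopDownGenerateTargetCustom'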
eval/mmpose_custom/configs/coco_256x192_gendata_test.py (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = False
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]  # 2
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug0',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    # dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='test_256x192',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
eval/mmpose_custom/configs/coco_256x192_gendata_testflip.py (new file, mode 100644)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# model settings
model = None

use_gt_bbox = False
data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],  # [48, 64]
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=use_gt_bbox,
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

sigma = [1.5, 3]  # 2
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomShiftBboxCenter', shift_factor=0.16, prob=0.3),
    dict(type='TopDownRandomFlip', flip_prob=0.5),
    dict(
        type='TopDownHalfBodyTransform',
        num_joints_half_body=8,
        prob_half_body=0.3),
    dict(
        type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='train_256x192_aug0',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img', 'target', 'target_weight'],
        meta_keys=[
            'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
            'rotation', 'bbox_score', 'flip_pairs'
        ]),
]

val_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(type='TopDownRandomFlip', flip_prob=1),  # for flip test
    dict(type='TopDownAffine'),
    # dict(type='ToTensor'),
    # dict(
    #     type='NormalizeTensor',
    #     mean=[0.485, 0.456, 0.406],
    #     std=[0.229, 0.224, 0.225]),
    dict(
        type='TopDownGenerateTargetCustom',
        sigma=sigma,
        # the following are custom args
        use_gt_bbox=use_gt_bbox,
        dir_name='test_256x192_flip',
        target_path='datasets/coco_pose/data_pair',
    ),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    load_data_only=True,  # custom arg
    train=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',
        img_prefix=f'{data_root}/train2017/',
        data_cfg=data_cfg,
        pipeline=train_pipeline,
        dataset_info={{_base_.dataset_info}}),
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        img_prefix=f'{data_root}/val2017/',
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
eval/mmpose_custom/configs/coco_256x192_test_offline.py (new file, mode 100644)

import os

job_name = "painter_vit_large"
ckpt_file = "painter_vit_large.pth"
prompt = "000000000165_box0"
image_dir = 'models_inference/{}/coco_pose_inference_{}_{}/'.format(
    job_name, ckpt_file, prompt)
if not image_dir[-1] == "/":
    image_dir = image_dir + '/'
print(image_dir)

_base_ = ['./_base_/default_runtime.py', './_base_/coco.py']
evaluation = dict(interval=10, metric='mAP', save_best='AP')

optimizer = dict(
    type='Adam',
    lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[170, 200])
total_epochs = 210
channel_cfg = dict(
    num_output_channels=17,
    dataset_joints=17,
    dataset_channel=[
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
    ],
    inference_channel=[
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
    ])

# fake model settings
model = dict(
    type='TopDownCustom',
    pretrained=None,
    backbone=dict(
        type='HRNet',
        in_channels=3,
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, )),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64)),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128)),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256))),
    ),
    keypoint_head=dict(
        type='TopdownHeatmapSimpleHead',
        in_channels=32,
        out_channels=channel_cfg['num_output_channels'],
        num_deconv_layers=0,
        extra=dict(final_conv_kernel=1, ),
        loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
    train_cfg=dict(),
    test_cfg=dict(
        flip_test=True,
        post_process='default',
        shift_heatmap=True,
        modulate_kernel=17))

data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[192, 256],
    # heatmap_size=[48, 64],
    # image_size=[640, 320],  # w, h
    # heatmap_size=[640, 320],
    num_output_channels=channel_cfg['num_output_channels'],
    num_joints=channel_cfg['dataset_joints'],
    dataset_channel=channel_cfg['dataset_channel'],
    inference_channel=channel_cfg['inference_channel'],
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
    use_gt_bbox=False,
    imagename_with_boxid=True,  # custom
    det_bbox_thr=0.0,
    bbox_file='datasets/coco_pose/person_detection_results/'
    'COCO_val2017_detections_AP_H_56_person.json',
)

# sigma = [1.5, 3]  # 2
sigma = 3  # use the hyper params of R, which is heatmap
val_pipeline = [
    dict(type='LoadImageFromFile'),
    # load custom images according to filename and box_id, using topdown_coco_dataset
    dict(type='TopDownGetBboxCenterScale', padding=1.25),
    dict(
        type='Collect',
        keys=['img'],
        meta_keys=[
            'image_file', 'center', 'scale', 'rotation', 'bbox_score',
            'flip_pairs'
        ]),
]

test_pipeline = val_pipeline

data_root = 'datasets/coco'
data = dict(
    samples_per_gpu=32,
    workers_per_gpu=8,
    val_dataloader=dict(samples_per_gpu=32),
    test_dataloader=dict(samples_per_gpu=32),
    pseudo_test=True,  # custom arg
    val=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        # img_prefix=f'{data_root}/val2017/',
        img_prefix=image_dir,
        data_cfg=data_cfg,
        pipeline=val_pipeline,
        dataset_info={{_base_.dataset_info}}),
    test=dict(
        type='TopDownCocoDatasetCustom',
        ann_file=f'{data_root}/annotations/person_keypoints_val2017.json',
        # img_prefix=f'{data_root}/val2017/',
        img_prefix=image_dir,
        data_cfg=data_cfg,
        pipeline=test_pipeline,
        dataset_info={{_base_.dataset_info}}),
)

# import newly registered module
custom_imports = dict(
    imports=[
        'model.top_down',
        'data.topdown_coco_dataset',
        'data.pipelines.top_down_transform',
    ],
    allow_failed_imports=False)
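With the defaults above, image_dir resolves to models_inference/painter_vit_large/coco_pose_inference_painter_vit_large.pth_000000000165_box0/; the val and test datasets read their images from this directory via img_prefix=image_dir instead of from val2017, which is what makes this an offline evaluation of previously generated predictions.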
eval/mmpose_custom/data/pipelines/custom_transform.py (new file, mode 100644)

import os
import random
import warnings

import cv2
import numpy as np
from PIL import Image


def define_colors_gb_mean_sep(num_locations=17):
    num_sep_per_channel = int(num_locations ** (1 / 2)) + 1  # 5
    separation_per_channel = 256 // num_sep_per_channel  # 51

    color_dict = {}
    # R = G = B = 0
    # B += separation_per_channel  # offset for the first loop
    for location in range(num_locations):
        num_seq_g = location // num_sep_per_channel
        num_seq_b = location % num_sep_per_channel
        assert (num_seq_g <= num_sep_per_channel) and (num_seq_b <= num_sep_per_channel)

        G = 255 - num_seq_g * separation_per_channel
        B = 255 - num_seq_b * separation_per_channel
        assert (G < 256) and (B < 256)
        assert (G >= 0) and (B >= 0)
        assert (G, B) not in color_dict.values()

        color_dict[location] = (G, B)
        # print(location, (num_seq_g, num_seq_b), (G, B))

    # colors = [v for k, v in color_dict.items()]
    # min values in gb: [51, 51]
    return color_dict


color_dict = define_colors_gb_mean_sep()


def encode_target_to_image(target, target_weight, target_dir, metas):
    if len(target.shape) == 3:
        return encode_rgb_target_to_image(
            target_kernel=target,
            target_class=target,
            target_weight_kernel=target_weight,
            target_weight_class=target_weight,
            target_dir=target_dir,
            metas=metas,
        )
    assert len(target.shape) == 4
    return encode_rgb_target_to_image(
        target_kernel=target[1],
        target_class=target[0],
        target_weight_kernel=target_weight[1],
        target_weight_class=target_weight[0],
        target_dir=target_dir,
        metas=metas,
    )


def check_input(target_weight, target, metas):
    if not ((target_weight.reshape(17, 1, 1) * target) == target).all():
        print("useful target_weight!")
        target = target_weight.reshape(17, 1, 1) * target
    # make sure the invisible part is weighted zero, and thus not shown in target
    if not (target_weight[np.sum(metas['joints_3d_visible'], axis=1) == 0] == 0).all():
        print(metas['image_file'], "may have joints_3d_visible problems!")


def encode_rgb_target_to_image(target_kernel, target_class, target_weight_kernel,
                               target_weight_class, target_dir, metas):
    """
    Args:
        target: ndarray (17, 256, 192)
        target_weight: ndarray (17, 1)
        metas: dict

    Returns:
        an RGB image, R encodes heatmap, GB encodes class
    """
    check_input(target_weight_kernel, target_kernel, metas)
    check_input(target_weight_class, target_class, metas)

    # 1. handle kernel in R channel
    # get max value for collision area
    sum_kernel = target_kernel.max(0)  # (256, 192)
    max_kernel_indices = target_kernel.argmax(0)  # (256, 192)
    R = sum_kernel[:, :, None] * 255.  # (256, 192, 1)

    # 2. handle class in BG channels
    K, H, W = target_class.shape
    keypoint_areas_class = []
    for keypoint_idx in range(K):
        mask = target_class[keypoint_idx] != 0
        keypoint_areas_class.append(mask)
    keypoint_areas_class = np.stack(keypoint_areas_class)  # (17, 256, 192)
    num_pos_per_location_class = keypoint_areas_class.sum(0)  # (256, 192)
    collision_area_class = num_pos_per_location_class > 1  # (256, 192)

    GB_MultiChannel = np.zeros((17, 256, 192, 2))
    for keypoint_idx in range(K):
        color = color_dict[keypoint_idx]
        class_mask = keypoint_areas_class[keypoint_idx]
        GB_MultiChannel[keypoint_idx][class_mask] = color
    GB = GB_MultiChannel.sum(0)  # (256, 192, 2)

    if np.sum(collision_area_class) != 0:
        for keypoint_idx in range(K):
            color = color_dict[keypoint_idx]
            # may match more max_area_this_keypoint for 0, but removed by collision_area_class later
            max_area_this_keypoint = max_kernel_indices == keypoint_idx
            area_of_interest = max_area_this_keypoint * collision_area_class
            if not (area_of_interest == 0).all():
                GB[area_of_interest] = color

    # 3. get images / labels and save
    image_label = np.concatenate([R, GB], axis=-1).astype(np.uint8)  # (256, 192, 3)
    image_label = Image.fromarray(image_label)

    image = metas['img']
    image = Image.fromarray(image)

    box_idx = metas['bbox_id']
    _, filename = os.path.dirname(metas['image_file']), os.path.basename(metas['image_file'])
    image_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
    label_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_label.png".format(box_idx)))
    # if os.path.exists(image_path):
    #     print(image_path, "exist! return!")
    #     return

    image.save(image_path)
    image_label.save(label_path)
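To make the color scheme concrete, here is a short worked check of define_colors_gb_mean_sep as written above: with 17 keypoints, num_sep_per_channel is 5 and separation_per_channel is 51, so keypoint k is assigned the (G, B) pair (255 - 51 * (k // 5), 255 - 51 * (k % 5)).

colors = define_colors_gb_mean_sep()
assert colors[0] == (255, 255)   # 0 // 5 == 0, 0 % 5 == 0
assert colors[1] == (255, 204)   # B steps down by 51 within a G row
assert colors[5] == (204, 255)   # G steps down by 51 every 5 keypoints
assert colors[16] == (102, 204)  # 255 - 51*3, 255 - 51*1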
eval/mmpose_custom/data/pipelines/top_down_transform.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import os

from PIL import Image
import cv2
import numpy as np

from mmpose.core.bbox import bbox_xywh2cs
from mmpose.core.post_processing import (affine_transform, fliplr_joints,
                                         get_affine_transform, get_warp_matrix,
                                         warp_affine_joints)
from mmpose.datasets.builder import PIPELINES
from mmpose.datasets.pipelines import TopDownGenerateTarget

from .custom_transform import encode_target_to_image


@PIPELINES.register_module()
class TopDownGenerateTargetCustom(TopDownGenerateTarget):
    """Generate the target heatmap.

    Required key: 'joints_3d', 'joints_3d_visible', 'ann_info'.
    Modified key: 'target', and 'target_weight'.

    Args:
        sigma: Sigma of heatmap gaussian for 'MSRA' approach.
        kernel: Kernel of heatmap gaussian for 'Megvii' approach.
        encoding (str): Approach to generate target heatmaps.
            Currently supported approaches: 'MSRA', 'Megvii', 'UDP'.
            Default:'MSRA'
        unbiased_encoding (bool): Option to use unbiased
            encoding methods.
            Paper ref: Zhang et al. Distribution-Aware Coordinate
            Representation for Human Pose Estimation (CVPR 2020).
        keypoint_pose_distance: Keypoint pose distance for UDP.
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
        target_type (str): supported targets: 'GaussianHeatmap',
            'CombinedTarget'. Default:'GaussianHeatmap'
            CombinedTarget: The combination of classification target
            (response map) and regression target (offset map).
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).
    """

    def __init__(self,
                 sigma=2,
                 kernel=(11, 11),
                 valid_radius_factor=0.0546875,
                 target_type='GaussianHeatmap',
                 encoding='MSRA',
                 unbiased_encoding=False,
                 # the following are custom args
                 target_path=None,
                 dir_name=None,
                 use_gt_bbox=True):
        super().__init__(
            sigma=sigma,
            kernel=kernel,
            valid_radius_factor=valid_radius_factor,
            target_type=target_type,
            encoding=encoding,
            unbiased_encoding=unbiased_encoding)
        self.target_path = target_path
        self.dir_name = dir_name
        self.use_gt_bbox = use_gt_bbox

        target_dir = os.path.join(self.target_path, self.dir_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    def __call__(self, results):
        """Generate the target heatmap."""
        joints_3d = results['joints_3d']
        joints_3d_visible = results['joints_3d_visible']

        assert self.encoding in ['MSRA', 'Megvii', 'UDP']

        if self.encoding == 'MSRA':
            if isinstance(self.sigma, list):
                num_sigmas = len(self.sigma)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                heatmap_size = cfg['heatmap_size']

                target = np.empty(
                    (0, num_joints, heatmap_size[1], heatmap_size[0]),
                    dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_sigmas):
                    target_i, target_weight_i = self._msra_generate_target(
                        cfg, joints_3d, joints_3d_visible, self.sigma[i])
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._msra_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible,
                    self.sigma)
        elif self.encoding == 'Megvii':
            if isinstance(self.kernel, list):
                num_kernels = len(self.kernel)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                W, H = cfg['heatmap_size']

                target = np.empty((0, num_joints, H, W), dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_kernels):
                    target_i, target_weight_i = self._megvii_generate_target(
                        cfg, joints_3d, joints_3d_visible, self.kernel[i])
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._megvii_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible,
                    self.kernel)
        elif self.encoding == 'UDP':
            if self.target_type.lower() == 'CombinedTarget'.lower():
                factors = self.valid_radius_factor
                channel_factor = 3
            elif self.target_type.lower() == 'GaussianHeatmap'.lower():
                factors = self.sigma
                channel_factor = 1
            else:
                raise ValueError('target_type should be either '
                                 "'GaussianHeatmap' or 'CombinedTarget'")
            if isinstance(factors, list):
                num_factors = len(factors)
                cfg = results['ann_info']
                num_joints = cfg['num_joints']
                W, H = cfg['heatmap_size']

                target = np.empty((0, channel_factor * num_joints, H, W),
                                  dtype=np.float32)
                target_weight = np.empty((0, num_joints, 1), dtype=np.float32)
                for i in range(num_factors):
                    target_i, target_weight_i = self._udp_generate_target(
                        cfg, joints_3d, joints_3d_visible, factors[i],
                        self.target_type)
                    target = np.concatenate([target, target_i[None]], axis=0)
                    target_weight = np.concatenate(
                        [target_weight, target_weight_i[None]], axis=0)
            else:
                target, target_weight = self._udp_generate_target(
                    results['ann_info'], joints_3d, joints_3d_visible, factors,
                    self.target_type)
        else:
            raise ValueError(
                f'Encoding approach {self.encoding} is not supported!')

        results['target'] = target
        results['target_weight'] = target_weight

        target_dir = os.path.join(self.target_path, self.dir_name)
        if not self.use_gt_bbox:
            box_idx = results['bbox_id']
            image = results['img']
            image = Image.fromarray(image)
            _, filename = os.path.dirname(results['image_file']), os.path.basename(results['image_file'])
            image_path = os.path.join(target_dir, filename.replace(".jpg", "_box{}_image.png".format(box_idx)))
            if os.path.exists(image_path):
                print(image_path, "exist! return!")
                return results
            image.save(image_path)
        else:
            # filter all black target
            if (target.sum((1, 2)) == 0).all():
                return results
            # encode target to image (save is also done inside)
            encode_target_to_image(target, target_weight, target_dir=target_dir, metas=results)

        return results
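A shape-only numpy sketch (not from this commit) of the multi-sigma MSRA branch above: with sigma=[1.5, 3] as in the configs, two (17, H, W) target sets are concatenated into a (2, 17, H, W) array, one slice per sigma; the heatmap values here are dummy zeros standing in for _msra_generate_target.

import numpy as np

num_joints, H, W = 17, 256, 192
target = np.empty((0, num_joints, H, W), dtype=np.float32)
for sigma in [1.5, 3]:
    # stand-in for self._msra_generate_target(cfg, ..., sigma)
    target_i = np.zeros((num_joints, H, W), dtype=np.float32)
    target = np.concatenate([target, target_i[None]], axis=0)
print(target.shape)  # (2, 17, 256, 192)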
eval/mmpose_custom/data/topdown_coco_dataset.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import tempfile
import warnings
from collections import OrderedDict, defaultdict

import json_tricks as json
import numpy as np
from mmcv import Config, deprecated_api_warning
from xtcocotools.cocoeval import COCOeval

from mmpose.core.post_processing import oks_nms, soft_oks_nms
from mmpose.datasets.builder import DATASETS
# from mmpose.datasets.datasets.base import Kpt2dSviewRgbImgTopDownDataset
from mmpose.datasets.datasets.top_down import TopDownCocoDataset


@DATASETS.register_module()
class TopDownCocoDatasetCustom(TopDownCocoDataset):
    """CocoDataset dataset for top-down pose estimation.

    "Microsoft COCO: Common Objects in Context", ECCV'2014.
    More details can be found in the `paper
    <https://arxiv.org/abs/1405.0312>`__ .

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        dataset_info (DatasetInfo): A class containing all dataset info.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 dataset_info=None,
                 test_mode=False):
        super().__init__(
            ann_file,
            img_prefix,
            data_cfg,
            pipeline,
            dataset_info=dataset_info,
            test_mode=test_mode)
        self.imagename_with_boxid = data_cfg.get('imagename_with_boxid', False)

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """load annotation from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]

        Args:
            img_id: coco image id

        Returns:
            dict: db entry
        """
        img_ann = self.coco.loadImgs(img_id)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            if 'bbox' not in obj:
                continue
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w))
            y2 = min(height - 1, y1 + max(0, h))
            if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        bbox_id = 0
        rec = []
        for obj in objs:
            if 'keypoints' not in obj:
                continue
            if max(obj['keypoints']) == 0:
                continue
            if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

            image_file = osp.join(self.img_prefix, self.id2name[img_id])
            if self.imagename_with_boxid:
                # gt bbox label example: 000000342971_box0_image.png
                image_file = image_file.replace(".jpg", "_box{}_image.png".format(bbox_id))
            rec.append({
                'image_file': image_file,
                'bbox': obj['clean_bbox'][:4],
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec

    def _load_coco_person_detection_results(self):
        """Load coco person detection results."""
        num_joints = self.ann_info['num_joints']
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            raise ValueError('=> Load %s fail!' % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        bbox_id = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue

            image_file = osp.join(self.img_prefix,
                                  self.id2name[det_res['image_id']])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.det_bbox_thr:
                continue

            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)

            if self.imagename_with_boxid:
                image_file = image_file.replace(".jpg", "_box{}_image.png".format(bbox_id))
            kpt_db.append({
                'image_file': image_file,
                'rotation': 0,
                'bbox': box[:4],
                'bbox_score': score,
                'dataset': self.dataset_name,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1
        print(f'=> Total boxes after filter '
              f'low score@{self.det_bbox_thr}: {bbox_id}')

        return kpt_db

    @deprecated_api_warning(name_dict=dict(outputs='results'))
    def evaluate(self, results, res_folder=None, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will be
        saved in ``${res_folder}/result_keypoints.json``.

        Note:
            - batch_size: N
            - num_keypoints: K
            - heatmap height: H
            - heatmap width: W

        Args:
            results (list[dict]): Testing results containing the following
                items:

                - preds (np.ndarray[N,K,3]): The first two dimensions are \
                    coordinates, score is the third dimension of the array.
                - boxes (np.ndarray[N,6]): [center[0], center[1], scale[0], \
                    scale[1], area, score]
                - image_paths (list[str]): For example, ['data/coco/val2017 \
                    /000000393226.jpg']
                - heatmap (np.ndarray[N, K, H, W]): model output heatmap
                - bbox_id (list(int)).
            res_folder (str, optional): The folder to save the testing
                results. If not specified, a temp folder will be created.
                Default: None.
            metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')

        if res_folder is not None:
            tmp_folder = None
            res_file = osp.join(res_folder, 'result_keypoints.json')
        else:
            tmp_folder = tempfile.TemporaryDirectory()
            res_file = osp.join(tmp_folder.name, 'result_keypoints.json')

        kpts = defaultdict(list)

        for result in results:
            preds = result['preds']
            boxes = result['boxes']
            image_paths = result['image_paths']
            if self.imagename_with_boxid:
                for idx, img_path in enumerate(image_paths):
                    image_dir, file_name = os.path.dirname(img_path), os.path.basename(img_path)
                    file_name = file_name.split("_")[0] + ".jpg"
                    img_path = os.path.join(image_dir, file_name)
                    image_paths[idx] = img_path
            bbox_ids = result['bbox_ids']

            batch_size = len(image_paths)
            for i in range(batch_size):
                image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
                kpts[image_id].append({
                    'keypoints': preds[i],
                    'center': boxes[i][0:2],
                    'scale': boxes[i][2:4],
                    'area': boxes[i][4],
                    'score': boxes[i][5],
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        valid_kpts = []
        for image_id in kpts.keys():
            img_kpts = kpts[image_id]
            for n_p in img_kpts:
                box_score = n_p['score']
                if kwargs.get('rle_score', False):
                    pose_score = n_p['keypoints'][:, 2]
                    n_p['score'] = float(box_score + np.mean(pose_score) +
                                         np.max(pose_score))
                else:
                    kpt_score = 0
                    valid_num = 0
                    for n_jt in range(0, num_joints):
                        t_s = n_p['keypoints'][n_jt][2]
                        if t_s > vis_thr:
                            kpt_score = kpt_score + t_s
                            valid_num = valid_num + 1
                    if valid_num != 0:
                        kpt_score = kpt_score / valid_num
                    # rescoring
                    n_p['score'] = kpt_score * box_score

            if self.use_nms:
                nms = soft_oks_nms if self.soft_nms else oks_nms
                keep = nms(img_kpts, oks_thr, sigmas=self.sigmas)
                valid_kpts.append([img_kpts[_keep] for _keep in keep])
            else:
                valid_kpts.append(img_kpts)

        self._write_coco_keypoint_results(valid_kpts, res_file)

        # do evaluation only if the ground truth keypoint annotations exist
        if 'annotations' in self.coco.dataset:
            info_str = self._do_python_keypoint_eval(res_file)
            name_value = OrderedDict(info_str)

            if tmp_folder is not None:
                tmp_folder.cleanup()
        else:
            warnings.warn(f'Due to the absence of ground truth keypoint '
                          f'annotations, the quantitative evaluation can not '
                          f'be conducted. The prediction results have been '
                          f'saved at: {osp.abspath(res_file)}')
            name_value = {}

        return name_value
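A standalone sketch of the imagename_with_boxid convention used above: the loaders append a _box{id} suffix so each person crop maps to its own generated file, and evaluate() strips the suffix again before looking up the COCO image id. The path below is illustrative.

import os

image_file = "datasets/coco/val2017/000000342971.jpg"  # illustrative path
bbox_id = 0
renamed = image_file.replace(".jpg", "_box{}_image.png".format(bbox_id))
print(renamed)  # datasets/coco/val2017/000000342971_box0_image.png

# evaluate() recovers the original name from the suffixed file
file_name = os.path.basename(renamed).split("_")[0] + ".jpg"
print(file_name)  # 000000342971.jpg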
eval/mmpose_custom/gen_json_coco_pose.py (new file, mode 100644)

# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import os
import glob
import json
import tqdm
import argparse


def get_args_parser():
    parser = argparse.ArgumentParser('COCO pose estimation preparation', add_help=False)
    parser.add_argument('--split', type=str, help='dataset split',
                        choices=['train', 'val'], required=True)
    parser.add_argument('--output_dir', type=str, help='path to output dir',
                        default='datasets/coco_pose')
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args_parser()
    split = args.split

    if split == "train":
        aug_list = [
            "_aug0", "_aug1", "_aug2", "_aug3", "_aug4",
            "_aug5", "_aug6", "_aug7", "_aug8", "_aug9",
            "_aug10", "_aug11", "_aug12", "_aug13", "_aug14",
            "_aug15", "_aug16", "_aug17", "_aug18", "_aug19",
        ]
    elif split == "val":
        aug_list = ["", "_flip"]
    else:
        raise NotImplementedError

    save_path = os.path.join(args.output_dir, "coco_pose_256x192_{}.json".format(split))
    print(save_path)

    output_dict = []
    for aug_idx in aug_list:
        image_dir = "datasets/coco_pose/data_pair/{}_256x192{}".format(split, aug_idx)
        print(aug_idx, image_dir)
        image_path_list = glob.glob(os.path.join(image_dir, '*image.png'))
        for image_path in tqdm.tqdm(image_path_list):
            label_path = image_path.replace("image.png", "label.png")
            assert label_path != image_path
            assert os.path.isfile(image_path)
            if not os.path.isfile(label_path):
                print("ignoring {}".format(label_path))
                continue
            pair_dict = {}
            pair_dict["image_path"] = image_path.replace('datasets/', '')
            pair_dict["target_path"] = label_path.replace('datasets/', '')
            pair_dict["type"] = "coco_image2pose"
            output_dict.append(pair_dict)

    json.dump(output_dict, open(save_path, 'w'))
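Each entry the script writes has this shape (an illustrative example; the actual filenames depend on the pairs generated by the configs above):

pair_dict = {
    "image_path": "coco_pose/data_pair/train_256x192_aug0/000000000165_box0_image.png",
    "target_path": "coco_pose/data_pair/train_256x192_aug0/000000000165_box0_label.png",
    "type": "coco_image2pose",
}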
eval/mmpose_custom/model/top_down.py (new file, mode 100644)

# Copyright (c) OpenMMLab. All rights reserved.
import os
import warnings

import mmcv
import numpy as np
from PIL import Image
import torch
from mmcv.image import imwrite
from mmcv.utils.misc import deprecated_api_warning
from mmcv.visualization.image import imshow

from mmpose.core import imshow_bboxes, imshow_keypoints
from mmpose.models import builder
from mmpose.models.builder import POSENETS
# from .base import BasePose
from mmpose.models.detectors import TopDown

try:
    from mmcv.runner import auto_fp16
except ImportError:
    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0'
                  'Please install mmcv>=1.1.4')
    from mmpose.core import auto_fp16

from mmpose.core.post_processing import flip_back
from data.pipelines.custom_transform import define_colors_gb_mean_sep

color_dict = define_colors_gb_mean_sep()
color_list = [v for k, v in color_dict.items()]
color_list.append((0, 0))


@POSENETS.register_module()
class TopDownCustom(TopDown):
    """Top-down pose detectors.

    Args:
        backbone (dict): Backbone modules to extract feature.
        keypoint_head (dict): Keypoint head to process feature.
        train_cfg (dict): Config for training. Default: None.
        test_cfg (dict): Config for testing. Default: None.
        pretrained (str): Path to the pretrained models.
        loss_pose (None): Deprecated arguments. Please use
            `loss_keypoint` for heads instead.
    """
    colors = torch.tensor(color_list, dtype=torch.float32, device="cuda")

    def __init__(self,
                 backbone,
                 neck=None,
                 keypoint_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 loss_pose=None):
        super().__init__(
            backbone=backbone,
            neck=neck,
            keypoint_head=keypoint_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained,
            loss_pose=loss_pose)

    @auto_fp16(apply_to=('img', ))
    def forward(self,
                img,
                target=None,
                target_weight=None,
                img_metas=None,
                return_loss=True,
                return_heatmap=False,
                pseudo_test=False,
                **kwargs):
        """Calls either forward_train or forward_test depending on whether
        return_loss=True. Note this setting will change the expected inputs.
        When `return_loss=True`, img and img_meta are single-nested (i.e.
        Tensor and List[dict]), and when `return_loss=False`, img and img_meta
        should be double nested (i.e. List[Tensor], List[List[dict]]), with
        the outer list indicating test time augmentations.

        Note:
            - batch_size: N
            - num_keypoints: K
            - num_img_channel: C (Default: 3)
            - img height: imgH
            - img width: imgW
            - heatmaps height: H
            - heatmaps width: W

        Args:
            img (torch.Tensor[NxCximgHximgW]): Input images.
            target (torch.Tensor[NxKxHxW]): Target heatmaps.
            target_weight (torch.Tensor[NxKx1]): Weights across
                different joint types.
            img_metas (list(dict)): Information about data augmentation
                By default this includes:

                - "image_file: path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
            return_loss (bool): Option to `return loss`. `return loss=True`
                for training, `return loss=False` for validation & test.
            return_heatmap (bool) : Option to return heatmap.

        Returns:
            dict|tuple: if `return loss` is true, then return losses. \
                Otherwise, return predicted poses, boxes, image paths \
                and heatmaps.
        """
        if pseudo_test:
            return self.forward_pseudo_test(
                img, img_metas, return_heatmap=return_heatmap, **kwargs)
        if return_loss:
            return self.forward_train(img, target, target_weight, img_metas,
                                      **kwargs)
        return self.forward_test(
            img, img_metas, return_heatmap=return_heatmap, **kwargs)

    def forward_test(self, img, img_metas, return_heatmap=False, **kwargs):
        """Defines the computation performed at every call when testing."""
        assert img.size(0) == len(img_metas)
        batch_size, _, img_height, img_width = img.shape
        if batch_size > 1:
            assert 'bbox_id' in img_metas[0]

        result = {}

        features = self.backbone(img)
        if self.with_neck:
            features = self.neck(features)
        if self.with_keypoint:
            output_heatmap = self.keypoint_head.inference_model(
                features, flip_pairs=None)

        if self.test_cfg.get('flip_test', True):
            img_flipped = img.flip(3)  # (b, c, h, w)
            features_flipped = self.backbone(img_flipped)
            if self.with_neck:
                features_flipped = self.neck(features_flipped)
            if self.with_keypoint:
                output_flipped_heatmap = self.keypoint_head.inference_model(
                    features_flipped, img_metas[0]['flip_pairs'])
                output_heatmap = (output_heatmap + output_flipped_heatmap)
                if self.test_cfg.get('regression_flip_shift', False):
                    output_heatmap[..., 0] -= 1.0 / img_width
                output_heatmap = output_heatmap / 2

        if self.with_keypoint:
            keypoint_result = self.keypoint_head.decode(
                img_metas, output_heatmap, img_size=[img_width, img_height])
            result.update(keypoint_result)

            if not return_heatmap:
                output_heatmap = None

            result['output_heatmap'] = output_heatmap

        return result

    def forward_pseudo_test(self, img, img_metas, return_heatmap=False, **kwargs):
        """Defines the computation performed at every call when testing."""
        assert img.size(0) == len(img_metas)
        # img is channel-last here, (b, h, w, 3), unlike in forward_test
        batch_size, img_height, img_width, _ = img.shape
        if batch_size > 1:
            assert 'bbox_id' in img_metas[0]

        result = {}

        output_heatmap = self.decode_images_to_heatmaps_minmax(
            images=img,
            resize=False,
        )

        # add support for flip test
        if self.test_cfg.get('flip_test', True):
            image_flip_list = []
            for batch_idx in range(img.shape[0]):
                flip_image_dir = os.path.dirname(
                    img_metas[batch_idx]['image_file']) + "_flip"
                flip_image_name = os.path.basename(
                    img_metas[batch_idx]['image_file'])
                flip_image_path = os.path.join(flip_image_dir, flip_image_name)
                image = np.array(Image.open(flip_image_path))
                image_tensor = torch.from_numpy(image).to(img.device)
                image_flip_list.append(image_tensor)
            img_flipped = torch.stack(image_flip_list)  # (b, h, w, 3)

            if self.with_keypoint:
                # output_flipped_heatmap = self.keypoint_head.inference_model(
                #     features_flipped, img_metas[0]['flip_pairs'])
                output = self.decode_images_to_heatmaps_minmax(
                    images=img_flipped,
                    resize=False,
                )
                flip_pairs = img_metas[0]['flip_pairs']
                assert flip_pairs is not None
                output_flipped_heatmap = flip_back(
                    output,
                    flip_pairs,
                    target_type=self.keypoint_head.target_type)
                # feature is not aligned, shift flipped heatmap for higher accuracy
                if self.test_cfg.get('shift_heatmap', False):
                    output_flipped_heatmap[:, :, :, 1:] = output_flipped_heatmap[:, :, :, :-1]
                output_heatmap = (output_heatmap + output_flipped_heatmap)
                if self.test_cfg.get('regression_flip_shift', False):
                    output_heatmap[..., 0] -= 1.0 / img_width
                output_heatmap = output_heatmap / 2

        if self.with_keypoint:
            keypoint_result = self.keypoint_head.decode(
                img_metas, output_heatmap, img_size=[img_width, img_height])
            result.update(keypoint_result)

            if not return_heatmap:
                output_heatmap = None

            result['output_heatmap'] = output_heatmap

        return result

    def decode_images_to_heatmaps_minmax(self, images, resize=False):
        """
        Args:
            images: (bs, 256, 192, 3)
            resize: whether to resize to (64, 48)

        Returns:
            heatmaps: (bs, 17, h, w)
        """
        assert images.shape[-1] == 3
        batch_size, image_height, image_width, _ = images.shape
        images = images.float()

        # classify each pixel using GB
        GB = images[..., 1:].view(batch_size, 1, image_height, image_width, 2)  # (bs, 1, 256, 192, 2)
        colors = self.colors  # color table on this class: 17 keypoint colors + background
        num_classes = colors.shape[0]
        colors = colors.view(1, -1, 1, 1, 2)
        dist = torch.abs(GB - colors).sum(-1)  # (bs, 18, 256, 192)
        dist, indices = torch.min(dist, dim=1)  # (bs, 256, 192)

        keypoint_mask_list = []
        for idx in range(num_classes):
            mask = indices == idx  # (bs, 256, 192)
            keypoint_mask_list.append(mask)

        R = images[..., 0]  # (bs, 256, 192)
        heatmap_list = []
        for idx in range(num_classes):
            if idx == 17:
                continue
            mask = keypoint_mask_list[idx]
            heatmap = mask * R
            heatmap_list.append(heatmap.unsqueeze(1))
        heatmaps = torch.cat(heatmap_list, dim=1)

        if resize:
            raise NotImplementedError

        return heatmaps.cpu().numpy() / 255.
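A toy sketch (not from this commit) of the nearest-color decoding in decode_images_to_heatmaps_minmax: each pixel's (G, B) pair is matched to the closest entry in the color table by L1 distance, and the R channel then supplies that keypoint's heatmap value. Shown with two keypoint colors plus the (0, 0) background on a 2x2 "image":

import torch

colors = torch.tensor([[255., 255.], [255., 204.], [0., 0.]])  # keypoint 0, keypoint 1, background
img = torch.tensor([[[[200., 250., 250.],     # pixel -> keypoint 0
                      [180., 250., 200.]],    # pixel -> keypoint 1
                     [[  0.,   5.,   3.],     # pixel -> background
                      [ 90., 255., 205.]]]])  # pixel -> keypoint 1
GB = img[..., 1:].view(1, 1, 2, 2, 2)
dist = torch.abs(GB - colors.view(1, -1, 1, 1, 2)).sum(-1)  # (1, 3, 2, 2)
_, indices = torch.min(dist, dim=1)
print(indices)  # tensor([[[0, 1], [2, 1]]])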
eval/mmpose_custom/painter_inference_pose.py (new file, mode 100644)

# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings
import requests
import argparse

import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm
import matplotlib.pyplot as plt
from PIL import Image

import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler

sys.path.append('.')
import models_painter
from util.ddp_utils import DatasetTest
from util import ddp_utils

imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])


def get_args_parser():
    parser = argparse.ArgumentParser('COCO Pose Estimation', add_help=False)
    parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='dir to ckpt',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
    parser.add_argument('--prompt', type=str, help='prompt image in train set',
                        default='000000000165_box0')
    parser.add_argument('--input_size', type=int, default=448)
    parser.add_argument('--flip_test', action='store_true', help='use offline bbox')
    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://',
                        help='url used to set up distributed training')
    return parser.parse_args()


def prepare_model(chkpt_dir, arch, args=None):
    # build model
    model = getattr(models_painter, arch)()
    model.to("cuda")
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
    model_without_ddp = model.module
    # load model
    checkpoint = torch.load(chkpt_dir, map_location='cpu')
    msg = model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
    print(msg)
    return model


def run_one_image(img, tgt, size, model, out_path, device):
    x = torch.tensor(img)
    x = x.unsqueeze(dim=0)
    x = torch.einsum('nhwc->nchw', x)

    tgt = torch.tensor(tgt)
    tgt = tgt.unsqueeze(dim=0)
    tgt = torch.einsum('nhwc->nchw', tgt)

    bool_masked_pos = torch.zeros(model.module.patch_embed.num_patches)
    bool_masked_pos[model.module.patch_embed.num_patches // 2:] = 1
    bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
    valid = torch.ones_like(tgt)
    loss, y, mask = model(x.float().to(device), tgt.float().to(device),
                          bool_masked_pos.to(device), valid.float().to(device))
    y = model.module.unpatchify(y)
    y = torch.einsum('nchw->nhwc', y).detach().cpu()

    output = y[0, y.shape[1] // 2:, :, :]
    output = torch.clip((output * imagenet_std + imagenet_mean) * 255, 0, 255)
    output = F.interpolate(
        output[None, ...].permute(0, 3, 1, 2),
        size=[size[1], size[0]],
        mode='nearest').permute(0, 2, 3, 1)[0]
    output = output.int()
    output = Image.fromarray(output.numpy().astype(np.uint8))
    output.save(out_path)


if __name__ == '__main__':
    dataset_dir = "datasets/"

    args = get_args_parser()
    args = ddp_utils.init_distributed_mode(args)
    device = torch.device("cuda")

    ckpt_path = args.ckpt_path
    model = args.model
    prompt = args.prompt
    input_size = args.input_size

    path_splits = ckpt_path.split('/')
    ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
    dst_dir = os.path.join(
        'models_inference', ckpt_dir.split('/')[-1],
        "coco_pose_inference_{}_{}".format(ckpt_path, os.path.basename(prompt).split(".")[0]))
    if args.flip_test:
        dst_dir = dst_dir + "_flip"
    if ddp_utils.get_rank() == 0:
        if not os.path.exists(dst_dir):
            os.makedirs(dst_dir)
        print("output_dir: {}".format(dst_dir))

    model_painter = prepare_model(ckpt_path, model, args)
    print('Model loaded.')

    img_src_dir = dataset_dir + "coco_pose/data_pair/test_256x192"
    if args.flip_test:
        img_src_dir += "_flip"
    dataset_val = DatasetTest(img_src_dir, input_size, ext_list=('*.png',))
    sampler_val = DistributedSampler(dataset_val, shuffle=False)
    data_loader_val = DataLoader(dataset_val, batch_size=1, sampler=sampler_val,
                                 drop_last=False, collate_fn=ddp_utils.collate_fn,
                                 num_workers=2)

    img2_path = dataset_dir + "coco_pose/data_pair/train_256x192_aug0/{}_image.png".format(prompt)
    tgt2_path = dataset_dir + "coco_pose/data_pair/train_256x192_aug0/{}_label.png".format(prompt)

    # load the shared prompt image pair
    img2 = Image.open(img2_path).convert("RGB")
    img2 = img2.resize((input_size, input_size))
    img2 = np.array(img2) / 255.

    tgt2 = Image.open(tgt2_path)
    tgt2 = tgt2.resize((input_size, input_size))
    tgt2 = np.array(tgt2) / 255.

    model_painter.eval()
    for data in tqdm.tqdm(data_loader_val):
        """ Load an image """
        assert len(data) == 1
        img, img_path, size = data[0]
        img_name = os.path.basename(img_path)
        out_path = os.path.join(dst_dir, img_name.replace('.jpg', '.png'))

        img = np.concatenate((img2, img), axis=0)
        assert img.shape == (input_size * 2, input_size, 3)
        # normalize by ImageNet mean and std
        img = img - imagenet_mean
        img = img / imagenet_std

        tgt = tgt2  # tgt is not available
        tgt = np.concatenate((tgt2, tgt), axis=0)
        assert tgt.shape == (input_size * 2, input_size, 3)
        # normalize by ImageNet mean and std
        tgt = tgt - imagenet_mean
        tgt = tgt / imagenet_std

        # make random mask reproducible (comment out to make it change)
        torch.manual_seed(2)

        run_one_image(img, tgt, size, model_painter, out_path, device)
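A shape-only sketch of the mask built in run_one_image above: the prompt pair fills the top half of the doubled canvas, and the bottom half (the query's unknown target) is masked for the model to paint in. The patch count is an assumption, matching a 896x448 input with 16x16 patches as the default model name suggests.

import torch

num_patches = (896 // 16) * (448 // 16)  # 1568; illustrative, from the model name
bool_masked_pos = torch.zeros(num_patches)
bool_masked_pos[num_patches // 2:] = 1   # mask the bottom (query) half
bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)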
eval/mmpose_custom/tools/dist_test.sh
0 → 100644
View file @
106580f9
#!/usr/bin/env bash
# Copyright (c) OpenMMLab. All rights reserved.

CONFIG=$1
CHECKPOINT=$2
GPUS=$3
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/test.py \
    $CONFIG \
    $CHECKPOINT \
    --launcher pytorch \
    ${@:4}
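The script takes CONFIG, CHECKPOINT and GPUS as its first three positional arguments and forwards anything after them to test.py; NNODES, NODE_RANK, PORT and MASTER_ADDR are read from the environment. A hypothetical invocation driven from Python (the checkpoint path is a placeholder, not a file from this commit):

import os
import subprocess

subprocess.run(
    ["bash", "eval/mmpose_custom/tools/dist_test.sh",
     "eval/mmpose_custom/configs/coco_256x192_test_offline.py",  # CONFIG
     "models/checkpoint.pth",                                    # CHECKPOINT, placeholder path
     "8"],                                                       # GPUS
    env={**os.environ, "PORT": "29501"},  # optional override of the default master port
    check=True,
)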
eval/mmpose_custom/tools/dist_train.sh
0 → 100644
View file @
106580f9
#!/usr/bin/env bash
# Copyright (c) OpenMMLab. All rights reserved.

CONFIG=$1
GPUS=$2
NNODES=${NNODES:-1}
NODE_RANK=${NODE_RANK:-0}
PORT=${PORT:-29500}
MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}

PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
python -m torch.distributed.launch \
    --nnodes=$NNODES \
    --node_rank=$NODE_RANK \
    --master_addr=$MASTER_ADDR \
    --nproc_per_node=$GPUS \
    --master_port=$PORT \
    $(dirname "$0")/train.py \
    $CONFIG \
    --launcher pytorch ${@:3}
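dist_train.sh differs from dist_test.sh only in dropping the checkpoint argument and dispatching to train.py. The ${NNODES:-1}-style expansions fall back to a default when a variable is unset; a minimal Python sketch of the same fallback logic:

import os

nnodes = int(os.environ.get("NNODES", 1))        # ${NNODES:-1}
node_rank = int(os.environ.get("NODE_RANK", 0))  # ${NODE_RANK:-0}
port = int(os.environ.get("PORT", 29500))        # ${PORT:-29500}
master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
print(nnodes, node_rank, port, master_addr)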
eval/mmpose_custom/tools/test.py
0 → 100644
View file @
106580f9
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import sys
sys.path.insert(0, "./")
import tqdm
import warnings

import mmcv
import torch
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import get_dist_info, init_dist, load_checkpoint

from mmpose.apis import multi_gpu_test
from apis.test import single_gpu_test
from mmpose.datasets import build_dataloader, build_dataset
from mmpose.models import build_posenet
from mmpose.utils import setup_multi_processes

try:
    from mmcv.runner import wrap_fp16_model
except ImportError:
    warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0. '
                  'Please install mmcv>=1.1.4')
    from mmpose.core import wrap_fp16_model


def parse_args():
    parser = argparse.ArgumentParser(description='mmpose test model')
    parser.add_argument('config', help='test config file path')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument('--out', help='output result file')
    parser.add_argument('--work-dir', help='the dir to save evaluation results')
    parser.add_argument(
        '--fuse-conv-bn',
        action='store_true',
        help='Whether to fuse conv and bn, this will slightly increase '
        'the inference speed')
    parser.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed testing)')
    parser.add_argument(
        '--eval',
        default=None,
        nargs='+',
        help='evaluation metric, which depends on the dataset,'
        ' e.g., "mAP" for MSCOCO')
    parser.add_argument(
        '--gpu-collect',
        action='store_true',
        help='whether to use gpu to collect results')
    parser.add_argument('--tmpdir', help='tmp dir for writing some results')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)
    return args


def merge_configs(cfg1, cfg2):
    # Merge cfg2 into cfg1
    # Overwrite cfg1 if repeated, ignore if value is None.
    cfg1 = {} if cfg1 is None else cfg1.copy()
    cfg2 = {} if cfg2 is None else cfg2
    for k, v in cfg2.items():
        if v:
            cfg1[k] = v
    return cfg1


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    cfg.data.test.test_mode = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])

    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # build the dataloader
    dataset = build_dataset(cfg.data.test, dict(test_mode=True))
    # step 1: give default values and override (if exist) from cfg.data
    loader_cfg = {
        **dict(seed=cfg.get('seed'), drop_last=False, dist=distributed),
        **({} if torch.__version__ != 'parrots' else dict(
            prefetch_num=2,
            pin_memory=False,
        )),
        **dict((k, cfg.data[k]) for k in [
            'seed',
            'prefetch_num',
            'pin_memory',
            'persistent_workers',
        ] if k in cfg.data)
    }
    # step2: cfg.data.test_dataloader has higher priority
    test_loader_cfg = {
        **loader_cfg,
        **dict(shuffle=False, drop_last=False),
        **dict(workers_per_gpu=cfg.data.get('workers_per_gpu', 1)),
        **dict(samples_per_gpu=cfg.data.get('samples_per_gpu', 1)),
        **cfg.data.get('test_dataloader', {})
    }
    data_loader = build_dataloader(dataset, **test_loader_cfg)

    load_data_only = cfg.data.get('load_data_only', False)
    if load_data_only:
        for _ in tqdm.tqdm(data_loader):
            pass
        print("dataset enumerated, exit!")
        sys.exit()

    # build the model and load checkpoint
    model = build_posenet(cfg.model)
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        wrap_fp16_model(model)
    # load_checkpoint(model, args.checkpoint, map_location='cpu')

    if args.fuse_conv_bn:
        model = fuse_conv_bn(model)

    pseudo_test = cfg.data.get('pseudo_test', False)
    assert pseudo_test
    # only support single gpu test
    model = MMDataParallel(model, device_ids=[args.gpu_id])
    outputs = single_gpu_test(model, data_loader, pseudo_test=True)

    rank, _ = get_dist_info()
    eval_config = cfg.get('evaluation', {})
    eval_config = merge_configs(eval_config, dict(metric=args.eval))

    if rank == 0:
        if args.out:
            print(f'\nwriting results to {args.out}')
            mmcv.dump(outputs, args.out)

        results = dataset.evaluate(outputs, cfg.work_dir, **eval_config)
        for k, v in sorted(results.items()):
            print(f'{k}: {v}')


if __name__ == '__main__':
    main()
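As a standalone illustration of merge_configs above: values from the second config win only when they are truthy, so an unset --eval never clobbers the metric already configured in the file. A quick check, reusing the same function body:

def merge_configs(cfg1, cfg2):
    cfg1 = {} if cfg1 is None else cfg1.copy()
    cfg2 = {} if cfg2 is None else cfg2
    for k, v in cfg2.items():
        if v:
            cfg1[k] = v
    return cfg1

print(merge_configs({'metric': 'mAP', 'interval': 10}, {'metric': None}))
# -> {'metric': 'mAP', 'interval': 10}; the None from the CLI is ignored
print(merge_configs({'metric': 'mAP'}, {'metric': ['PCK']}))
# -> {'metric': ['PCK']}; a real CLI value wins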
eval/mmpose_custom/tools/train.py
0 → 100644
View file @
106580f9
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import copy
import os
import os.path as osp
import time
import warnings

import mmcv
import torch
import torch.distributed as dist
from mmcv import Config, DictAction
from mmcv.runner import get_dist_info, init_dist, set_random_seed
from mmcv.utils import get_git_hash

from mmpose import __version__
from mmpose.apis import init_random_seed
from apis.train import train_model
from mmpose.datasets import build_dataset
from mmpose.models import build_posenet
from mmpose.utils import collect_env, get_root_logger, setup_multi_processes


def parse_args():
    parser = argparse.ArgumentParser(description='Train a pose model')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument('--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='whether not to evaluate the checkpoint during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='(Deprecated, please use --gpu-id) number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='(Deprecated, please use --gpu-id) ids of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-id',
        type=int,
        default=0,
        help='id of gpu to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--diff_seed',
        action='store_true',
        help='Whether or not set different seeds for different ranks')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--cfg-options',
        nargs='+',
        action=DictAction,
        default={},
        help='override some settings in the used config, the key-value pair '
        'in xxx=yyy format will be merged into config file. For example, '
        "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'")
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument(
        '--autoscale-lr',
        action='store_true',
        help='automatically scale lr with the number of gpus')
    args = parser.parse_args()
    if 'LOCAL_RANK' not in os.environ:
        os.environ['LOCAL_RANK'] = str(args.local_rank)

    return args


def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)

    if args.cfg_options is not None:
        cfg.merge_from_dict(args.cfg_options)

    # set multi-process settings
    setup_multi_processes(cfg)

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # work_dir is determined in this priority: CLI > segment in file > filename
    if args.work_dir is not None:
        # update configs according to CLI args if args.work_dir is not None
        cfg.work_dir = args.work_dir
    elif cfg.get('work_dir', None) is None:
        # use config filename as default work_dir if cfg.work_dir is None
        cfg.work_dir = osp.join('./work_dirs',
                                osp.splitext(osp.basename(args.config))[0])
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.gpus is not None:
        cfg.gpu_ids = range(1)
        warnings.warn('`--gpus` is deprecated because we only support '
                      'single GPU mode in non-distributed training. '
                      'Use `gpus=1` now.')
    if args.gpu_ids is not None:
        cfg.gpu_ids = args.gpu_ids[0:1]
        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '
                      'Because we only support single GPU mode in '
                      'non-distributed training. Use the first GPU '
                      'in `gpu_ids` now.')
    if args.gpus is None and args.gpu_ids is None:
        cfg.gpu_ids = [args.gpu_id]

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
        if len(cfg.gpu_ids) > 1:
            warnings.warn(
                f'We treat {cfg.gpu_ids} as gpu-ids, and reset to '
                f'{cfg.gpu_ids[0:1]} as gpu-ids to avoid potential error in '
                'non-distribute training time.')
            cfg.gpu_ids = cfg.gpu_ids[0:1]
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)
        # re-set gpu_ids with distributed training mode
        _, world_size = get_dist_info()
        cfg.gpu_ids = range(world_size)

    # create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

    # init the meta dict to record some important information such as
    # environment info and seed, which will be logged
    meta = dict()
    # log env info
    env_info_dict = collect_env()
    env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line + env_info + '\n' + dash_line)
    meta['env_info'] = env_info

    # log some basic info
    logger.info(f'Distributed training: {distributed}')
    logger.info(f'Config:\n{cfg.pretty_text}')

    # set random seeds
    seed = init_random_seed(args.seed)
    seed = seed + dist.get_rank() if args.diff_seed else seed
    logger.info(f'Set random seed to {seed}, '
                f'deterministic: {args.deterministic}')
    set_random_seed(seed, deterministic=args.deterministic)
    cfg.seed = seed
    meta['seed'] = seed

    # model = build_posenet(cfg.model)
    model = None
    datasets = [build_dataset(cfg.data.train)]

    if len(cfg.workflow) == 2:
        val_dataset = copy.deepcopy(cfg.data.val)
        val_dataset.pipeline = cfg.data.train.pipeline
        datasets.append(build_dataset(val_dataset))

    if cfg.checkpoint_config is not None:
        # save mmpose version, config file content
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmpose_version=__version__ + get_git_hash(digits=7),
            config=cfg.pretty_text,
        )
    train_model(
        model,
        datasets,
        cfg,
        distributed=distributed,
        validate=(not args.no_validate),
        timestamp=timestamp,
        meta=meta)


if __name__ == '__main__':
    main()
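For reference, the --autoscale-lr branch above implements the linear scaling rule (Goyal et al., arXiv:1706.02677) under the assumption that the configured lr was tuned for 8 GPUs. A small sketch with an illustrative base lr (the value itself is not taken from any specific config):

base_lr = 5e-4  # illustrative value
for num_gpus in (1, 4, 8, 16):
    print(num_gpus, base_lr * num_gpus / 8)
# 1 -> 6.25e-05, 4 -> 2.5e-04, 8 -> 5e-04 (unchanged), 16 -> 1e-03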
eval/nyuv2_depth/eval.sh
0 → 100644
View file @
106580f9
#!/bin/bash

set -x

JOB_NAME="painter_vit_large"
CKPT_FILE="painter_vit_large.pth"
PROMPT="study_room_0005b/rgb_00094"
MODEL="painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1"

CKPT_PATH="models/${JOB_NAME}/${CKPT_FILE}"
DST_DIR="models_inference/${JOB_NAME}/nyuv2_depth_inference_${CKPT_FILE}_${PROMPT}"

# inference
python eval/nyuv2_depth/painter_inference_depth.py \
  --ckpt_path ${CKPT_PATH} --model ${MODEL} --prompt ${PROMPT}

python eval/nyuv2_depth/eval_with_pngs.py \
  --pred_path ${DST_DIR} \
  --gt_path datasets/nyu_depth_v2/official_splits/test/ \
  --dataset nyu --min_depth_eval 1e-3 --max_depth_eval 10 --eigen_crop
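The --min_depth_eval 1e-3 and --max_depth_eval 10 bounds define the depth range (in meters) that counts toward the NYU metrics; eval_with_pngs.py below clamps predictions into this range before scoring. A toy illustration of that clamping (the prediction values are hypothetical):

import numpy as np

pred = np.array([0.0, 0.5, 4.2, 12.0], dtype=np.float32)  # hypothetical predictions in meters
pred = np.clip(pred, 1e-3, 10.0)
print(pred)  # all values pushed inside [1e-3, 10]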
eval/nyuv2_depth/eval_with_pngs.py
0 → 100644
View file @
106580f9
# Copyright (C) 2019 Jin Han Lee
#
# This file is a part of BTS.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>

from __future__ import absolute_import, division, print_function

import os
import argparse
import fnmatch
import cv2
import numpy as np

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'


def convert_arg_line_to_args(arg_line):
    for arg in arg_line.split():
        if not arg.strip():
            continue
        yield arg


parser = argparse.ArgumentParser(description='BTS TensorFlow implementation.', fromfile_prefix_chars='@')
parser.convert_arg_line_to_args = convert_arg_line_to_args

parser.add_argument('--pred_path', type=str, help='path to the prediction results in png', required=True)
parser.add_argument('--gt_path', type=str, help='root path to the groundtruth data', required=False)
parser.add_argument('--dataset', type=str, help='dataset to test on, nyu or kitti', default='nyu')
parser.add_argument('--eigen_crop', help='if set, crops according to Eigen NIPS14', action='store_true')
parser.add_argument('--garg_crop', help='if set, crops according to Garg ECCV16', action='store_true')
parser.add_argument('--min_depth_eval', type=float, help='minimum depth for evaluation', default=1e-3)
parser.add_argument('--max_depth_eval', type=float, help='maximum depth for evaluation', default=80)
parser.add_argument('--do_kb_crop', help='if set, crop input images as kitti benchmark images', action='store_true')

args = parser.parse_args()


def compute_errors(gt, pred):
    thresh = np.maximum((gt / pred), (pred / gt))
    d1 = (thresh < 1.25).mean()
    d2 = (thresh < 1.25 ** 2).mean()
    d3 = (thresh < 1.25 ** 3).mean()

    rmse = (gt - pred) ** 2
    rmse = np.sqrt(rmse.mean())

    rmse_log = (np.log(gt) - np.log(pred)) ** 2
    rmse_log = np.sqrt(rmse_log.mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    err = np.log(pred) - np.log(gt)
    silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100

    err = np.abs(np.log10(pred) - np.log10(gt))
    log10 = np.mean(err)

    return silog, log10, abs_rel, sq_rel, rmse, rmse_log, d1, d2, d3


def test():
    global gt_depths, missing_ids, pred_filenames
    gt_depths = []
    missing_ids = set()
    pred_filenames = []

    for root, dirnames, filenames in os.walk(args.pred_path):
        for pred_filename in fnmatch.filter(filenames, '*.png'):
            if 'cmap' in pred_filename or 'gt' in pred_filename:
                continue
            dirname = root.replace(args.pred_path, '')
            pred_filenames.append(os.path.join(dirname, pred_filename))

    num_test_samples = len(pred_filenames)

    pred_depths = []

    for i in range(num_test_samples):
        pred_depth_path = os.path.join(args.pred_path, pred_filenames[i])
        pred_depth = cv2.imread(pred_depth_path, -1)
        if pred_depth is None:
            print('Missing: %s ' % pred_depth_path)
            missing_ids.add(i)
            continue

        if args.dataset == 'nyu':
            pred_depth = pred_depth.astype(np.float32) / 1000.0
        else:
            pred_depth = pred_depth.astype(np.float32) / 256.0

        pred_depths.append(pred_depth)

    print('Raw png files reading done')
    print('Evaluating {} files'.format(len(pred_depths)))

    if args.dataset == 'kitti':
        for t_id in range(num_test_samples):
            file_dir = pred_filenames[t_id].split('.')[0]
            filename = file_dir.split('_')[-1]
            directory = file_dir.replace('_' + filename, '')
            gt_depth_path = os.path.join(args.gt_path, directory, 'proj_depth/groundtruth/image_02', filename + '.png')
            depth = cv2.imread(gt_depth_path, -1)
            if depth is None:
                print('Missing: %s ' % gt_depth_path)
                missing_ids.add(t_id)
                continue

            depth = depth.astype(np.float32) / 256.0
            gt_depths.append(depth)

    elif args.dataset == 'nyu':
        for t_id in range(num_test_samples):
            file_dir = pred_filenames[t_id].split('.')[0]
            filename = file_dir.split('_')[-1]
            directory = file_dir.replace('_rgb_' + file_dir.split('_')[-1], '')
            gt_depth_path = os.path.join(args.gt_path, directory, 'sync_depth_' + filename + '.png')
            depth = cv2.imread(gt_depth_path, -1)
            if depth is None:
                print('Missing: %s ' % gt_depth_path)
                missing_ids.add(t_id)
                continue

            depth = depth.astype(np.float32) / 1000.0
            gt_depths.append(depth)

    print('GT files reading done')
    print('{} GT files missing'.format(len(missing_ids)))

    print('Computing errors')
    eval(pred_depths)

    print('Done.')


def eval(pred_depths):
    num_samples = len(pred_depths)
    pred_depths_valid = []

    i = 0
    for t_id in range(num_samples):
        if t_id in missing_ids:
            continue
        pred_depths_valid.append(pred_depths[t_id])

    num_samples = num_samples - len(missing_ids)

    silog = np.zeros(num_samples, np.float32)
    log10 = np.zeros(num_samples, np.float32)
    rms = np.zeros(num_samples, np.float32)
    log_rms = np.zeros(num_samples, np.float32)
    abs_rel = np.zeros(num_samples, np.float32)
    sq_rel = np.zeros(num_samples, np.float32)
    d1 = np.zeros(num_samples, np.float32)
    d2 = np.zeros(num_samples, np.float32)
    d3 = np.zeros(num_samples, np.float32)

    for i in range(num_samples):
        gt_depth = gt_depths[i]
        pred_depth = pred_depths_valid[i]

        pred_depth[pred_depth < args.min_depth_eval] = args.min_depth_eval
        pred_depth[pred_depth > args.max_depth_eval] = args.max_depth_eval
        pred_depth[np.isinf(pred_depth)] = args.max_depth_eval

        gt_depth[np.isinf(gt_depth)] = 0
        gt_depth[np.isnan(gt_depth)] = 0

        valid_mask = np.logical_and(gt_depth > args.min_depth_eval, gt_depth < args.max_depth_eval)

        if args.do_kb_crop:
            height, width = gt_depth.shape
            top_margin = int(height - 352)
            left_margin = int((width - 1216) / 2)
            pred_depth_uncropped = np.zeros((height, width), dtype=np.float32)
            pred_depth_uncropped[top_margin:top_margin + 352, left_margin:left_margin + 1216] = pred_depth
            pred_depth = pred_depth_uncropped

        if args.garg_crop or args.eigen_crop:
            gt_height, gt_width = gt_depth.shape
            eval_mask = np.zeros(valid_mask.shape)

            if args.garg_crop:
                eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
                          int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1

            elif args.eigen_crop:
                if args.dataset == 'kitti':
                    eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
                              int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
                else:
                    eval_mask[45:471, 41:601] = 1

            valid_mask = np.logical_and(valid_mask, eval_mask)

        silog[i], log10[i], abs_rel[i], sq_rel[i], rms[i], log_rms[i], d1[i], d2[i], d3[i] = \
            compute_errors(gt_depth[valid_mask], pred_depth[valid_mask])

    print("{:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}, {:>7}".format(
        'd1', 'd2', 'd3', 'AbsRel', 'SqRel', 'RMSE', 'RMSElog', 'SILog', 'log10'))
    print("{:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}, {:7.3f}".format(
        d1.mean(), d2.mean(), d3.mean(), abs_rel.mean(), sq_rel.mean(), rms.mean(), log_rms.mean(),
        silog.mean(), log10.mean()))

    return silog, log10, abs_rel, sq_rel, rms, log_rms, d1, d2, d3


def main():
    test()


if __name__ == '__main__':
    main()
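A toy check (not from the repo) of the compute_errors metrics on a perfect prediction: all threshold accuracies should be 1 and all error terms 0, up to floating-point noise:

import numpy as np

gt = np.array([1.0, 2.0, 4.0], dtype=np.float32)
pred = gt.copy()
thresh = np.maximum(gt / pred, pred / gt)
print((thresh < 1.25).mean())              # d1 == 1.0
print(np.sqrt(((gt - pred) ** 2).mean()))  # rmse == 0.0
print(np.mean(np.abs(gt - pred) / gt))     # abs_rel == 0.0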
eval/nyuv2_depth/painter_inference_depth.py
0 → 100644
View file @
106580f9
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import argparse

import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm

import matplotlib.pyplot as plt
from PIL import Image

sys.path.append('.')
import models_painter


imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])


def show_image(image, title=''):
    # image is [H, W, 3]
    assert image.shape[2] == 3
    plt.imshow(torch.clip((image * imagenet_std + imagenet_mean) * 255, 0, 255).int())
    plt.title(title, fontsize=16)
    plt.axis('off')
    return


def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1'):
    # build model
    model = getattr(models_painter, arch)()
    # load model
    checkpoint = torch.load(chkpt_dir, map_location='cuda:0')
    msg = model.load_state_dict(checkpoint['model'], strict=False)
    print(msg)
    model.eval()
    return model


def run_one_image(img, tgt, size, model, out_path, device):
    x = torch.tensor(img)
    x = x.unsqueeze(dim=0)
    x = torch.einsum('nhwc->nchw', x)

    tgt = torch.tensor(tgt)
    tgt = tgt.unsqueeze(dim=0)
    tgt = torch.einsum('nhwc->nchw', tgt)

    bool_masked_pos = torch.zeros(model.patch_embed.num_patches)
    bool_masked_pos[model.patch_embed.num_patches//2:] = 1
    bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
    valid = torch.ones_like(tgt)
    loss, y, mask = model(x.float().to(device), tgt.float().to(device),
                          bool_masked_pos.to(device), valid.float().to(device))
    y = model.unpatchify(y)
    y = torch.einsum('nchw->nhwc', y).detach().cpu()

    output = y[0, y.shape[1]//2:, :, :]
    output = torch.clip((output * imagenet_std + imagenet_mean) * 10000, 0, 10000)
    output = F.interpolate(
        output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bilinear').permute(0, 2, 3, 1)[0]
    output = output.mean(-1).int()
    output = Image.fromarray(output.numpy())
    output.save(out_path)


def get_args_parser():
    parser = argparse.ArgumentParser('NYU Depth V2', add_help=False)
    parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='dir to ckpt',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
    parser.add_argument('--prompt', type=str, help='prompt image in train set',
                        default='study_room_0005b/rgb_00094')
    parser.add_argument('--input_size', type=int, default=448)
    return parser.parse_args()


if __name__ == '__main__':
    args = get_args_parser()

    ckpt_path = args.ckpt_path
    path_splits = ckpt_path.split('/')
    ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]

    model_painter = prepare_model(ckpt_path, args.model)
    print('Model loaded.')

    device = torch.device("cuda")
    model_painter.to(device)

    dst_dir = os.path.join('models_inference', ckpt_dir,
                           "nyuv2_depth_inference_{}_{}/".format(ckpt_file, args.prompt))
    print(dst_dir)
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)

    img_src_dir = "datasets/nyu_depth_v2/official_splits/test/"
    img_path_list = glob.glob(img_src_dir + "/*/rgb*g")
    img2_path = "datasets/nyu_depth_v2/sync/{}.jpg".format(args.prompt)
    tgt_path = "datasets/nyu_depth_v2/sync/{}.png".format(args.prompt.replace('rgb', 'sync_depth'))
    tgt2_path = tgt_path

    res, hres = args.input_size, args.input_size

    for img_path in tqdm.tqdm(img_path_list):
        room_name = img_path.split("/")[-2]
        img_name = img_path.split("/")[-1].split(".")[0]
        out_path = dst_dir + "/" + room_name + "_" + img_name + ".png"

        img = Image.open(img_path).convert("RGB")
        size = img.size
        img = img.resize((res, hres))
        img = np.array(img) / 255.

        img2 = Image.open(img2_path).convert("RGB")
        img2 = img2.resize((res, hres))
        img2 = np.array(img2) / 255.

        img = np.concatenate((img2, img), axis=0)
        assert img.shape == (2 * res, res, 3)
        # normalize by ImageNet mean and std
        img = img - imagenet_mean
        img = img / imagenet_std

        tgt = Image.open(tgt_path)
        tgt = np.array(tgt) / 10000.
        tgt = tgt * 255
        tgt = Image.fromarray(tgt).convert("RGB")
        tgt = tgt.resize((res, hres))
        tgt = np.array(tgt) / 255.

        tgt2 = Image.open(tgt2_path)
        tgt2 = np.array(tgt2) / 10000.
        tgt2 = tgt2 * 255
        tgt2 = Image.fromarray(tgt2).convert("RGB")
        tgt2 = tgt2.resize((res, hres))
        tgt2 = np.array(tgt2) / 255.

        tgt = np.concatenate((tgt2, tgt), axis=0)
        assert tgt.shape == (2 * res, res, 3)
        # normalize by ImageNet mean and std
        tgt = tgt - imagenet_mean
        tgt = tgt / imagenet_std

        torch.manual_seed(2)
        run_one_image(img, tgt, size, model_painter, out_path, device)
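run_one_image above rescales the de-normalized prediction by 10000 and collapses the three channels with a mean before writing a 16-bit PNG, inverting the /10000 * 255 encoding applied to the ground-truth depth PNGs. A sketch of just that value round trip, with hypothetical raw values and no model involved (the rounding here is ours; the script truncates with .int() after clipping):

import numpy as np

raw = np.array([[500, 2000], [4000, 9500]], dtype=np.uint16)    # hypothetical 16-bit PNG values
as_image = raw / 10000. * 255                                   # the 0-255 target the model paints
recovered = np.rint(as_image / 255. * 10000).astype(np.uint16)  # mapping back to raw PNG units
print(np.array_equal(raw, recovered))                           # True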
eval/sidd/eval_sidd.m
0 → 100644
View file @
106580f9
close all; clear all;

denoised = load('/MATLAB Drive/painter/sidd/Idenoised.mat');
gt = load('/MATLAB Drive/painter/sidd/ValidationGtBlocksSrgb.mat');

denoised = denoised.Idenoised;
gt = gt.ValidationGtBlocksSrgb;
gt = im2single(gt);

total_psnr = 0;
total_ssim = 0;
for i = 1:40
    for k = 1:32
        denoised_patch = squeeze(denoised(i,k,:,:,:));
        gt_patch = squeeze(gt(i,k,:,:,:));
        ssim_val = ssim(denoised_patch, gt_patch);
        psnr_val = psnr(denoised_patch, gt_patch);
        total_ssim = total_ssim + ssim_val;
        total_psnr = total_psnr + psnr_val;
    end
end

qm_psnr = total_psnr / (40*32);
qm_ssim = total_ssim / (40*32);
fprintf('PSNR: %f SSIM: %f\n', qm_psnr, qm_ssim);
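For environments without MATLAB, an equivalent sketch using the skimage metrics that painter_inference_sidd.py below already imports (paths are placeholders, and channel_axis assumes skimage >= 0.19):

import numpy as np
import scipy.io as sio
from skimage.metrics import peak_signal_noise_ratio, structural_similarity

denoised = sio.loadmat('Idenoised.mat')['Idenoised']                      # (40, 32, 256, 256, 3)
gt = sio.loadmat('ValidationGtBlocksSrgb.mat')['ValidationGtBlocksSrgb']
gt = gt.astype(np.float32) / 255.                                         # mirror MATLAB's im2single

psnr_sum, ssim_sum = 0.0, 0.0
for i in range(40):
    for k in range(32):
        psnr_sum += peak_signal_noise_ratio(gt[i, k], denoised[i, k], data_range=1.0)
        ssim_sum += structural_similarity(gt[i, k], denoised[i, k], channel_axis=-1, data_range=1.0)
print('PSNR: %f SSIM: %f' % (psnr_sum / (40 * 32), ssim_sum / (40 * 32)))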
eval/sidd/painter_inference_sidd.py
0 → 100644
View file @
106580f9
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import sys
import os
import warnings

import requests
import argparse

import torch
import torch.nn.functional as F
import numpy as np
import glob
import tqdm

import matplotlib.pyplot as plt
import cv2
from PIL import Image
import scipy.io as sio

sys.path.append('.')
import models_painter

from skimage.metrics import peak_signal_noise_ratio as psnr_loss
from skimage.metrics import structural_similarity as ssim_loss


imagenet_mean = np.array([0.485, 0.456, 0.406])
imagenet_std = np.array([0.229, 0.224, 0.225])


def get_args_parser():
    parser = argparse.ArgumentParser('SIDD denoising', add_help=False)
    parser.add_argument('--ckpt_path', type=str, help='path to ckpt', default='')
    parser.add_argument('--model', type=str, help='dir to ckpt',
                        default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1')
    parser.add_argument('--prompt', type=str, help='prompt image in train set', default='9_9')
    parser.add_argument('--input_size', type=int, default=448)
    parser.add_argument('--save', action='store_true', help='save predictions', default=False)
    return parser.parse_args()


def prepare_model(chkpt_dir, arch='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1'):
    # build model
    model = getattr(models_painter, arch)()
    # load model
    checkpoint = torch.load(chkpt_dir, map_location='cuda:0')
    msg = model.load_state_dict(checkpoint['model'], strict=False)
    print(msg)
    return model


def run_one_image(img, tgt, size, model, out_path, device):
    x = torch.tensor(img)
    x = x.unsqueeze(dim=0)
    x = torch.einsum('nhwc->nchw', x)

    tgt = torch.tensor(tgt)
    tgt = tgt.unsqueeze(dim=0)
    tgt = torch.einsum('nhwc->nchw', tgt)

    bool_masked_pos = torch.zeros(model.patch_embed.num_patches)
    bool_masked_pos[model.patch_embed.num_patches//2:] = 1
    bool_masked_pos = bool_masked_pos.unsqueeze(dim=0)
    valid = torch.ones_like(tgt)
    loss, y, mask = model(x.float().to(device), tgt.float().to(device),
                          bool_masked_pos.to(device), valid.float().to(device))
    y = model.unpatchify(y)
    y = torch.einsum('nchw->nhwc', y).detach().cpu()

    output = y[0, y.shape[1]//2:, :, :]
    output = output * imagenet_std + imagenet_mean
    output = F.interpolate(
        output[None, ...].permute(0, 3, 1, 2), size=[size[1], size[0]], mode='bicubic').permute(0, 2, 3, 1)[0]
    return output.numpy()


if __name__ == '__main__':
    args = get_args_parser()

    ckpt_path = args.ckpt_path
    model = args.model
    prompt = args.prompt
    input_size = args.input_size

    path_splits = ckpt_path.split('/')
    ckpt_dir, ckpt_file = path_splits[-2], path_splits[-1]
    dst_dir = os.path.join('models_inference', ckpt_dir.split('/')[-1],
                           "sidd_inference_{}_{}".format(ckpt_file, os.path.basename(prompt).split(".")[0]))
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    print("output_dir: {}".format(dst_dir))

    model_painter = prepare_model(ckpt_path, model)
    print('Model loaded.')

    device = torch.device("cuda")
    model_painter.to(device)

    img_src_dir = "datasets/denoise/val/"
    filepath = os.path.join(img_src_dir, 'ValidationNoisyBlocksSrgb.mat')
    img = sio.loadmat(filepath)
    Inoisy = np.float32(np.array(img['ValidationNoisyBlocksSrgb']))  # (40, 32, 256, 256, 3)
    Inoisy /= 255.

    img2_path = "datasets/denoise/train/input/{}.png".format(prompt)
    tgt2_path = "datasets/denoise/train/groundtruth/{}.png".format(prompt)

    # load the shared prompt image pair
    img2 = Image.open(img2_path).convert("RGB")
    img2 = img2.resize((input_size, input_size))
    img2 = np.array(img2) / 255.

    tgt2 = Image.open(tgt2_path)
    tgt2 = tgt2.resize((input_size, input_size))
    tgt2 = np.array(tgt2) / 255.

    model_painter.eval()
    restored = np.zeros_like(Inoisy)
    for img_idx in tqdm.tqdm(range(40)):
        for patch_idx in range(32):
            """ Load an image """
            img_org = Inoisy[img_idx, patch_idx, :, :, :]
            img = cv2.resize(img_org, (input_size, input_size))
            # img = img_org.resize((input_size, input_size))

            img = np.concatenate((img2, img), axis=0)
            assert img.shape == (input_size * 2, input_size, 3)
            # normalize by ImageNet mean and std
            img = img - imagenet_mean
            img = img / imagenet_std

            tgt = tgt2  # tgt is not available
            tgt = np.concatenate((tgt2, tgt), axis=0)
            assert tgt.shape == (input_size * 2, input_size, 3)
            # normalize by ImageNet mean and std
            tgt = tgt - imagenet_mean
            tgt = tgt / imagenet_std

            # make random mask reproducible (comment out to make it change)
            torch.manual_seed(2)
            output = run_one_image(img, tgt, size=(256, 256), model=model_painter, out_path=None, device=device)
            rgb_restored = output
            rgb_restored = np.clip(rgb_restored, 0, 1)

            restored[img_idx, patch_idx, :, :, :] = rgb_restored
            # optionally save images
            if args.save:
                out_path = os.path.join(dst_dir, '%04d_%02d.png' % (img_idx + 1, patch_idx + 1))
                output = rgb_restored * 255
                output = Image.fromarray(output.astype(np.uint8))
                output.save(out_path)

    # save denoised data
    sio.savemat(os.path.join(dst_dir, 'Idenoised.mat'), {"Idenoised": restored, })
    print(os.path.join(dst_dir, 'Idenoised.mat'))
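The SIDD validation set is organized as 40 images with 32 fixed 256x256 crops each, which is why restored is indexed [img_idx, patch_idx] and saved as one 5-D array. A quick sanity check of the saved file (the path is a placeholder):

import scipy.io as sio

restored = sio.loadmat('Idenoised.mat')['Idenoised']     # placeholder path to the saved output
assert restored.shape == (40, 32, 256, 256, 3)
assert 0.0 <= restored.min() and restored.max() <= 1.0   # predictions were clipped to [0, 1]
print("Idenoised.mat looks consistent")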
main_train.py
0 → 100644
View file @
106580f9
# --------------------------------------------------------
# Images Speak in Images: A Generalist Painter for In-Context Visual Learning (https://arxiv.org/abs/2212.02499)
# Github source: https://github.com/baaivision/Painter
# Copyright (c) 2022 Beijing Academy of Artificial Intelligence (BAAI)
# Licensed under The MIT License [see LICENSE for details]
# By Xinlong Wang, Wen Wang
# Based on MAE, BEiT, detectron2, Mask2Former, bts, mmcv, mmdetetection, mmpose, MIRNet, MPRNet, and Uformer codebases
# --------------------------------------------------------'
import argparse
import datetime
import json
import numpy as np
import os
import time
from pathlib import Path

import torch
import torch.backends.cudnn as cudnn
from torch.utils.tensorboard import SummaryWriter

import timm
assert timm.__version__ == "0.3.2"  # version check

import util.lr_decay as lrd
import util.misc as misc
from util.misc import get_parameter_groups
from util.misc import NativeScalerWithGradNormCount as NativeScaler
from util.pos_embed import interpolate_pos_embed
import models_painter
from engine_train import train_one_epoch, evaluate_pt

from data.pairdataset import PairDataset
import data.pair_transforms as pair_transforms
from util.masking_generator import MaskingGenerator
from data.sampler import DistributedSamplerWrapper

try:
    import wandb
    has_wandb = True
except ImportError:
    has_wandb = False


def get_args_parser():
    parser = argparse.ArgumentParser('Painter pre-training', add_help=False)
    parser.add_argument('--batch_size', default=2, type=int,
                        help='Batch size per GPU (effective batch size is batch_size * accum_iter * # gpus)')
    parser.add_argument('--epochs', default=15, type=int)
    parser.add_argument('--accum_iter', default=16, type=int,
                        help='Accumulate gradient iterations (for increasing the effective batch size under memory constraints)')

    # Model parameters
    parser.add_argument('--model', default='painter_vit_large_patch16_input896x448_win_dec64_8glb_sl1',
                        type=str, metavar='MODEL', help='Name of model to train')
    parser.add_argument('--input_size', default=224, type=int, nargs='+', help='images input size')
    parser.add_argument('--mask_ratio', default=0.5, type=float,
                        help='Masking ratio (percentage of removed patches).')
    parser.add_argument('--norm_pix_loss', action='store_true',
                        help='Use (per-patch) normalized pixels as targets for computing loss')
    parser.set_defaults(norm_pix_loss=False)
    parser.add_argument('--num_mask_patches', default=784, type=int,
                        help='number of the visual tokens/patches need be masked')
    parser.add_argument('--max_mask_patches_per_block', type=int, default=None)
    parser.add_argument('--min_mask_patches_per_block', type=int, default=16)
    parser.add_argument('--stop_grad_patch_embed', action='store_true',
                        help='stop-grad after first conv, or patch embedding')
    parser.set_defaults(stop_grad_patch_embed=False)
    parser.add_argument('--finetune', default='', help='finetune from checkpoint')
    parser.add_argument('--drop_path', default=0., type=float, help='Drop path rate (default: 0.)')
    parser.add_argument('--min_random_scale', default=0.3, type=float,
                        help='Minimal random scale for randomresizecrop (default: 0.3)')
    parser.add_argument('--last_norm_instance', action='store_true', default=False,
                        help='use instance norm to normalize each channel map before the decoder layer')
    parser.add_argument('--half_mask_ratio', default=0.1, type=float,
                        help='ratio of using half mask during training (default: 0.1)')
    parser.add_argument('--use_checkpoint', action='store_true', default=False,
                        help='use checkpoint to save GPU memory')

    # Optimizer parameters
    parser.add_argument('--weight_decay', type=float, default=0.1, help='weight decay (default: 0.1)')
    parser.add_argument('--lr', type=float, default=None, metavar='LR', help='learning rate (absolute lr)')
    parser.add_argument('--blr', type=float, default=1e-3, metavar='LR',
                        help='base learning rate: absolute_lr = base_lr * total_batch_size / 256')
    parser.add_argument('--min_lr', type=float, default=0., metavar='LR',
                        help='lower lr bound for cyclic schedulers that hit 0')
    parser.add_argument('--warmup_epochs', type=int, default=40, metavar='N', help='epochs to warmup LR')
    parser.add_argument('--save_freq', type=int, default=100, help='save checkpoints frequency')
    parser.add_argument('--clip_grad', type=float, default=3.0, metavar='NORM',
                        help='Clip gradient norm (default: None, no clipping)')
    parser.add_argument('--opt_eps', default=1e-8, type=float, metavar='EPSILON',
                        help='Optimizer Epsilon (default: 1e-8)')
    parser.add_argument('--opt_betas', default=[0.9, 0.999], type=float, nargs='+', metavar='BETA',
                        help='Optimizer Betas (default: None, use opt default)')
    parser.add_argument('--layer_decay', type=float, default=1.0, metavar='LRD',
                        help='Learning rate layer decay')

    # Dataset parameters
    parser.add_argument('--data_path', default='/datasets01/imagenet_full_size/061417/', type=str,
                        help='dataset path')
    # parser.add_argument('--json_path', default='./', type=str,
    parser.add_argument('--json_path', default='./', nargs='+', type=str, help='json path')
    parser.add_argument('--val_json_path', default='./', nargs='+', type=str, help='json path')
    parser.add_argument('--output_dir', default='./output_dir',
                        help='path where to save, empty for no saving')
    parser.add_argument('--log_dir', default='./output_dir', help='path where to tensorboard log')
    parser.add_argument('--device', default='cuda', help='device to use for training / testing')
    parser.add_argument('--seed', default=0, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--auto_resume', action='store_true')
    parser.set_defaults(auto_resume=False)
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N', help='start epoch')
    parser.add_argument('--num_workers', default=10, type=int)
    parser.add_argument('--pin_mem', action='store_true',
                        help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
    parser.add_argument('--no_pin_mem', action='store_false', dest='pin_mem')
    parser.set_defaults(pin_mem=True)
    parser.add_argument('--use_two_pairs', action='store_true', help='concatenate two pairs of images')
    parser.set_defaults(use_two_pairs=True)

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes')
    parser.add_argument('--local_rank', default=-1, type=int)
    parser.add_argument('--dist_on_itp', action='store_true')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    parser.add_argument('--enable_deepspeed', action='store_true', default=False)
    parser.add_argument('--zero_stage', default=0, type=int, help='ZeRO optimizer stage (default: 0)')

    # misc
    parser.add_argument('--log_wandb', action='store_true', default=False,
                        help='log training and validation metrics to wandb')

    known_args, _ = parser.parse_known_args()
    if known_args.enable_deepspeed:
        try:
            import deepspeed
            from deepspeed import DeepSpeedConfig
            parser = deepspeed.add_config_arguments(parser)
            ds_init = deepspeed.initialize
        except:
            print("Please 'pip install deepspeed==0.4.0'")
            exit(0)
    else:
        ds_init = None

    return parser.parse_args(), ds_init


def main(args, ds_init):
    misc.init_distributed_mode(args)

    if ds_init is not None:
        misc.create_ds_config(args)

    print('job dir: {}'.format(os.path.dirname(os.path.realpath(__file__))))
    print("{}".format(args).replace(', ', ',\n'))

    device = torch.device(args.device)

    # fix the seed for reproducibility
    seed = args.seed + misc.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)

    cudnn.benchmark = True

    # define the model
    model = models_painter.__dict__[args.model]()

    if args.finetune:
        checkpoint = torch.load(args.finetune, map_location='cpu')

        print("Load pre-trained checkpoint from: %s" % args.finetune)
        checkpoint_model = checkpoint['model']
        state_dict = model.state_dict()
        rm_key_list = ['decoder_embed.weight', 'decoder_embed.bias', 'mask_token']
        if args.last_norm_instance:
            rm_key_list.extend(['norm.weight', 'norm.bias'])
        for k in rm_key_list:
            if k in checkpoint_model and checkpoint_model[k].shape != state_dict[k].shape:
                print(f"Removing key {k} from pretrained checkpoint")
                del checkpoint_model[k]

        # interpolate patch embedding
        if "patch32" in args.model:
            patch_weight = checkpoint['model']['patch_embed.proj.weight']
            new_patch_weight = torch.nn.functional.interpolate(
                patch_weight, size=(32, 32), mode='bicubic', align_corners=False)
            checkpoint['model']['patch_embed.proj.weight'] = new_patch_weight

        # interpolate position embedding
        if "painter" not in args.model:
            interpolate_pos_embed(model, checkpoint_model)

        # load pre-trained model
        msg = model.load_state_dict(checkpoint_model, strict=False)
        print(msg)

    patch_size = model.patch_size
    print("Patch size = %s" % str(patch_size))
    args.window_size = (args.input_size[0] // patch_size, args.input_size[1] // patch_size)
    args.patch_size = patch_size

    # simple augmentation
    transform_train = pair_transforms.Compose([
        pair_transforms.RandomResizedCrop(args.input_size[1], scale=(args.min_random_scale, 1.0), interpolation=3),  # 3 is bicubic
        pair_transforms.RandomApply([pair_transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8),
        pair_transforms.RandomHorizontalFlip(),
        pair_transforms.ToTensor(),
        pair_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    transform_train2 = pair_transforms.Compose([
        pair_transforms.RandomResizedCrop(args.input_size[1], scale=(0.9999, 1.0), interpolation=3),  # 3 is bicubic
        pair_transforms.ToTensor(),
        pair_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    transform_train3 = pair_transforms.Compose([
        pair_transforms.RandomResizedCrop(args.input_size[1], scale=(0.9999, 1.0), interpolation=3),  # 3 is bicubic
        pair_transforms.ToTensor(),
        pair_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
    transform_train_seccrop = pair_transforms.Compose([
        pair_transforms.RandomResizedCrop(args.input_size, scale=(args.min_random_scale, 1.0), ratio=(0.3, 0.7), interpolation=3),  # 3 is bicubic
    ])
    transform_val = pair_transforms.Compose([
        pair_transforms.RandomResizedCrop(args.input_size[1], scale=(0.9999, 1.0), interpolation=3),  # 3 is bicubic
        pair_transforms.ToTensor(),
        pair_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

    masked_position_generator = MaskingGenerator(
        args.window_size, num_masking_patches=args.num_mask_patches,
        max_num_patches=args.max_mask_patches_per_block,
        min_num_patches=args.min_mask_patches_per_block,
    )

    dataset_train = PairDataset(args.data_path, args.json_path, transform=transform_train,
                                transform2=transform_train2, transform3=transform_train3,
                                transform_seccrop=transform_train_seccrop,
                                masked_position_generator=masked_position_generator,
                                use_two_pairs=args.use_two_pairs,
                                half_mask_ratio=args.half_mask_ratio)
    dataset_val = PairDataset(args.data_path, args.val_json_path, transform=transform_val,
                              transform2=None, transform3=None,
                              masked_position_generator=masked_position_generator,
                              use_two_pairs=args.use_two_pairs,
                              half_mask_ratio=1.0)
    print(dataset_train)
    print(dataset_val)

    if True:  # args.distributed:
        num_tasks = misc.get_world_size()
        global_rank = misc.get_rank()
        num_samples_train = len(dataset_train)
        weights_train = dataset_train.weights
        sampler_train = torch.utils.data.WeightedRandomSampler(weights_train, num_samples_train, replacement=True)
        sampler_train = DistributedSamplerWrapper(
            sampler_train, num_replicas=num_tasks, rank=global_rank, shuffle=True)
        print("Sampler_train = %s" % str(sampler_train))
        sampler_val = torch.utils.data.DistributedSampler(
            dataset_val, num_replicas=num_tasks, rank=global_rank, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)

    if global_rank == 0 and args.log_dir is not None:
        os.makedirs(args.log_dir, exist_ok=True)
        log_writer = SummaryWriter(log_dir=args.log_dir)
    else:
        log_writer = None

    if global_rank == 0 and args.log_wandb:
        experiment = args.log_dir.split('/')[-2]
        if args.resume == '':
            wandb.init(project="Painter", name=experiment, config=args)
        else:
            wandb.init(project="Painter", name=experiment, config=args, resume=True)

    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, sampler=sampler_train,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=args.pin_mem,
        drop_last=True,
    )

    data_loader_val = torch.utils.data.DataLoader(
        dataset_val, sampler=sampler_val,
        batch_size=int(1.5 * args.batch_size),
        num_workers=args.num_workers,
        pin_memory=args.pin_mem,
        drop_last=False,
    )

    model.to(device)

    model_without_ddp = model
    print("Model = %s" % str(model_without_ddp))

    eff_batch_size = args.batch_size * args.accum_iter * misc.get_world_size()

    if args.lr is None:  # only base_lr is specified
        args.lr = args.blr * eff_batch_size / 256

    print("base lr: %.2e" % (args.lr * 256 / eff_batch_size))
    print("actual lr: %.2e" % args.lr)

    print("accumulate grad iterations: %d" % args.accum_iter)
    print("effective batch size: %d" % eff_batch_size)

    if args.enable_deepspeed:
        loss_scaler = None
        optimizer_params = get_parameter_groups(model, args.weight_decay, model.no_weight_decay())
        model, optimizer, _, _ = ds_init(
            args=args, model=model, model_parameters=optimizer_params,
            dist_init_required=not args.distributed,
        )
        print("model.gradient_accumulation_steps() = %d" % model.gradient_accumulation_steps())
        assert model.gradient_accumulation_steps() == args.accum_iter
    else:
        if args.distributed:
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
            model_without_ddp = model.module

        # following timm: set wd as 0 for bias and norm layers
        param_groups = lrd.param_groups_lrd(model_without_ddp, args.weight_decay,
                                            no_weight_decay_list=model_without_ddp.no_weight_decay(),
                                            layer_decay=args.layer_decay)
        optimizer = torch.optim.AdamW(param_groups, lr=args.lr, betas=args.opt_betas)
        print(optimizer)
        loss_scaler = NativeScaler()

    misc.auto_load_model(args=args, model=model, model_without_ddp=model_without_ddp,
                         optimizer=optimizer, loss_scaler=loss_scaler)

    print(f"Start training for {args.epochs} epochs")
    start_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            data_loader_train.sampler.set_epoch(epoch)
        train_stats = train_one_epoch(
            model, data_loader_train,
            optimizer, device, epoch, loss_scaler,
            log_writer=log_writer,
            global_rank=global_rank,
            args=args
        )
        if args.output_dir and (epoch % args.save_freq == 0 or epoch + 1 == args.epochs):
            misc.save_model(
                args=args, model=model, model_without_ddp=model_without_ddp, optimizer=optimizer,
                loss_scaler=loss_scaler, epoch=epoch)

        test_stats = evaluate_pt(data_loader_val, model, device, epoch=epoch,
                                 global_rank=global_rank, args=args)
        print(f"Val loss of the network on the {len(dataset_val)} test images: {test_stats['loss']:.3f}")

        log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                     **{f'test_{k}': v for k, v in test_stats.items()},
                     'epoch': epoch, }

        if args.output_dir and misc.is_main_process():
            if log_writer is not None:
                log_writer.flush()
            with open(os.path.join(args.output_dir, "log.txt"), mode="a", encoding="utf-8") as f:
                f.write(json.dumps(log_stats) + "\n")

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
    if global_rank == 0 and args.log_wandb:
        wandb.finish()


if __name__ == '__main__':
    args, ds_init = get_args_parser()
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    main(args, ds_init)
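To make the learning-rate bookkeeping above concrete: with the default batch_size=2 and accum_iter=16, the effective batch size grows with the process count, and the absolute lr derived from --blr scales linearly with it. For example, on an assumed 8 GPUs:

batch_size, accum_iter, world_size, blr = 2, 16, 8, 1e-3
eff_batch_size = batch_size * accum_iter * world_size  # 256
lr = blr * eff_batch_size / 256                        # 1e-3
print(eff_batch_size, lr)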