Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
76ccaa54
Commit
76ccaa54
authored
Jan 16, 2023
by
unknown
Browse files
添加mmaction2测试用例
parent
44c28b2b
Changes
440
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1735 additions
and
0 deletions
+1735
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py
...ecognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py
+85
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py
...cognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py
+96
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py
...tion/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py
+107
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py
...ion/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py
+105
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py
...nition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py
+98
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py
...ition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py
+88
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py
...ognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py
+42
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py
...cognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py
+96
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py
...cognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py
+91
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py
...ition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py
+29
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py
...recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py
+98
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py
.../recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py
+98
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py
...cognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py
+87
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py
...cognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py
+91
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py
...cognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py
+91
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py
...tion/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py
+82
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py
...ion/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py
+88
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py
...on/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py
+126
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py
...tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py
+30
-0
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py
...ion/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py
+107
-0
No files found.
Too many changes to show.
To preserve performance only
440 of 440+
files are displayed.
Plain diff
Email patch
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: 320p rawframes, 1 clip_len x 1 interval x 8 segments,
# 100 epochs on Kinetics-400, RGB modality.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py',
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes_320p.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'

# ImageNet RGB normalization statistics.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))

evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.00375, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus

# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: 320p rawframes, 5x1x8 sampling, 110 epochs on
# Kinetics-400, optical-flow modality.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/default_runtime.py',
]

# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(backbone=dict(in_channels=10))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics400_flow_train_list_320p.txt'
ann_file_val = 'data/kinetics400/kinetics400_flow_val_list_320p.txt'
ann_file_test = 'data/kinetics400/kinetics400_flow_val_list_320p.txt'

# Two-channel (x/y flow) normalization.
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])

train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=test_pipeline))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.001875, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))

# learning policy
lr_config = dict(policy='step', step=[70, 100])
total_epochs = 110

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: fine-tune on ActivityNet clips, 5x1x8 sampling,
# 150 epochs, optical-flow modality, initialized from a Kinetics-400 flow model.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/default_runtime.py',
]

# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(
    backbone=dict(in_channels=10),
    cls_head=dict(num_classes=200, dropout_ratio=0.8))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_clip.txt'
ann_file_val = 'data/ActivityNet/anet_val_clip.txt'
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'

img_norm_cfg = dict(mean=[128, 128], std=[128, 128], to_bgr=False)

train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=test_pipeline))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.001, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))

# learning policy
lr_config = dict(policy='step', step=[60, 120])
total_epochs = 150

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow/'
load_from = (
    'https://download.openmmlab.com/mmaction/recognition/tsn/'
    'tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
    'tsn_r50_320p_1x1x8_110e_kinetics400_flow_20200705-1f39486b.pth')
workflow = [('train', 5)]
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: fine-tune on untrimmed ActivityNet videos, 5x1x8
# sampling, 150 epochs, optical-flow modality, initialized from Kinetics-400.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/default_runtime.py',
]

# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(
    backbone=dict(in_channels=10),
    cls_head=dict(num_classes=200, dropout_ratio=0.8))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_video.txt'
ann_file_val = 'data/ActivityNet/anet_val_video.txt'
# NOTE(review): testing uses the clip-level list (with offsets), unlike
# train/val which use the video-level lists — matches the upstream config.
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'

img_norm_cfg = dict(mean=[128, 128], std=[128, 128], to_bgr=False)

train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=5,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='flow_{}_{:05d}.jpg',
        modality='Flow',
        start_index=0,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        modality='Flow',
        start_index=0,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=test_pipeline))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.001, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))

# learning policy
lr_config = dict(policy='step', step=[60, 120])
total_epochs = 150

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_150e_activitynet_video_flow/'
load_from = (
    'https://download.openmmlab.com/mmaction/recognition/tsn/'
    'tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
    'tsn_r50_320p_1x1x8_110e_kinetics400_flow_20200705-1f39486b.pth')
workflow = [('train', 5)]
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: fine-tune on ActivityNet clips, 1x1x8 sampling,
# 50 epochs, RGB modality, initialized from a Kinetics-400 RGB model.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(cls_head=dict(num_classes=200, dropout_ratio=0.8))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_clip.txt'
ann_file_val = 'data/ActivityNet/anet_val_clip.txt'
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)

# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb/'
load_from = (
    'https://download.openmmlab.com/mmaction/recognition/tsn/'
    'tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
    'tsn_r50_320p_1x1x8_100e_kinetics400_rgb_20200702-ef80e3d7.pth')
workflow = [('train', 5)]
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: fine-tune on untrimmed ActivityNet videos, 1x1x8
# sampling, 50 epochs, RGB modality, initialized from a Kinetics-400 RGB model.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(cls_head=dict(num_classes=200, dropout_ratio=0.8))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_video.txt'
ann_file_val = 'data/ActivityNet/anet_val_video.txt'
ann_file_test = 'data/ActivityNet/anet_val_video.txt'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)

# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb/'
load_from = (
    'https://download.openmmlab.com/mmaction/recognition/tsn/'
    'tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
    'tsn_r50_320p_1x1x8_100e_kinetics400_rgb_20200702-ef80e3d7.pth')
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config for clip-level feature extraction from videos
# (test-only: ann_file/data_prefix are filled in at call time).
# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    train_cfg=None,
    # Return backbone features instead of classification scores.
    test_cfg=dict(feature_extraction=True))

# dataset settings
dataset_type = 'VideoDataset'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

test_pipeline = [
    dict(type='DecordInit', num_threads=1),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))

dist_params = dict(backend='nccl')
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: dense sampling, 1x1x5 segments, 100 epochs on
# Kinetics-400, RGB modality.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py',
]

# model settings
model = dict(cls_head=dict(dropout_ratio=0.5, init_std=0.001))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

train_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=5),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=16,
    workers_per_gpu=2,
    val_dataloader=dict(videos_per_gpu=1),
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))

evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.03, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))

# runtime settings
work_dir = './work_dirs/tsn_r50_dense_1x1x5_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN (ResNet-50) config: dense sampling, 1x1x8 segments, 100 epochs on
# Kinetics-400, RGB modality.
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py',
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_bgr=False)

train_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label']),
]
val_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]
test_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs']),
]

data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))

evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.005, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus

# runtime settings
work_dir = './work_dirs/tsn_r50_dense_1x1x8_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# Inference-only TSN config for Kinetics-400 raw frames: defines just the
# model base and a test pipeline/dataloader (annotation paths are supplied
# at call time, hence ann_file/data_prefix are None).
_base_ = ['../../_base_/models/tsn_r50.py']

# dataset settings
dataset_type = 'RawframeDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Diving48 videos, 16 segments (1x1x16), 100 epochs.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings: Diving48 has 48 classes.
model = dict(cls_head=dict(num_classes=48))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/diving48/videos'
data_root_val = 'data/diving48/videos'
ann_file_train = 'data/diving48/diving48_train_list_videos.txt'
ann_file_val = 'data/diving48/diving48_val_list_videos.txt'
ann_file_test = 'data/diving48/diving48_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=4,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
optimizer = dict(
    type='SGD',
    lr=0.00125,  # this lr is used for 8 gpus
    momentum=0.9,
    weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x16_100e_diving48_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Diving48 videos, 8 segments (1x1x8), 100 epochs.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings: Diving48 has 48 classes.
model = dict(cls_head=dict(num_classes=48))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/diving48/videos'
data_root_val = 'data/diving48/videos'
ann_file_train = 'data/diving48/diving48_train_list_videos.txt'
ann_file_val = 'data/diving48/diving48_val_list_videos.txt'
ann_file_test = 'data/diving48/diving48_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
optimizer = dict(
    type='SGD',
    lr=0.0025,  # this lr is used for 8 gpus
    momentum=0.9,
    weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_diving48_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Kinetics-400 videos, 8 segments (1x1x8), 100 epochs.
# Test-time: 25 clips with TenCrop for full-coverage inference.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Kinetics-600 videos, 8 segments (1x1x8), 100 epochs.
# Test-time: 25 clips with ThreeCrop at 256 for full-coverage inference.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings: Kinetics-600 has 600 classes.
model = dict(cls_head=dict(num_classes=600))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics600/videos_train'
data_root_val = 'data/kinetics600/videos_val'
ann_file_train = 'data/kinetics600/kinetics600_train_list_videos.txt'
ann_file_val = 'data/kinetics600/kinetics600_val_list_videos.txt'
ann_file_test = 'data/kinetics600/kinetics600_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.00375, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
# runtime settings
checkpoint_config = dict(interval=5)
# Fixed: work_dir previously said "tsn_r50_1x1x3_100e_kinetics600_rgb"
# (copy-paste leftover from the 1x1x3 config); it now matches this config.
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_kinetics600_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Kinetics-700 videos, 8 segments (1x1x8), 100 epochs.
# Test-time: 25 clips with ThreeCrop at 256 for full-coverage inference.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings: Kinetics-700 has 700 classes.
model = dict(cls_head=dict(num_classes=700))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics700/videos_train'
data_root_val = 'data/kinetics700/videos_val'
ann_file_train = 'data/kinetics700/kinetics700_train_list_videos.txt'
ann_file_val = 'data/kinetics700/kinetics700_val_list_videos.txt'
ann_file_test = 'data/kinetics700/kinetics700_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.00375, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
# runtime settings
checkpoint_config = dict(interval=5)
# Fixed: work_dir previously said "tsn_r50_1x1x3_100e_kinetics700_rgb"
# (copy-paste leftover from the 1x1x3 config); it now matches this config.
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_kinetics700_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on 320p Kinetics-400 videos, 3 segments (1x1x3), 100 epochs.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=3,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# runtime settings
# Fixed: work_dir previously omitted "320p" and collided with the non-320p
# 1x1x3 video config's work_dir (checkpoints would overwrite each other).
work_dir = './work_dirs/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# Dense-sampling TSN-R50 on Kinetics-400 videos, 8 segments, 100 epochs.
_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    # Val and test both run one video per GPU.
    val_dataloader=dict(videos_per_gpu=1),
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# runtime settings
work_dir = './work_dirs/tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb/'
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Kinetics-400 videos with Imgaug default augmentation,
# 8 segments (1x1x8), 100 epochs. This config is standalone (no _base_).
# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips=None))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Imgaug', transforms='default'),
    # dict(
    #     type='Imgaug',
    #     transforms=[
    #         dict(type='Rotate', rotate=(-20, 20)),
    #         dict(type='Dropout', p=(0, 0.05))
    #     ]),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 100
checkpoint_config = dict(interval=1)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook'),
    ])
# runtime settings
dist_params = dict(backend='nccl')
log_level = 'INFO'
# Fixed: work_dir previously pointed at the non-imgaug config's directory
# ("tsn_r50_video_1x1x8_100e_kinetics400_rgb"), so the two configs would
# overwrite each other's checkpoints.
work_dir = './work_dirs/tsn_r50_video_imgaug_1x1x8_100e_kinetics400_rgb/'
load_from = None
resume_from = None
workflow = [('train', 1)]
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# Inference-only TSN config for Kinetics-400 videos decoded with OpenCV.
# Annotation paths are supplied at call time (ann_file/data_prefix = None).
_base_ = ['../../_base_/models/tsn_r50.py']

# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
test_pipeline = [
    dict(type='OpenCVInit', num_threads=1),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='OpenCVDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
openmmlab_test/mmaction2-0.24.1/configs/recognition/tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
76ccaa54
# TSN-R50 on Kinetics-400 videos with Mixup blending (alpha=0.2),
# 8 segments (1x1x8), 100 epochs.
_base_ = [
    '../../_base_/schedules/sgd_100e.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    cls_head=dict(
        type='TSNHead',
        num_classes=400,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.4,
        init_std=0.01),
    # model training and testing settings
    train_cfg=dict(
        blending=dict(type='MixupBlending', num_classes=400, alpha=0.2)),
    test_cfg=dict(average_clips=None))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# runtime settings
work_dir = './work_dirs/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb/'
Prev
1
…
12
13
14
15
16
17
18
19
20
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment