ModelZoo / TSM_pytorch · Commits

Commit 5b3e36dc, authored Jun 07, 2023 by Sugon_ldc

    add model TSM

Pipeline #315 failed with stages in 0 seconds.
Changes: 440 · Pipelines: 1
Showing 20 changed files with 1757 additions and 0 deletions (+1757 −0).
configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py                 +90  −0
configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py                      +101 −0
configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py                      +93  −0
configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py          +75  −0
configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py         +96  −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py          +85  −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py         +96  −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py    +107 −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py   +105 −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py      +98  −0
configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py     +88  −0
configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py        +42  −0
configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py         +96  −0
configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py         +91  −0
configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py     +29  −0
configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py           +98  −0
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py            +98  −0
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py         +87  −0
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py         +91  −0
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py         +91  −0
Too many changes to show: to preserve performance, only 440 of 440+ files are displayed; the diffs below cover the first 20.
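All of the added files below are MMAction2-style TSN config files that inherit shared settings through their `_base_` lists. As a rough illustration of how such a config is consumed (not part of this commit), the minimal sketch below assumes mmcv (pre-2.0) is installed and the repo root is the working directory:

# Illustrative only: load one of the added configs and inspect the merged result.
from mmcv import Config

cfg = Config.fromfile(
    'configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py')
# Config.fromfile resolves the _base_ files, so inherited keys (model, schedule,
# runtime) are visible alongside the keys defined in the file itself.
print(cfg.model.cls_head.num_classes)   # 51 for the HMDB51 config
print(len(cfg.train_pipeline))          # number of transforms in the train pipeline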
configs/recognition/tsn/tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=51))
# dataset settings
split = 1
dataset_type = 'RawframeDataset'
data_root = 'data/hmdb51/rawframes'
data_root_val = 'data/hmdb51/rawframes'
ann_file_train = f'data/hmdb51/hmdb51_train_split_{split}_rawframes.txt'
ann_file_val = f'data/hmdb51/hmdb51_val_split_{split}_rawframes.txt'
ann_file_test = f'data/hmdb51/hmdb51_val_split_{split}_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=1,
    metrics=['top_k_accuracy', 'mean_class_accuracy'],
    topk=(1, 5))
# optimizer
optimizer = dict(type='SGD', lr=0.025, momentum=0.9, weight_decay=0.0001)
# runtime settings
checkpoint_config = dict(interval=5)
log_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_1x1x8_50e_hmdb51_mit_rgb/'
load_from = 'https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x6_100e_mit_rgb/tsn_r50_1x1x6_100e_mit_rgb_20200618-d512ab1b.pth'  # noqa: E501
gpu_ids = range(0, 1)
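The `1x1x8` in the file name mirrors the SampleFrames settings above (clip_len=1, frame_interval=1, num_clips=8): the video is divided into 8 equal segments and one frame is drawn from each. The snippet below is a rough, self-contained sketch of that segment-sampling idea for illustration only, not the actual mmaction implementation:

import random

def sample_segment_frames(total_frames, num_clips=8, clip_len=1, test_mode=False):
    """Pick one index per equal segment, TSN-style (illustrative only)."""
    seg_len = total_frames / num_clips
    indices = []
    for i in range(num_clips):
        start = int(i * seg_len)
        end = max(start, int((i + 1) * seg_len) - clip_len)
        # deterministic (center) pick at test time, random pick during training
        offset = (start + end) // 2 if test_mode else random.randint(start, end)
        indices.append(min(offset, total_frames - 1))
    return indices

print(sample_segment_frames(300))  # one frame index from each of the 8 segments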
configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv1_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(
    backbone=dict(
        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=True),
    cls_head=dict(num_classes=174, dropout_ratio=0.5))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=16,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0005)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_1x1x8_50e_sthv1_rgb/'
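The recurring `# this lr is used for 8 gpus` comment follows the usual linear-scaling convention: the quoted learning rate assumes 8 GPUs at the listed `videos_per_gpu`, and is commonly rescaled in proportion to the actual total batch size. The helper below is only an illustration of that convention, not something shipped in this commit:

def scaled_lr(base_lr, base_gpus, videos_per_gpu, gpus, new_videos_per_gpu=None):
    """Linearly rescale the lr with total batch size (illustrative convention)."""
    if new_videos_per_gpu is None:
        new_videos_per_gpu = videos_per_gpu
    base_batch = base_gpus * videos_per_gpu
    new_batch = gpus * new_videos_per_gpu
    return base_lr * new_batch / base_batch

# Something-Something V1 config above: lr=0.02 assumes 8 GPUs x 16 videos each.
print(scaled_lr(0.02, base_gpus=8, videos_per_gpu=16, gpus=2))  # 0.005 on 2 GPUs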
configs/recognition/tsn/tsn_r50_1x1x8_50e_sthv2_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=174, dropout_ratio=0.5))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv2/rawframes'
data_root_val = 'data/sthv2/rawframes'
ann_file_train = 'data/sthv2/sthv2_train_list_rawframes.txt'
ann_file_val = 'data/sthv2/sthv2_val_list_rawframes.txt'
ann_file_test = 'data/sthv2/sthv2_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=16,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_1x1x8_50e_sthv2_rgb/'
configs/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py  (new file, 0 → 100644)

_base_ = ['./tsn_r50_1x1x3_100e_kinetics400_rgb.py']
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes_320p.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/'
configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py  (new file, 0 → 100644)

_base_ = ['../../_base_/models/tsn_r50.py', '../../_base_/default_runtime.py']
# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(backbone=dict(in_channels=10))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics_flow_train_list.txt'
ann_file_val = 'data/kinetics400/kinetics_flow_val_list.txt'
ann_file_test = 'data/kinetics400/kinetics_flow_val_list.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])
train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=3),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=3, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[70, 100])
total_epochs = 110
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x3_110e_kinetics400_flow/'
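The flow configs set `in_channels=10` on the backbone because, as the in-file comment notes, `in_channels` should be `2 * clip_len`: each sampled position contributes one x-flow and one y-flow image, and with `clip_len=5` the stacked input therefore has 10 channels. A one-line sanity check of that relation (illustrative only):

clip_len = 5                 # from the SampleFrames steps in this config
in_channels = 2 * clip_len   # one x-flow + one y-flow image per sampled frame
assert in_channels == 10     # matches backbone=dict(in_channels=10) above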
configs/recognition/tsn/tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes_320p.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes_320p.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.00375, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
configs/recognition/tsn/tsn_r50_320p_1x1x8_110e_kinetics400_flow.py  (new file, 0 → 100644)

_base_ = ['../../_base_/models/tsn_r50.py', '../../_base_/default_runtime.py']
# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(backbone=dict(in_channels=10))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train_320p'
data_root_val = 'data/kinetics400/rawframes_val_320p'
ann_file_train = 'data/kinetics400/kinetics400_flow_train_list_320p.txt'
ann_file_val = 'data/kinetics400/kinetics400_flow_val_list_320p.txt'
ann_file_test = 'data/kinetics400/kinetics400_flow_val_list_320p.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])
train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{}_{:05d}.jpg',
        modality='Flow',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.001875, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[70, 100])
total_epochs = 110
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py  (new file, 0 → 100644)

_base_ = ['../../_base_/models/tsn_r50.py', '../../_base_/default_runtime.py']
# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(
    backbone=dict(in_channels=10),
    cls_head=dict(num_classes=200, dropout_ratio=0.8))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_clip.txt'
ann_file_val = 'data/ActivityNet/anet_val_clip.txt'
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[60, 120])
total_epochs = 150
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_150e_activitynet_clip_flow/'
load_from = ('https://download.openmmlab.com/mmaction/recognition/tsn/'
             'tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
             'tsn_r50_320p_1x1x8_110e_kinetics400_flow_20200705-1f39486b.pth')
workflow = [('train', 5)]
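The ActivityNet flow configs read frames whose names are built from `filename_tmpl='flow_{}_{:05d}.jpg'`: the first placeholder takes the flow direction and the second the frame index, which starts at 0 here because `start_index=0`. A tiny formatting example of how such a template expands (illustration only):

tmpl = 'flow_{}_{:05d}.jpg'   # filename_tmpl from the config above
print(tmpl.format('x', 0))    # flow_x_00000.jpg  (start_index=0 -> first frame)
print(tmpl.format('y', 12))   # flow_y_00012.jpg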
configs/recognition/tsn/tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py  (new file, 0 → 100644)

_base_ = ['../../_base_/models/tsn_r50.py', '../../_base_/default_runtime.py']
# model settings
# ``in_channels`` should be 2 * clip_len
model = dict(
    backbone=dict(in_channels=10),
    cls_head=dict(num_classes=200, dropout_ratio=0.8))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_video.txt'
ann_file_val = 'data/ActivityNet/anet_val_video.txt'
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=5, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='flow_{}_{:05d}.jpg',
        modality='Flow',
        start_index=0,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        modality='Flow',
        start_index=0,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='flow_{}_{:05d}.jpg',
        with_offset=True,
        modality='Flow',
        start_index=0,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[60, 120])
total_epochs = 150
# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_320p_1x1x8_150e_activitynet_video_flow/'
load_from = ('https://download.openmmlab.com/mmaction/recognition/tsn/'
             'tsn_r50_320p_1x1x8_110e_kinetics400_flow/'
             'tsn_r50_320p_1x1x8_110e_kinetics400_flow_20200705-1f39486b.pth')
workflow = [('train', 5)]
configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=200, dropout_ratio=0.8))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_clip.txt'
ann_file_val = 'data/ActivityNet/anet_val_clip.txt'
ann_file_test = 'data/ActivityNet/anet_val_clip.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline,
        with_offset=True,
        start_index=0,
        filename_tmpl='image_{:05d}.jpg'))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb/'
load_from = ('https://download.openmmlab.com/mmaction/recognition/tsn/'
             'tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
             'tsn_r50_320p_1x1x8_100e_kinetics400_rgb_20200702-ef80e3d7.pth')
workflow = [('train', 5)]
configs/recognition/tsn/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_50e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=200, dropout_ratio=0.8))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ActivityNet/rawframes'
data_root_val = 'data/ActivityNet/rawframes'
ann_file_train = 'data/ActivityNet/anet_train_video.txt'
ann_file_val = 'data/ActivityNet/anet_val_video.txt'
ann_file_test = 'data/ActivityNet/anet_val_video.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_320p_1x1x8_50e_activitynet_video_rgb/'
load_from = ('https://download.openmmlab.com/mmaction/recognition/tsn/'
             'tsn_r50_320p_1x1x8_100e_kinetics400_rgb/'
             'tsn_r50_320p_1x1x8_100e_kinetics400_rgb_20200702-ef80e3d7.pth')
configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py  (new file, 0 → 100644)

# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNet',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False),
    train_cfg=None,
    test_cfg=dict(feature_extraction=True))
# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
test_pipeline = [
    dict(type='DecordInit', num_threads=1),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
dist_params = dict(backend='nccl')
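This feature-extraction config deliberately leaves `ann_file` and `data_prefix` as `None`, so they are meant to be filled in at run time for whichever videos you want clip features for. A minimal sketch of patching them programmatically (illustration only; assumes mmcv pre-2.0, and 'my_video_list.txt' / 'data/my_videos' are placeholder paths, not part of this commit):

# Illustrative only: fill in the placeholders of the feature-extraction config.
from mmcv import Config

cfg = Config.fromfile(
    'configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py')
cfg.data.test.ann_file = 'my_video_list.txt'   # placeholder: list of videos to process
cfg.data.test.data_prefix = 'data/my_videos'   # placeholder: root the paths resolve against
# The patched cfg can then be handed to the usual mmaction test / feature-extraction tooling.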
configs/recognition/tsn/tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(dropout_ratio=0.5, init_std=0.001))
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=5),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=16,
    workers_per_gpu=2,
    val_dataloader=dict(videos_per_gpu=1),
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.03, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
# runtime settings
work_dir = './work_dirs/tsn_r50_dense_1x1x5_100e_kinetics400_rgb/'
configs/recognition/tsn/tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)
# this lr is used for 8 gpus
# runtime settings
work_dir = './work_dirs/tsn_r50_dense_1x1x8_100e_kinetics400_rgb/'
configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py  (new file, 0 → 100644)

_base_ = ['../../_base_/models/tsn_r50.py']
# dataset settings
dataset_type = 'RawframeDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
test_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=25, test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))
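At test time these pipelines trade speed for accuracy by scoring many views per video and averaging the predictions: the inference config above samples 25 clips and applies ThreeCrop at 256, i.e. 25 × 3 = 75 crops per video, while the TenCrop test pipelines used in other configs in this commit produce 25 × 10 = 250 views. A tiny arithmetic sketch of that count (illustration only):

num_clips, crops_three, crops_ten = 25, 3, 10
print(num_clips * crops_three)  # 75 views per video with ThreeCrop (this config)
print(num_clips * crops_ten)    # 250 views per video with the TenCrop pipelines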
configs/recognition/tsn/tsn_r50_video_1x1x16_100e_diving48_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=48))
# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/diving48/videos'
data_root_val = 'data/diving48/videos'
ann_file_train = 'data/diving48/diving48_train_list_videos.txt'
ann_file_val = 'data/diving48/diving48_val_list_videos.txt'
ann_file_test = 'data/diving48/diving48_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16, test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16, test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=4,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
optimizer = dict(
    type='SGD',
    lr=0.00125,  # this lr is used for 8 gpus
    momentum=0.9,
    weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x16_100e_diving48_rgb/'
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_diving48_rgb.py  (new file, 0 → 100644)

_base_ = [
    '../../_base_/models/tsn_r50.py', '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]
# model settings
model = dict(cls_head=dict(num_classes=48))
# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/diving48/videos'
data_root_val = 'data/diving48/videos'
ann_file_train = 'data/diving48/diving48_train_list_videos.txt'
ann_file_val = 'data/diving48/diving48_val_list_videos.txt'
ann_file_test = 'data/diving48/diving48_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8, test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
optimizer = dict(
    type='SGD',
    lr=0.0025,  # this lr is used for 8 gpus
    momentum=0.9,
    weight_decay=0.0001)
# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_diving48_rgb/'
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
work_dir = './work_dirs/tsn_r50_video_1x1x8_100e_kinetics400_rgb/'
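A minimal sketch of loading this config programmatically, assuming the mmcv/mmaction2 0.x layout this repo follows; the override values and custom path are only examples.

# Load the kinetics400 config above and override settings without editing it.
from mmcv import Config

cfg = Config.fromfile(
    'configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics400_rgb.py')
cfg.data.videos_per_gpu = 16                         # halve the per-GPU batch
cfg.data.train.data_prefix = 'my_data/kinetics400/videos_train'  # example path
print(cfg.data.train.data_prefix, cfg.data.videos_per_gpu)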
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics600_rgb.py
0 → 100644  View file @ 5b3e36dc
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=600))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics600/videos_train'
data_root_val = 'data/kinetics600/videos_val'
ann_file_train = 'data/kinetics600/kinetics600_train_list_videos.txt'
ann_file_val = 'data/kinetics600/kinetics600_val_list_videos.txt'
ann_file_test = 'data/kinetics600/kinetics600_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.00375, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_1x1x3_100e_kinetics600_rgb/'
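A rough sketch of the test-time view count implied by the pipeline above: 25 uniformly sampled frames combined with ThreeCrop give 75 crops per video, and the recognizer averages class scores over all of them. The numbers below just restate the config values.

num_clips = 25          # SampleFrames(num_clips=25, test_mode=True)
crops_per_clip = 3      # ThreeCrop
views_per_video = num_clips * crops_per_clip
print(views_per_video)  # 75 crops of 256x256 fed through the 2D backbone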
configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py
0 → 100644  View file @ 5b3e36dc
_base_ = [
    '../../_base_/models/tsn_r50.py',
    '../../_base_/schedules/sgd_100e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=700))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics700/videos_train'
data_root_val = 'data/kinetics700/videos_val'
ann_file_train = 'data/kinetics700/kinetics700_train_list_videos.txt'
ann_file_val = 'data/kinetics700/kinetics700_val_list_videos.txt'
ann_file_test = 'data/kinetics700/kinetics700_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=25,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.00375, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsn_r50_1x1x3_100e_kinetics700_rgb/'
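A minimal sketch of building the recognizer and test dataset described by the kinetics700 config, assuming this repo keeps the standard mmaction2 0.x builder API; the config path is the file added above.

# Instantiate the model (TSN head with 700 classes) and the test dataset.
from mmcv import Config
from mmaction.models import build_model
from mmaction.datasets import build_dataset

cfg = Config.fromfile(
    'configs/recognition/tsn/tsn_r50_video_1x1x8_100e_kinetics700_rgb.py')
model = build_model(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
dataset = build_dataset(cfg.data.test)   # VideoDataset with test_pipeline
print(type(model).__name__, len(dataset))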