Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
TSM_pytorch
Commits
5b3e36dc
Commit
5b3e36dc
authored
Jun 07, 2023
by
Sugon_ldc
Browse files
add model TSM
parents
Pipeline
#315
failed with stages
in 0 seconds
Changes
440
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1574 additions
and
0 deletions
+1574
-0
configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py
...tion/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py
+96
-0
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv1_rgb.py
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv1_rgb.py
+7
-0
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py
+90
-0
configs/recognition/tsm/tsm_r50_1x1x16_100e_kinetics400_rgb.py
...gs/recognition/tsm/tsm_r50_1x1x16_100e_kinetics400_rgb.py
+7
-0
configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py
...igs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py
+95
-0
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py
+99
-0
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py
+96
-0
configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py
...igs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py
+6
-0
configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py
configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py
+91
-0
configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py
configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py
+87
-0
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py
+95
-0
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py
+94
-0
configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py
...igs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py
+115
-0
configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py
...cognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py
+87
-0
configs/recognition/tsm/tsm_r50_dense_1x1x8_50e_kinetics400_rgb.py
...ecognition/tsm/tsm_r50_dense_1x1x8_50e_kinetics400_rgb.py
+7
-0
configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py
configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py
+99
-0
configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py
...ition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py
+100
-0
configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py
...on/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py
+93
-0
configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py
configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py
+114
-0
configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py
...recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py
+96
-0
No files found.
Too many changes to show.
To preserve performance only
440 of 440+
files are displayed.
Plain diff
Email patch
configs/recognition/tsm/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config that overrides the inherited backbone with non-local blocks in
# 'gaussian' mode and trains/evaluates on Kinetics-400 raw frames, sampling
# 8 single-frame clips per video. Everything not set here comes from the
# files listed in `_base_`.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    backbone=dict(
        # One tuple of 0/1 flags per group (lengths 3, 4, 6, 3).
        # NOTE(review): presumably one flag per residual block of each ResNet
        # stage, 1 meaning "insert a non-local block" - confirm against the
        # backbone implementation.
        non_local=((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0)),
        non_local_cfg=dict(
            sub_sample=True,
            use_scale=False,
            norm_cfg=dict(type='BN3d', requires_grad=True),
            mode='gaussian')))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
work_dir = './work_dirs/tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# Variant of the ResNet-50 sthv1 TSM config listed in `_base_`: only the
# backbone depth / pretrained weights and the output directory are overridden.
_base_ = ['./tsm_r50_1x1x8_50e_sthv1_rgb.py']

# model settings
model = dict(backbone=dict(pretrained='torchvision://resnet101', depth=101))

# runtime settings
work_dir = './work_dirs/tsm_r101_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r101_1x1x8_50e_sthv2_rgb.py
0 → 100644
View file @
5b3e36dc
# Variant of the ResNet-50 sthv2 TSM config listed in `_base_` that switches
# the backbone to depth 101 and restates the dataset, pipeline, optimizer and
# output-directory settings below.
_base_ = ['./tsm_r50_1x1x8_50e_sthv2_rgb.py']

# model settings
model = dict(backbone=dict(pretrained='torchvision://resnet101', depth=101))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv2/rawframes'
data_root_val = 'data/sthv2/rawframes'
ann_file_train = 'data/sthv2/sthv2_train_list_rawframes.txt'
ann_file_val = 'data/sthv2/sthv2_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv2/sthv2_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Unlike the Kinetics-400 TSM configs in this commit, there is no 'Flip' step.
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop at 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    lr=0.01,  # this lr is used for 8 gpus
)

# runtime settings
work_dir = './work_dirs/tsm_r101_1x1x8_50e_sthv2_rgb/'
configs/recognition/tsm/tsm_r50_1x1x16_100e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# 100-epoch variant of the 16-segment Kinetics-400 TSM config listed in
# `_base_`: adds gradient clipping, sets step-decay milestones at epochs 40
# and 80, and redirects outputs to its own work directory.
_base_ = ['tsm_r50_1x1x16_50e_kinetics400_rgb.py']

optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))

# learning policy
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 100

work_dir = './work_dirs/tsm_r50_1x1x16_100e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_1x1x16_50e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for Kinetics-400 raw frames using 16 segments: num_segments is
# set to 16 on both backbone and head, and every SampleFrames step draws 16
# single-frame clips. Everything not set here comes from the `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(num_segments=16), cls_head=dict(num_segments=16))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing uses TenCrop where validation uses a single CenterCrop.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=6,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    lr=0.0075,  # this lr is used for 8 gpus
)

# runtime settings
checkpoint_config = dict(interval=5)
work_dir = './work_dirs/tsm_r50_1x1x16_50e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for the sthv1 raw-frame dataset using 16 segments and a
# 174-class head. Everything not set here comes from the `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    backbone=dict(num_segments=16),
    cls_head=dict(num_classes=174, num_segments=16))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Unlike the Kinetics-400 TSM configs in this commit, there is no 'Flip' step.
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop at 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=6,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    # All three splits pass the frame filename template '{:05}.jpg'.
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    lr=0.0075,  # this lr is used for 8 gpus
    weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_1x1x16_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_1x1x16_50e_sthv2_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for the sthv2 raw-frame dataset using 16 segments and a
# 174-class head. Everything not set here comes from the `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    backbone=dict(num_segments=16),
    cls_head=dict(num_classes=174, num_segments=16))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv2/rawframes'
data_root_val = 'data/sthv2/rawframes'
ann_file_train = 'data/sthv2/sthv2_train_list_rawframes.txt'
ann_file_val = 'data/sthv2/sthv2_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv2/sthv2_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Unlike the Kinetics-400 TSM configs in this commit, there is no 'Flip' step.
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=16),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop at 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=16,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=6,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    lr=0.0075,  # this lr is used for 8 gpus
    weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_1x1x16_50e_sthv2_rgb/'
configs/recognition/tsm/tsm_r50_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# 100-epoch variant of the 8-segment Kinetics-400 TSM config listed in
# `_base_`: adds gradient clipping, sets step-decay milestones at epochs 40
# and 80, and redirects outputs to its own work directory.
_base_ = ['./tsm_r50_1x1x8_50e_kinetics400_rgb.py']

optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
lr_config = dict(policy='step', step=[40, 80])
total_epochs = 100

work_dir = './work_dirs/tsm_r50_1x1x8_100e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_1x1x8_50e_jester_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for the Jester raw-frame dataset with a 27-class head and
# 8 single-frame clips per video. Everything not set here comes from the
# `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=27))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/jester/rawframes'
data_root_val = 'data/jester/rawframes'
ann_file_train = 'data/jester/jester_train_list_rawframes.txt'
ann_file_val = 'data/jester/jester_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/jester/jester_val_list_rawframes.txt'

# Label pairs (0 <-> 1, 6 <-> 7) handed to the 'Flip' step below.
# NOTE(review): presumably these are direction-sensitive gesture classes whose
# labels must be swapped when a clip is mirrored - confirm against the label
# list.
jester_flip_label_map = {0: 1, 1: 0, 6: 7, 7: 6}
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5, flip_label_map=jester_flip_label_map),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    # NOTE(review): crop_size is 256 here, whereas the other TSM validation
    # pipelines in this commit center-crop to 224 - confirm this is intended.
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    val_dataloader=dict(videos_per_gpu=1),
    test_dataloader=dict(videos_per_gpu=1),
    # All three splits pass the frame filename template '{:05}.jpg'.
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_1x1x8_50e_jester_rgb/'
configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for Kinetics-400 raw frames sampling 8 single-frame clips per
# video. The model is taken unchanged from the `_base_` files; only dataset,
# evaluation and runtime settings are defined here.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
checkpoint_config = dict(interval=5)
# Named after this 50e config. The previous value pointed at the
# '..._100e_...' directory, which the 100-epoch config in this commit also
# writes to, so the two runs would have shared one output directory.
work_dir = './work_dirs/tsm_r50_1x1x8_50e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for the sthv1 raw-frame dataset with a 174-class head and
# 8 single-frame clips per video. Everything not set here comes from the
# `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=174))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Unlike the Kinetics-400 TSM configs in this commit, there is no 'Flip' step.
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop at 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    # All three splits pass the frame filename template '{:05}.jpg'.
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_1x1x8_50e_sthv2_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config for the sthv2 raw-frame dataset with a 174-class head and
# 8 single-frame clips per video. Everything not set here comes from the
# `_base_` files.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=174))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv2/rawframes'
data_root_val = 'data/sthv2/rawframes'
ann_file_train = 'data/sthv2/sthv2_train_list_rawframes.txt'
ann_file_val = 'data/sthv2/sthv2_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv2/sthv2_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Unlike the Kinetics-400 TSM configs in this commit, there is no 'Flip' step.
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing differs from validation: twice_sample=True and ThreeCrop at 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=6,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    lr=0.0075,  # this lr is used for 8 gpus
    weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_1x1x8_50e_sthv2_rgb/'
configs/recognition/tsm/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config with a ResNet-50 backbone on the sthv1 raw-frame dataset,
# trained with CutMix blending (see model.train_cfg).
_base_ = [
    '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=8),
    cls_head=dict(
        type='TSMHead',
        num_classes=174,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=True),
    # model training and testing settings
    train_cfg=dict(
        blending=dict(type='CutmixBlending', num_classes=174, alpha=.2)),
    test_cfg=dict(average_clips='prob'))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_cutmix_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config on Kinetics-400 raw frames using 'DenseSampleFrames' for
# train, val and test sampling. Model and schedule come from _base_.
_base_ = [
    '../../_base_/models/tsm_r50.py',
    '../../_base_/schedules/sgd_tsm_100e.py',
    '../../_base_/default_runtime.py'
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='DenseSampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='DenseSampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='TenCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    val_dataloader=dict(videos_per_gpu=1),
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
work_dir = './work_dirs/tsm_r50_dense_1x1x8_100e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_dense_1x1x8_50e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# 50-epoch variant: inherits the dense-sampling 100e config and overrides
# gradient clipping, the LR step schedule, epoch count and work_dir.
_base_ = ['tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py']

optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[20, 40])
total_epochs = 50

work_dir = './work_dirs/tsm_r50_dense_1x1x8_50e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_flip_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config on the sthv1 raw-frame dataset with a label-aware 'Flip'
# augmentation in the training pipeline.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=174))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'

# Symmetric label pairs handed to 'Flip' as flip_label_map (presumably
# classes whose label must be swapped when a clip is mirrored - confirm
# against the dataset's class list).
sthv1_flip_label_map = {2: 4, 4: 2, 30: 41, 41: 30, 52: 66, 66: 52}
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5, flip_label_map=sthv1_flip_label_map),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_flip_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config on the sthv1 raw-frame dataset whose training pipeline applies
# a label-aware 'Flip' followed by 'Imgaug' with transforms='default'.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=174))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'

# Symmetric label pairs handed to 'Flip' as flip_label_map (presumably
# classes whose label must be swapped when a clip is mirrored - confirm
# against the dataset's class list).
sthv1_flip_label_map = {2: 4, 4: 2, 30: 41, 41: 30, 52: 66, 66: 52}
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5, flip_label_map=sthv1_flip_label_map),
    dict(type='Imgaug', transforms='default'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_flip_randaugment_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config on Kinetics-400 raw frames where normalisation is configured as
# a 'GPUNormalize' module hook on the backbone; the data pipelines below
# therefore contain no 'Normalize' step.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

module_hooks = [
    dict(
        type='GPUNormalize',
        hooked_module='backbone',
        hook_pos='forward_pre',
        input_format='NCHW',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375])
]

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'

train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
checkpoint_config = dict(interval=5)
# Directory name follows this config's file name and its 50-epoch schedule
# (it previously said '100e', which matched neither).
work_dir = './work_dirs/tsm_r50_gpu_normalize_1x1x8_50e_kinetics400_rgb/'
configs/recognition/tsm/tsm_r50_mixup_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config with a ResNet-50 backbone on the sthv1 raw-frame dataset,
# trained with Mixup blending (see model.train_cfg).
_base_ = [
    '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    type='Recognizer2D',
    backbone=dict(
        type='ResNetTSM',
        pretrained='torchvision://resnet50',
        depth=50,
        norm_eval=False,
        shift_div=8),
    cls_head=dict(
        type='TSMHead',
        num_classes=174,
        in_channels=2048,
        spatial_type='avg',
        consensus=dict(type='AvgConsensus', dim=1),
        dropout_ratio=0.5,
        init_std=0.001,
        is_shift=True),
    # model training and testing settings
    train_cfg=dict(
        blending=dict(type='MixupBlending', num_classes=174, alpha=.2)),
    test_cfg=dict(average_clips='prob'))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_mixup_1x1x8_50e_sthv1_rgb/'
configs/recognition/tsm/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb.py
0 → 100644
View file @
5b3e36dc
# TSM config on the sthv1 raw-frame dataset whose training pipeline adds a
# 'pytorchvideo.AugMix' step before normalisation.
_base_ = [
    '../../_base_/models/tsm_r50.py', '../../_base_/schedules/sgd_tsm_50e.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(cls_head=dict(num_classes=174))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/sthv1/rawframes'
data_root_val = 'data/sthv1/rawframes'
ann_file_train = 'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val = 'data/sthv1/sthv1_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=8),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(
        type='MultiScaleCrop',
        input_size=224,
        scales=(1, 0.875, 0.75, 0.66),
        random_crop=False,
        max_wh_scale_gap=1,
        num_fixed_crops=13),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='pytorchvideo.AugMix'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=1,
        frame_interval=1,
        num_clips=8,
        twice_sample=True,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        filename_tmpl='{:05}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        filename_tmpl='{:05}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=2, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(weight_decay=0.0005)

# runtime settings
work_dir = './work_dirs/tsm_r50_ptv_augmix_1x1x8_50e_sthv1_rgb/'
Prev
1
…
8
9
10
11
12
13
14
15
16
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment