Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
STGCN-PyTorch
Commits
aa58d024
Commit
aa58d024
authored
Mar 20, 2023
by
unknown
Browse files
Initial add code.
parents
Pipeline
#135
failed with stages
in 0 seconds
Changes
436
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1785 additions
and
0 deletions
+1785
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py
.../slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py
...y/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py
+100
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py
...y/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py
+97
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py
.../slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py
...nly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py
+96
-0
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py
.../slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py
+97
-0
configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py
...ly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py
+101
-0
configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py
...only/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py
+81
-0
configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py
...only/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py
+97
-0
configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py
...y_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py
...ly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py
+98
-0
configs/recognition/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py
...tion/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py
+21
-0
configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py
...ion/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py
+103
-0
configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py
...tion/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py
...tion/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py
+103
-0
configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py
...ition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_r50_clip_feature_extraction_4x16x1_rgb.py
...owonly/slowonly_r50_clip_feature_extraction_4x16x1_rgb.py
+45
-0
configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py
...lowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py
+96
-0
configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py
...slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py
+93
-0
configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py
...slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py
+92
-0
No files found.
Too many changes to show.
To preserve performance only
436 of 436+
files are displayed.
Plain diff
Email patch
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_hmdb51_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/schedules/sgd_150e_warmup.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
cls_head
=
dict
(
num_classes
=
51
))
# dataset settings
split
=
1
dataset_type
=
'RawframeDataset'
data_root
=
'data/hmdb51/rawframes'
data_root_val
=
'data/hmdb51/rawframes'
ann_file_train
=
f
'data/hmdb51/hmdb51_train_split_
{
split
}
_rawframes.txt'
ann_file_val
=
f
'data/hmdb51/hmdb51_val_split_
{
split
}
_rawframes.txt'
ann_file_test
=
f
'data/hmdb51/hmdb51_val_split_
{
split
}
_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
224
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
lr
=
0.1
)
# this lr is used for 8 gpus
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
min_lr
=
0
,
by_epoch
=
False
)
total_epochs
=
64
# runtime settings
work_dir
=
'./work_dirs/slowonly_r50_8x4x1_64e_hmdb51_rgb'
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv1_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/schedules/sgd_150e_warmup.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
backbone
=
dict
(
with_pool1
=
False
),
cls_head
=
dict
(
num_classes
=
174
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/sthv1/rawframes'
data_root_val
=
'data/sthv1/rawframes'
ann_file_train
=
'data/sthv1/sthv1_train_list_rawframes.txt'
ann_file_val
=
'data/sthv1/sthv1_val_list_rawframes.txt'
ann_file_test
=
'data/sthv1/sthv1_val_list_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
112
,
112
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
112
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
128
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
filename_tmpl
=
'{:05}.jpg'
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
filename_tmpl
=
'{:05}.jpg'
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
filename_tmpl
=
'{:05}.jpg'
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
lr
=
0.1
)
# this lr is used for 8 gpus
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
min_lr
=
0
,
warmup
=
'linear'
,
warmup_by_epoch
=
True
,
warmup_iters
=
10
)
total_epochs
=
64
# runtime settings
work_dir
=
'./work_dirs/slowonly_r50_8x4x1_64e_sthv1_rgb'
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_sthv2_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/schedules/sgd_150e_warmup.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
backbone
=
dict
(
with_pool1
=
False
),
cls_head
=
dict
(
num_classes
=
174
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/sthv2/rawframes'
data_root_val
=
'data/sthv2/rawframes'
ann_file_train
=
'data/sthv2/sthv2_train_list_rawframes.txt'
ann_file_val
=
'data/sthv2/sthv2_val_list_rawframes.txt'
ann_file_test
=
'data/sthv2/sthv2_val_list_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
112
,
112
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
112
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
128
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
128
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
lr
=
0.1
)
# this lr is used for 8 gpus
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
min_lr
=
0
,
warmup
=
'linear'
,
warmup_by_epoch
=
True
,
warmup_iters
=
10
)
total_epochs
=
64
# runtime settings
work_dir
=
'./work_dirs/slowonly_r50_8x4x1_64e_sthv2_rgb'
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x4x1_64e_ucf101_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/schedules/sgd_150e_warmup.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
cls_head
=
dict
(
num_classes
=
101
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/ucf101/rawframes/'
data_root_val
=
'data/ucf101/rawframes/'
split
=
1
# official train/test splits. valid numbers: 1, 2, 3
ann_file_train
=
f
'data/ucf101/ucf101_train_split_
{
split
}
_rawframes.txt'
ann_file_val
=
f
'data/ucf101/ucf101_val_split_
{
split
}
_rawframes.txt'
ann_file_test
=
f
'data/ucf101/ucf101_val_split_
{
split
}
_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
224
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
lr
=
0.1
)
# this lr is used for 8 gpus
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
min_lr
=
0
,
by_epoch
=
False
)
total_epochs
=
64
# runtime settings
work_dir
=
'./work_dirs/slowonly_r50_8x4x1_64e_ucf101_rgb'
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/default_runtime.py'
]
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/kinetics400/rawframes_train'
data_root_val
=
'data/kinetics400/rawframes_val'
ann_file_train
=
'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val
=
'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test
=
'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
8
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
8
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
224
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
8
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
5
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.01
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
# this lr is used for 8 gpus
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
40
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
90
,
130
],
warmup
=
'linear'
,
warmup_by_epoch
=
True
,
warmup_iters
=
10
)
total_epochs
=
150
# runtime settings
checkpoint_config
=
dict
(
interval
=
4
)
work_dir
=
(
'./work_dirs/slowonly_imagenet_pretrained_r50_8x8x1_150e'
'_kinetics400_rgb'
)
find_unused_parameters
=
False
configs/recognition/slowonly/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
cls_head
=
dict
(
num_classes
=
27
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/jester/rawframes'
data_root_val
=
'data/jester/rawframes'
ann_file_train
=
'data/jester/jester_train_list_rawframes.txt'
ann_file_val
=
'data/jester/jester_val_list_rawframes.txt'
ann_file_test
=
'data/jester/jester_val_list_rawframes.txt'
jester_flip_label_map
=
{
0
:
1
,
1
:
0
,
6
:
7
,
7
:
6
}
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
,
flip_label_map
=
jester_flip_label_map
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
val_dataloader
=
dict
(
videos_per_gpu
=
1
),
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
filename_tmpl
=
'{:05}.jpg'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
filename_tmpl
=
'{:05}.jpg'
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
filename_tmpl
=
'{:05}.jpg'
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
5
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.1
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
# this lr is used for 8 gpus
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
40
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
min_lr
=
0
,
by_epoch
=
False
)
total_epochs
=
64
# runtime settings
checkpoint_config
=
dict
(
interval
=
4
)
work_dir
=
'./work_dirs/slowonly_imagenet_pretrained_r50_8x8x1_64e_jester_rgb'
find_unused_parameters
=
False
configs/recognition/slowonly/slowonly_k400_pretrained_r50_4x16x1_120e_gym99_flow.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
backbone
=
dict
(
pretrained
=
None
,
in_channels
=
2
,
with_pool2
=
False
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/gym/rawframes'
data_root_val
=
'data/gym/rawframes'
ann_file_train
=
'data/gym/annotations/gym99_train_list_rawframes.txt'
ann_file_val
=
'data/gym/annotations/gym99_val_list_rawframes.txt'
ann_file_test
=
'data/gym/annotations/gym99_val_list_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
128
,
128
],
std
=
[
128
,
128
])
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
4
,
frame_interval
=
16
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
4
,
frame_interval
=
16
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
4
,
frame_interval
=
16
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
24
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
modality
=
'Flow'
,
filename_tmpl
=
'{}_{:05d}.jpg'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
modality
=
'Flow'
,
filename_tmpl
=
'{}_{:05d}.jpg'
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
modality
=
'Flow'
,
filename_tmpl
=
'{}_{:05d}.jpg'
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
5
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
type
=
'SGD'
,
lr
=
0.03
,
momentum
=
0.9
,
weight_decay
=
0.0001
)
# this lr is used for 8 gpus
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
40
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
90
,
110
])
total_epochs
=
120
# runtime settings
work_dir
=
(
'./work_dirs/'
'slowonly_kinetics_pretrained_r50_4x16x1_120e_gym99_flow'
)
load_from
=
(
'https://download.openmmlab.com/mmaction/recognition/slowonly/'
'slowonly_r50_4x16x1_256e_kinetics400_flow/'
'slowonly_r50_4x16x1_256e_kinetics400_flow_20200704-decb8568.pth'
)
find_unused_parameters
=
False
configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'./slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py'
]
# model settings
model
=
dict
(
cls_head
=
dict
(
num_classes
=
51
))
# dataset settings
split
=
1
dataset_type
=
'RawframeDataset'
data_root
=
'data/hmdb51/rawframes'
data_root_val
=
'data/hmdb51/rawframes'
ann_file_train
=
f
'data/hmdb51/hmdb51_train_split_
{
split
}
_rawframes.txt'
ann_file_val
=
f
'data/hmdb51/hmdb51_val_split_
{
split
}
_rawframes.txt'
ann_file_test
=
f
'data/hmdb51/hmdb51_val_split_
{
split
}
_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
224
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
# runtime settings
work_dir
=
'./work_dirs/slowonly_k400_pretrained_r50_8x4x1_40e_hmdb51_rgb'
configs/recognition/slowonly/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb.py
0 → 100644
View file @
aa58d024
_base_
=
[
'../../_base_/models/slowonly_r50.py'
,
'../../_base_/schedules/sgd_50e.py'
,
'../../_base_/default_runtime.py'
]
# model settings
model
=
dict
(
cls_head
=
dict
(
num_classes
=
101
))
# dataset settings
dataset_type
=
'RawframeDataset'
data_root
=
'data/ucf101/rawframes/'
data_root_val
=
'data/ucf101/rawframes/'
split
=
1
# official train/test splits. valid numbers: 1, 2, 3
ann_file_train
=
f
'data/ucf101/ucf101_train_split_
{
split
}
_rawframes.txt'
ann_file_val
=
f
'data/ucf101/ucf101_val_split_
{
split
}
_rawframes.txt'
ann_file_test
=
f
'data/ucf101/ucf101_val_split_
{
split
}
_rawframes.txt'
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_bgr
=
False
)
train_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'RandomResizedCrop'
),
dict
(
type
=
'Resize'
,
scale
=
(
224
,
224
),
keep_ratio
=
False
),
dict
(
type
=
'Flip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
,
'label'
])
]
val_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
1
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'CenterCrop'
,
crop_size
=
224
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
test_pipeline
=
[
dict
(
type
=
'SampleFrames'
,
clip_len
=
8
,
frame_interval
=
4
,
num_clips
=
10
,
test_mode
=
True
),
dict
(
type
=
'RawFrameDecode'
),
dict
(
type
=
'Resize'
,
scale
=
(
-
1
,
256
)),
dict
(
type
=
'ThreeCrop'
,
crop_size
=
256
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'FormatShape'
,
input_format
=
'NCTHW'
),
dict
(
type
=
'Collect'
,
keys
=
[
'imgs'
,
'label'
],
meta_keys
=
[]),
dict
(
type
=
'ToTensor'
,
keys
=
[
'imgs'
])
]
data
=
dict
(
videos_per_gpu
=
8
,
workers_per_gpu
=
2
,
test_dataloader
=
dict
(
videos_per_gpu
=
1
),
train
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_train
,
data_prefix
=
data_root
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_val
,
data_prefix
=
data_root_val
,
pipeline
=
val_pipeline
),
test
=
dict
(
type
=
dataset_type
,
ann_file
=
ann_file_test
,
data_prefix
=
data_root_val
,
pipeline
=
test_pipeline
))
evaluation
=
dict
(
interval
=
1
,
metrics
=
[
'top_k_accuracy'
,
'mean_class_accuracy'
])
# optimizer
optimizer
=
dict
(
lr
=
0.001
,
# this lr is used for 8 gpus
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
20
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'step'
,
step
=
[
15
,
30
])
total_epochs
=
40
# runtime settings
work_dir
=
'./work_dirs/slowonly_k400_pretrained_r50_8x4x1_40e_ucf101_rgb'
load_from
=
'https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb/slowonly_r50_8x8x1_256e_kinetics400_rgb_20200703-a79c555a.pth'
# noqa: E501
find_unused_parameters
=
False
configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly config with embedded-Gaussian non-local blocks in the backbone,
# sampling 4 frames at interval 16 (1 clip) from Kinetics-400 raw frames.
# Model, schedule and runtime defaults come from the `_base_` files; only
# the overrides and dataset settings are defined here.
_base_ = [
    '../../_base_/models/slowonly_r50.py',
    '../../_base_/schedules/sgd_150e_warmup.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    backbone=dict(
        # presumably one flag per residual block in each of the four stages
        # (1 = insert a non-local block) -- confirm against the backbone.
        non_local=((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0)),
        non_local_cfg=dict(
            sub_sample=True,
            use_scale=True,
            norm_cfg=dict(type='BN3d', requires_grad=True),
            mode='embedded_gaussian')))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=4, frame_interval=16, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# runtime settings
work_dir = './work_dirs/slowonly_nl_embedded_gaussian_r50_4x16x1_150e_kinetics400_rgb'  # noqa: E501
find_unused_parameters = False
configs/recognition/slowonly/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly config with embedded-Gaussian non-local blocks in the backbone,
# sampling 8 frames at interval 8 (1 clip) from Kinetics-400 raw frames.
# Model, schedule and runtime defaults come from the `_base_` files; only
# the overrides and dataset settings are defined here.
_base_ = [
    '../../_base_/models/slowonly_r50.py',
    '../../_base_/schedules/sgd_150e_warmup.py',
    '../../_base_/default_runtime.py'
]

# model settings
model = dict(
    backbone=dict(
        # presumably one flag per residual block in each of the four stages
        # (1 = insert a non-local block) -- confirm against the backbone.
        non_local=((0, 0, 0), (1, 0, 1, 0), (1, 0, 1, 0, 1, 0), (0, 0, 0)),
        non_local_cfg=dict(
            sub_sample=True,
            use_scale=True,
            norm_cfg=dict(type='BN3d', requires_grad=True),
            mode='embedded_gaussian')))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.01, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus

# runtime settings
work_dir = './work_dirs/slowonly_nl_embedded_gaussian_r50_8x8x1_150e_kinetics400_rgb'  # noqa: E501
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r101_8x8x1_196e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly variant with a depth-101 backbone. Everything not overridden
# below is inherited from the R50 8x8x1 Kinetics-400 RGB config in `_base_`.
_base_ = ['./slowonly_r50_8x8x1_256e_kinetics400_rgb.py']

# model settings
model = dict(backbone=dict(depth=101, pretrained=None))

# optimizer
optimizer = dict(
    type='SGD', lr=0.1, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
# learning policy
# Cosine annealing with a linear warmup counted in epochs (34 of them).
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0,
    warmup='linear',
    warmup_ratio=0.1,
    warmup_by_epoch=True,
    warmup_iters=34)
total_epochs = 196

# runtime settings
work_dir = './work_dirs/slowonly_r101_8x8x1_196e_kinetics400_rgb'
configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_flow.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-400 optical flow stored as raw frames, sampling
# 4 frames at interval 16 (1 clip). Model and runtime defaults come from
# the `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
# Two input channels, matching the two-element mean/std in img_norm_cfg.
model = dict(backbone=dict(in_channels=2, with_pool2=False))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics_flow_train_list.txt'
ann_file_val = 'data/kinetics400/kinetics_flow_val_list.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics_flow_val_list.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=4, frame_interval=16, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 256 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# All three splits read flow frames named by the '{}_{:05d}.jpg' template.
data = dict(
    videos_per_gpu=24,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.06, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
# Cosine annealing with a linear warmup counted in epochs (34 of them).
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0,
    warmup='linear',
    warmup_by_epoch=True,
    warmup_iters=34)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_4x16x1_256e_kinetics400_flow'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_4x16x1_256e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-400 RGB raw frames, sampling 4 frames at interval 16
# (1 clip), with no pretrained backbone weights. Model and runtime defaults
# come from the `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(pretrained=None))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=4, frame_interval=16, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.1, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_4x16x1_256e_kinetics400_rgb'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_flow.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-400 optical flow stored as raw frames, sampling
# 8 frames at interval 8 (1 clip). Model and runtime defaults come from
# the `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
# Two input channels, matching the two-element mean/std in img_norm_cfg.
model = dict(backbone=dict(in_channels=2, with_pool2=False))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics_flow_train_list.txt'
ann_file_val = 'data/kinetics400/kinetics_flow_val_list.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics_flow_val_list.txt'
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 256 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# All three splits read flow frames named by the '{}_{:05d}.jpg' template.
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        modality='Flow',
        filename_tmpl='{}_{:05d}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.06, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
# Cosine annealing with a linear warmup counted in epochs (34 of them).
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0,
    warmup='linear',
    warmup_by_epoch=True,
    warmup_iters=34)
# NOTE(review): the config name and work_dir say "256e" but total_epochs is
# 196 -- value kept as-is; confirm which one is intended.
total_epochs = 196

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_8x8x1_256e_kinetics400_flow'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_8x8x1_256e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-400 RGB raw frames, sampling 8 frames at interval 8
# (1 clip), with no pretrained backbone weights. Model and runtime defaults
# come from the `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(pretrained=None))

# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        # Use the dedicated test variable (same path as ann_file_val) so
        # that ann_file_test is not a dead definition.
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.1, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_8x8x1_256e_kinetics400_rgb'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_clip_feature_extraction_4x16x1_rgb.py
0 → 100644
View file @
aa58d024
# Standalone (no `_base_`) config that defines only a model, a test pipeline
# and a test dataset: train_cfg is None and test_cfg enables
# feature_extraction, so no train/val splits or schedule are declared.
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowOnly',
        depth=50,
        pretrained=None,
        lateral=False,
        conv1_kernel=(1, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(0, 0, 1, 1),
        norm_eval=False),
    train_cfg=None,
    test_cfg=dict(feature_extraction=True))

# dataset settings
dataset_type = 'VideoDataset'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# 10 clips of 4 frames at interval 16 in test mode, 256 center crop. Only
# 'imgs' is collected; no label key is requested.
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=1,
    workers_per_gpu=2,
    test=dict(
        type=dataset_type,
        # NOTE(review): ann_file and data_prefix are None here -- presumably
        # filled in by the calling script at run time; confirm.
        ann_file=None,
        data_prefix=None,
        pipeline=test_pipeline))

dist_params = dict(backend='nccl')
configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-400 RGB video files (decoded with the Decord steps),
# sampling 4 frames at interval 16 (1 clip), with no pretrained backbone
# weights. Model and runtime defaults come from the `_base_` files; the
# schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(pretrained=None))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics400/videos_train'
data_root_val = 'data/kinetics400/videos_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_videos.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_videos.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics400/kinetics400_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=4, frame_interval=16, num_clips=1),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=4,
        frame_interval=16,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=24,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.3, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_video_4x16x1_256e_kinetics400_rgb'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-600 RGB video files (decoded with the Decord steps),
# sampling 8 frames at interval 8 (1 clip), with a 600-class head and no
# pretrained backbone weights. Model and runtime defaults come from the
# `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(pretrained=None), cls_head=dict(num_classes=600))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics600/videos_train'
data_root_val = 'data/kinetics600/videos_val'
ann_file_train = 'data/kinetics600/kinetics600_train_list_videos.txt'
ann_file_val = 'data/kinetics600/kinetics600_val_list_videos.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics600/kinetics600_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# NOTE(review): no short-side Resize step precedes the crops in these
# pipelines -- presumably the source videos are already resized; confirm.
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.15, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_video_8x8x1_256e_kinetics600_rgb'
find_unused_parameters = False
configs/recognition/slowonly/slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py
0 → 100644
View file @
aa58d024
# SlowOnly on Kinetics-700 RGB video files (decoded with the Decord steps),
# sampling 8 frames at interval 8 (1 clip), with a 700-class head and no
# pretrained backbone weights. Model and runtime defaults come from the
# `_base_` files; the schedule is defined in this file.
_base_ = [
    '../../_base_/models/slowonly_r50.py', '../../_base_/default_runtime.py'
]

# model settings
model = dict(backbone=dict(pretrained=None), cls_head=dict(num_classes=700))

# dataset settings
dataset_type = 'VideoDataset'
data_root = 'data/kinetics700/videos_train'
data_root_val = 'data/kinetics700/videos_val'
ann_file_train = 'data/kinetics700/kinetics700_train_list_videos.txt'
ann_file_val = 'data/kinetics700/kinetics700_val_list_videos.txt'
# The test split reuses the validation annotation list.
ann_file_test = 'data/kinetics700/kinetics700_val_list_videos.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
# NOTE(review): no short-side Resize step precedes the crops in these
# pipelines -- presumably the source videos are already resized; confirm.
# Training: one random clip, random-resized crop to 224x224, random flip.
train_pipeline = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='DecordDecode'),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
# Validation: one clip in test mode, 224 center crop.
val_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
# Testing: 10 clips in test mode, ThreeCrop at size 256.
test_pipeline = [
    dict(type='DecordInit'),
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=10,
        test_mode=True),
    dict(type='DecordDecode'),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCTHW'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=12,
    workers_per_gpu=2,
    test_dataloader=dict(videos_per_gpu=1),
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.15, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 256

# runtime settings
checkpoint_config = dict(interval=4)
work_dir = './work_dirs/slowonly_r50_video_8x8x1_256e_kinetics700_rgb'
find_unused_parameters = False
Prev
1
…
5
6
7
8
9
10
11
12
13
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment