Commit 76ccaa54 authored by unknown's avatar unknown
Browse files

添加mmaction2测试用例

parent 44c28b2b
This diff is collapsed.
This diff is collapsed.
# checkpoint saving: interval of 1
checkpoint_config = {'interval': 1}
# logging: interval of 20, text logger enabled
log_config = {
    'interval': 20,
    'hooks': [
        {'type': 'TextLoggerHook'},
        # {'type': 'TensorboardLoggerHook'},
    ],
}

# runtime settings
dist_params = {'backend': 'nccl'}
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]

# opencv multithreading is disabled so the system does not get overloaded
opencv_num_threads = 0
# `fork` is used as the multi-process start method to speed up training
mp_start_method = 'fork'
# model settings: AudioRecognizer (ResNetAudio backbone + AudioTSNHead)
model = {
    'type': 'AudioRecognizer',
    'backbone': {
        'type': 'ResNetAudio',
        'depth': 50,
        'pretrained': None,
        'in_channels': 1,
        'norm_eval': False,
    },
    'cls_head': {
        'type': 'AudioTSNHead',
        'num_classes': 400,
        'in_channels': 1024,
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: BMN
model = {
    'type': 'BMN',
    'temporal_dim': 100,
    'boundary_ratio': 0.5,
    'num_samples': 32,
    'num_samples_per_bin': 3,
    'feat_dim': 400,
    # soft-NMS and post-processing parameters
    'soft_nms_alpha': 0.4,
    'soft_nms_low_threshold': 0.5,
    'soft_nms_high_threshold': 0.9,
    'post_process_top_k': 100,
}
# model settings: PEM
model = {
    'type': 'PEM',
    'pem_feat_dim': 32,
    'pem_hidden_dim': 256,
    'pem_u_ratio_m': 1,
    'pem_u_ratio_l': 2,
    'pem_high_temporal_iou_threshold': 0.6,
    'pem_low_temporal_iou_threshold': 0.2,
    # soft-NMS and post-processing parameters
    'soft_nms_alpha': 0.75,
    'soft_nms_low_threshold': 0.65,
    'soft_nms_high_threshold': 0.9,
    'post_process_top_k': 100,
}
# model settings: TEM
model = {
    'type': 'TEM',
    'temporal_dim': 100,
    'boundary_ratio': 0.1,
    'tem_feat_dim': 400,
    'tem_hidden_dim': 512,
    'tem_match_threshold': 0.5,
}
# model settings: Recognizer3D (C3D backbone + I3DHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'C3D',
        # backbone weights are loaded from this checkpoint URL
        'pretrained': 'https://download.openmmlab.com/mmaction/recognition/c3d/c3d_sports1m_pretrain_20201016-dcc47ddc.pth',  # noqa: E501
        'style': 'pytorch',
        'conv_cfg': {'type': 'Conv3d'},
        'norm_cfg': None,
        'act_cfg': {'type': 'ReLU'},
        'dropout_ratio': 0.5,
        'init_std': 0.005,
    },
    'cls_head': {
        'type': 'I3DHead',
        'num_classes': 101,
        'in_channels': 4096,
        'spatial_type': None,
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'score'},
}
# model settings: Recognizer3D (ResNet3d-50 backbone + I3DHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3d',
        'pretrained2d': True,
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'conv1_kernel': (5, 7, 7),
        'conv1_stride_t': 2,
        'pool1_stride_t': 2,
        'conv_cfg': {'type': 'Conv3d'},
        'norm_eval': False,
        'inflate': (
            (1, 1, 1),
            (1, 0, 1, 0),
            (1, 0, 1, 0, 1, 0),
            (0, 1, 0),
        ),
        'zero_init_residual': False,
    },
    'cls_head': {
        'type': 'I3DHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}

# This setting refers to https://github.com/open-mmlab/mmaction/blob/master/mmaction/models/tenons/backbones/resnet_i3d.py#L329-L332  # noqa: E501
# model settings: Recognizer3D (ResNet3dCSN-152 backbone + I3DHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3dCSN',
        'pretrained2d': False,
        'pretrained': None,
        'depth': 152,
        'with_pool2': False,
        'bottleneck_mode': 'ir',
        'norm_eval': False,
        'zero_init_residual': False,
    },
    'cls_head': {
        'type': 'I3DHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob', 'max_testing_views': 10},
}
# model settings: Recognizer3D (ResNet2Plus1d-34 backbone + I3DHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet2Plus1d',
        'depth': 34,
        'pretrained': None,
        'pretrained2d': False,
        'norm_eval': False,
        'conv_cfg': {'type': 'Conv2plus1d'},
        'norm_cfg': {'type': 'SyncBN', 'requires_grad': True, 'eps': 1e-3},
        'conv1_kernel': (3, 7, 7),
        'conv1_stride_t': 1,
        'pool1_stride_t': 1,
        'inflate': (1, 1, 1, 1),
        'spatial_strides': (1, 2, 2, 2),
        'temporal_strides': (1, 2, 2, 2),
        'zero_init_residual': False,
    },
    'cls_head': {
        'type': 'I3DHead',
        'num_classes': 400,
        'in_channels': 512,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer3D (ResNet3dSlowFast backbone + SlowFastHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3dSlowFast',
        'pretrained': None,
        'resample_rate': 8,  # tau
        'speed_ratio': 8,  # alpha
        'channel_ratio': 8,  # beta_inv
        # slow pathway: lateral connections enabled
        'slow_pathway': {
            'type': 'resnet3d',
            'depth': 50,
            'pretrained': None,
            'lateral': True,
            'conv1_kernel': (1, 7, 7),
            'dilations': (1, 1, 1, 1),
            'conv1_stride_t': 1,
            'pool1_stride_t': 1,
            'inflate': (0, 0, 1, 1),
            'norm_eval': False,
        },
        # fast pathway: no lateral connections, base_channels of 8
        'fast_pathway': {
            'type': 'resnet3d',
            'depth': 50,
            'pretrained': None,
            'lateral': False,
            'base_channels': 8,
            'conv1_kernel': (5, 7, 7),
            'conv1_stride_t': 1,
            'pool1_stride_t': 1,
            'norm_eval': False,
        },
    },
    'cls_head': {
        'type': 'SlowFastHead',
        'in_channels': 2304,  # 2048+256
        'num_classes': 400,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer3D (ResNet3dSlowOnly-50 backbone + I3DHead)
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3dSlowOnly',
        'depth': 50,
        'pretrained': 'torchvision://resnet50',
        'lateral': False,
        'conv1_kernel': (1, 7, 7),
        'conv1_stride_t': 1,
        'pool1_stride_t': 1,
        'inflate': (0, 0, 1, 1),
        'norm_eval': False,
    },
    'cls_head': {
        'type': 'I3DHead',
        'in_channels': 2048,
        'num_classes': 400,
        'spatial_type': 'avg',
        'dropout_ratio': 0.5,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer2D (TANet-50 backbone + TSMHead)
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'TANet',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'num_segments': 8,
        'tam_cfg': {},  # empty config dict
    },
    'cls_head': {
        'type': 'TSMHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.001,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer2D (ResNetTIN-50 backbone + TSMHead)
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'ResNetTIN',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'norm_eval': False,
        'shift_div': 4,
    },
    'cls_head': {
        'type': 'TSMHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.001,
        'is_shift': False,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': None},
}
# model settings: Recognizer3D (ResNet3dSlowOnly-50 backbone + TPN neck +
# TPNHead), 400 classes
model = {
    'type': 'Recognizer3D',
    'backbone': {
        'type': 'ResNet3dSlowOnly',
        'depth': 50,
        'pretrained': 'torchvision://resnet50',
        'lateral': False,
        'out_indices': (2, 3),
        'conv1_kernel': (1, 7, 7),
        'conv1_stride_t': 1,
        'pool1_stride_t': 1,
        'inflate': (0, 0, 1, 1),
        'norm_eval': False,
    },
    'neck': {
        'type': 'TPN',
        'in_channels': (1024, 2048),
        'out_channels': 1024,
        'spatial_modulation_cfg': {
            'in_channels': (1024, 2048),
            'out_channels': 2048,
        },
        'temporal_modulation_cfg': {'downsample_scales': (8, 8)},
        'upsample_cfg': {'scale_factor': (1, 1, 1)},
        'downsample_cfg': {'downsample_scale': (1, 1, 1)},
        'level_fusion_cfg': {
            'in_channels': (1024, 1024),
            'mid_channels': (1024, 1024),
            'out_channels': 2048,
            'downsample_scales': ((1, 1, 1), (1, 1, 1)),
        },
        # out_channels here equals num_classes of cls_head (400)
        'aux_head_cfg': {'out_channels': 400, 'loss_weight': 0.5},
    },
    'cls_head': {
        'type': 'TPNHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer2D (ResNetTSM-50 backbone + TPN neck + TPNHead),
# 174 classes
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'ResNetTSM',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'out_indices': (2, 3),
        'norm_eval': False,
        'shift_div': 8,
    },
    'neck': {
        'type': 'TPN',
        'in_channels': (1024, 2048),
        'out_channels': 1024,
        'spatial_modulation_cfg': {
            'in_channels': (1024, 2048),
            'out_channels': 2048,
        },
        'temporal_modulation_cfg': {'downsample_scales': (8, 8)},
        'upsample_cfg': {'scale_factor': (1, 1, 1)},
        'downsample_cfg': {'downsample_scale': (1, 1, 1)},
        'level_fusion_cfg': {
            'in_channels': (1024, 1024),
            'mid_channels': (1024, 1024),
            'out_channels': 2048,
            'downsample_scales': ((1, 1, 1), (1, 1, 1)),
        },
        # out_channels here equals num_classes of cls_head (174)
        'aux_head_cfg': {'out_channels': 174, 'loss_weight': 0.5},
    },
    'cls_head': {
        'type': 'TPNHead',
        'num_classes': 174,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.01,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob', 'fcn_test': True},
}
# model settings: Recognizer2D (ResNet-50 backbone + TRNHead)
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'ResNet',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'norm_eval': False,
        'partial_bn': True,
    },
    'cls_head': {
        'type': 'TRNHead',
        'num_classes': 400,
        'in_channels': 2048,
        'num_segments': 8,
        'spatial_type': 'avg',
        'relation_type': 'TRNMultiScale',
        'hidden_dim': 256,
        'dropout_ratio': 0.8,
        'init_std': 0.001,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer2D (MobileNetV2TSM backbone + TSMHead)
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'MobileNetV2TSM',
        'shift_div': 8,
        'num_segments': 8,
        'is_shift': True,
        'pretrained': 'mmcls://mobilenet_v2',
    },
    'cls_head': {
        'type': 'TSMHead',
        'num_segments': 8,  # same value as the backbone's num_segments
        'num_classes': 400,
        'in_channels': 1280,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.001,
        'is_shift': True,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
# model settings: Recognizer2D (ResNetTSM-50 backbone + TSMHead)
model = {
    'type': 'Recognizer2D',
    'backbone': {
        'type': 'ResNetTSM',
        'pretrained': 'torchvision://resnet50',
        'depth': 50,
        'norm_eval': False,
        'shift_div': 8,
    },
    'cls_head': {
        'type': 'TSMHead',
        'num_classes': 400,
        'in_channels': 2048,
        'spatial_type': 'avg',
        'consensus': {'type': 'AvgConsensus', 'dim': 1},
        'dropout_ratio': 0.5,
        'init_std': 0.001,
        'is_shift': True,
    },
    # model training and testing settings
    'train_cfg': None,
    'test_cfg': {'average_clips': 'prob'},
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment