Commit 5b3e36dc authored by Sugon_ldc

add model TSM
_base_ = ['../_base_/models/slowonly_r50.py']
# model settings
lfb_prefix_path = 'data/ava/lfb_half'
max_num_sampled_feat = 5
window_size = 60
lfb_channels = 2048
dataset_modes = ('train', 'val')
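# Note (explanatory assumption): the feature bank covers a window_size-second
# window around each clip and samples up to max_num_sampled_feat person
# features per second, i.e. at most window_size * max_num_sampled_feat (= 300)
# long-term features of lfb_channels dimensions per query.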
model = dict(
roi_head=dict(
shared_head=dict(
type='FBOHead',
lfb_cfg=dict(
lfb_prefix_path=lfb_prefix_path,
max_num_sampled_feat=max_num_sampled_feat,
window_size=window_size,
lfb_channels=lfb_channels,
dataset_modes=dataset_modes,
device='gpu'),
fbo_cfg=dict(type='avg')),
bbox_head=dict(in_channels=4096)))
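# Note (explanatory assumption): with the 'avg' fusion, the pooled 2048-d LFB
# feature is concatenated with the 2048-d RoI feature, which is why
# bbox_head.in_channels is 4096 here.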
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
anno_root = 'data/ava/annotations'
ann_file_train = f'{anno_root}/ava_train_v2.1.csv'
ann_file_val = f'{anno_root}/ava_val_v2.1.csv'
exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.1.csv'
exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.1.csv'
label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt'
proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.'
'recall_93.9.pkl')
proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='SampleAVAFrames', clip_len=4, frame_interval=16),
dict(type='RawFrameDecode'),
dict(type='RandomRescale', scale_range=(256, 320)),
dict(type='RandomCrop', size=256),
dict(type='Flip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
dict(
type='ToDataContainer',
fields=[
dict(key=['proposals', 'gt_bboxes', 'gt_labels'], stack=False)
]),
dict(
type='Collect',
keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],
meta_keys=['scores', 'entity_ids', 'img_key'])
]
# Testing is done without any cropping / flipping
val_pipeline = [
dict(
type='SampleAVAFrames', clip_len=4, frame_interval=16, test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals']),
dict(type='ToDataContainer', fields=[dict(key='proposals', stack=False)]),
dict(
type='Collect',
keys=['img', 'proposals'],
meta_keys=['scores', 'img_shape', 'img_key'],
nested=True)
]
data = dict(
videos_per_gpu=12,
workers_per_gpu=2,
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
exclude_file=exclude_file_train,
pipeline=train_pipeline,
label_file=label_file,
proposal_file=proposal_file_train,
person_det_score_thr=0.9,
data_prefix=data_root),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
exclude_file=exclude_file_val,
pipeline=val_pipeline,
label_file=label_file,
proposal_file=proposal_file_val,
person_det_score_thr=0.9,
data_prefix=data_root))
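# Reuse the val split and pipeline for testing.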
data['test'] = data['val']
evaluation = dict(interval=1, save_best='mAP@0.5IOU')
optimizer = dict(type='SGD', lr=0.15, momentum=0.9, weight_decay=1e-05)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
step=[10, 15],
warmup='linear',
warmup_by_epoch=True,
warmup_iters=5,
warmup_ratio=0.1)
total_epochs = 20
checkpoint_config = dict(interval=1)
workflow = [('train', 1)]
log_config = dict(
interval=20, hooks=[
dict(type='TextLoggerHook'),
])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb' # noqa E501
load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
resume_from = None
find_unused_parameters = False
_base_ = ['../_base_/models/slowonly_r50.py']
# model settings
lfb_prefix_path = 'data/ava/lfb_half'
max_num_sampled_feat = 5
window_size = 60
lfb_channels = 2048
dataset_modes = ('train', 'val')
model = dict(
roi_head=dict(
shared_head=dict(
type='FBOHead',
lfb_cfg=dict(
lfb_prefix_path=lfb_prefix_path,
max_num_sampled_feat=max_num_sampled_feat,
window_size=window_size,
lfb_channels=lfb_channels,
dataset_modes=dataset_modes,
device='gpu'),
fbo_cfg=dict(type='max')),
bbox_head=dict(in_channels=4096)))
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
anno_root = 'data/ava/annotations'
ann_file_train = f'{anno_root}/ava_train_v2.1.csv'
ann_file_val = f'{anno_root}/ava_val_v2.1.csv'
exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.1.csv'
exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.1.csv'
label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt'
proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.'
'recall_93.9.pkl')
proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='SampleAVAFrames', clip_len=4, frame_interval=16),
dict(type='RawFrameDecode'),
dict(type='RandomRescale', scale_range=(256, 320)),
dict(type='RandomCrop', size=256),
dict(type='Flip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
dict(
type='ToDataContainer',
fields=[
dict(key=['proposals', 'gt_bboxes', 'gt_labels'], stack=False)
]),
dict(
type='Collect',
keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],
meta_keys=['scores', 'entity_ids', 'img_key'])
]
# Testing is done without any cropping / flipping
val_pipeline = [
dict(
type='SampleAVAFrames', clip_len=4, frame_interval=16, test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals']),
dict(type='ToDataContainer', fields=[dict(key='proposals', stack=False)]),
dict(
type='Collect',
keys=['img', 'proposals'],
meta_keys=['scores', 'img_shape', 'img_key'],
nested=True)
]
data = dict(
videos_per_gpu=12,
workers_per_gpu=2,
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
exclude_file=exclude_file_train,
pipeline=train_pipeline,
label_file=label_file,
proposal_file=proposal_file_train,
person_det_score_thr=0.9,
data_prefix=data_root),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
exclude_file=exclude_file_val,
pipeline=val_pipeline,
label_file=label_file,
proposal_file=proposal_file_val,
person_det_score_thr=0.9,
data_prefix=data_root))
data['test'] = data['val']
evaluation = dict(interval=1, save_best='mAP@0.5IOU')
optimizer = dict(type='SGD', lr=0.15, momentum=0.9, weight_decay=1e-05)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
step=[10, 15],
warmup='linear',
warmup_by_epoch=True,
warmup_iters=5,
warmup_ratio=0.1)
total_epochs = 20
checkpoint_config = dict(interval=1)
workflow = [('train', 1)]
log_config = dict(
interval=20, hooks=[
dict(type='TextLoggerHook'),
])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb' # noqa E501
load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
resume_from = None
find_unused_parameters = False
_base_ = ['../_base_/models/slowonly_r50.py']
# model settings
lfb_prefix_path = 'data/ava/lfb_half'
max_num_sampled_feat = 5
window_size = 60
lfb_channels = 2048
dataset_modes = ('train', 'val')
model = dict(
roi_head=dict(
shared_head=dict(
type='FBOHead',
lfb_cfg=dict(
lfb_prefix_path=lfb_prefix_path,
max_num_sampled_feat=max_num_sampled_feat,
window_size=window_size,
lfb_channels=lfb_channels,
dataset_modes=dataset_modes,
device='gpu'),
fbo_cfg=dict(
type='non_local',
st_feat_channels=2048,
lt_feat_channels=lfb_channels,
latent_channels=512,
num_st_feat=1,
num_lt_feat=window_size * max_num_sampled_feat,
num_non_local_layers=2,
st_feat_dropout_ratio=0.2,
lt_feat_dropout_ratio=0.2,
pre_activate=True)),
bbox_head=dict(in_channels=2560)))
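# Note (explanatory assumption): the non-local FBO outputs latent_channels=512
# features, which are concatenated with the 2048-d RoI feature, which is why
# bbox_head.in_channels is 2560 here.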
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
anno_root = 'data/ava/annotations'
ann_file_train = f'{anno_root}/ava_train_v2.1.csv'
ann_file_val = f'{anno_root}/ava_val_v2.1.csv'
exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.1.csv'
exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.1.csv'
label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt'
proposal_file_train = (f'{anno_root}/ava_dense_proposals_train.FAIR.'
'recall_93.9.pkl')
proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='SampleAVAFrames', clip_len=4, frame_interval=16),
dict(type='RawFrameDecode'),
dict(type='RandomRescale', scale_range=(256, 320)),
dict(type='RandomCrop', size=256),
dict(type='Flip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
dict(
type='ToDataContainer',
fields=[
dict(key=['proposals', 'gt_bboxes', 'gt_labels'], stack=False)
]),
dict(
type='Collect',
keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'],
meta_keys=['scores', 'entity_ids', 'img_key'])
]
# Testing is done without any cropping / flipping
val_pipeline = [
dict(
type='SampleAVAFrames', clip_len=4, frame_interval=16, test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals']),
dict(type='ToDataContainer', fields=[dict(key='proposals', stack=False)]),
dict(
type='Collect',
keys=['img', 'proposals'],
meta_keys=['scores', 'img_shape', 'img_key'],
nested=True)
]
data = dict(
videos_per_gpu=12,
workers_per_gpu=2,
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
exclude_file=exclude_file_train,
pipeline=train_pipeline,
label_file=label_file,
proposal_file=proposal_file_train,
person_det_score_thr=0.9,
data_prefix=data_root),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
exclude_file=exclude_file_val,
pipeline=val_pipeline,
label_file=label_file,
proposal_file=proposal_file_val,
person_det_score_thr=0.9,
data_prefix=data_root))
data['test'] = data['val']
evaluation = dict(interval=1, save_best='mAP@0.5IOU')
optimizer = dict(type='SGD', lr=0.15, momentum=0.9, weight_decay=1e-05)
# this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
step=[10, 15],
warmup='linear',
warmup_by_epoch=True,
warmup_iters=5,
warmup_ratio=0.1)
total_epochs = 20
checkpoint_config = dict(interval=1)
workflow = [('train', 1)]
log_config = dict(
interval=20, hooks=[
dict(type='TextLoggerHook'),
])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb' # noqa E501
load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
resume_from = None
find_unused_parameters = False
# This config is used to generate long-term feature bank.
_base_ = ['../_base_/models/slowonly_r50.py']
# model settings
lfb_prefix_path = 'data/ava/lfb_half'
dataset_mode = 'train' # ['train', 'val', 'test']
model = dict(
roi_head=dict(
shared_head=dict(
type='LFBInferHead',
lfb_prefix_path=lfb_prefix_path,
dataset_mode=dataset_mode,
use_half_precision=True)))
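# Assumed usage (not part of this config): run the standard test script with
# this config, e.g.
#   python tools/test.py <this_config.py> <slowonly_checkpoint.pth>
# to dump half-precision long-term features into lfb_prefix_path, then switch
# dataset_mode to 'val' and run again so both splits required by the FBOHead
# configs are covered.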
# dataset settings
dataset_type = 'AVADataset'
data_root = 'data/ava/rawframes'
anno_root = 'data/ava/annotations'
ann_file_infer = f'{anno_root}/ava_{dataset_mode}_v2.1.csv'
exclude_file_infer = (
f'{anno_root}/ava_{dataset_mode}_excluded_timestamps_v2.1.csv')
label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt'
proposal_file_infer = (
f'{anno_root}/ava_dense_proposals_{dataset_mode}.FAIR.recall_93.9.pkl')
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
infer_pipeline = [
dict(
type='SampleAVAFrames', clip_len=4, frame_interval=16, test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW', collapse=True),
# Rename is needed to use mmdet detectors
dict(type='Rename', mapping=dict(imgs='img')),
dict(type='ToTensor', keys=['img', 'proposals']),
dict(type='ToDataContainer', fields=[dict(key='proposals', stack=False)]),
dict(
type='Collect',
keys=['img', 'proposals'],
meta_keys=['scores', 'img_shape', 'img_key'],
nested=True)
]
data = dict(
videos_per_gpu=1,
workers_per_gpu=2,
test=dict(
type=dataset_type,
ann_file=ann_file_infer,
exclude_file=exclude_file_infer,
pipeline=infer_pipeline,
label_file=label_file,
proposal_file=proposal_file_infer,
person_det_score_thr=0.9,
data_prefix=data_root))
dist_params = dict(backend='nccl')
Collections:
- Name: LFB
README: configs/detection/lfb/README.md
Paper:
URL: https://arxiv.org/abs/1812.05038
Title: Long-Term Feature Banks for Detailed Video Understanding
Models:
- Config: configs/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
In Collection: LFB
Metadata:
Architecture: ResNet50
Batch Size: 12
Epochs: 20
Input: 4x16
Pretrained: Kinetics-400
Resolution: short-side 256
Training Data: AVA v2.1
Training Resources: 8 GPUs
Modality: RGB
Name: lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
Results:
- Dataset: AVA v2.1
Metrics:
mAP: 24.11
Task: Spatial Temporal Action Detection
Training Json Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210224_125052.log.json
Training Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210224_125052.log
Weights: https://download.openmmlab.com/mmaction/detection/lfb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/lfb_nl_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb_20210224-2ae136d9.pth
- Config: configs/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
In Collection: LFB
Metadata:
Architecture: ResNet50
Batch Size: 12
Epochs: 20
Input: 4x16
Pretrained: Kinetics-400
Resolution: short-side 256
Training Data: AVA v2.1
Training Resources: 8 GPUs
Modality: RGB
Name: lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
Results:
- Dataset: AVA v2.1
Metrics:
mAP: 20.17
Task: Spatial Temporal Action Detection
Training Json Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210301_124812.log.json
Training Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210301_124812.log
Weights: https://download.openmmlab.com/mmaction/detection/lfb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/lfb_avg_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb_20210301-19c330b7.pth
- Config: configs/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
In Collection: LFB
Metadata:
Architecture: ResNet50
Batch Size: 12
Epochs: 20
Input: 4x16
Pretrained: Kinetics-400
Resolution: short-side 256
Training Data: AVA v2.1
Training Resources: 8 GPUs
Modality: RGB
Name: lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb.py
Results:
- Dataset: AVA v2.1
Metrics:
mAP: 22.15
Task: Spatial Temporal Action Detection
Training Json Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210301_124812.log.json
Training Log: https://download.openmmlab.com/mmaction/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/20210301_124812.log
Weights: https://download.openmmlab.com/mmaction/detection/lfb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb/lfb_max_kinetics_pretrained_slowonly_r50_4x16x1_20e_ava_rgb_20210301-37efcd15.pth
# BMN
[BMN: Boundary-Matching Network for Temporal Action Proposal Generation](https://openaccess.thecvf.com/content_ICCV_2019/html/Lin_BMN_Boundary-Matching_Network_for_Temporal_Action_Proposal_Generation_ICCV_2019_paper.html)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
Temporal action proposal generation is a challenging and promising task which aims to locate temporal regions in real-world videos where actions or events may occur. Current bottom-up proposal generation methods can generate proposals with precise boundaries, but cannot efficiently generate adequately reliable confidence scores for retrieving proposals. To address these difficulties, we introduce the Boundary-Matching (BM) mechanism to evaluate confidence scores of densely distributed proposals, which denotes a proposal as a matching pair of starting and ending boundaries and combines all densely distributed BM pairs into the BM confidence map. Based on the BM mechanism, we propose an effective, efficient and end-to-end proposal generation method, named Boundary-Matching Network (BMN), which generates proposals with precise temporal boundaries as well as reliable confidence scores simultaneously. The two branches of BMN are jointly trained in a unified framework. We conduct experiments on two challenging datasets: THUMOS-14 and ActivityNet-1.3, where BMN shows significant performance improvement with remarkable efficiency and generalizability. Further, combined with existing action classifiers, BMN can achieve state-of-the-art temporal action detection performance.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/34324155/143016479-2ca7e8b6-a17b-4a4c-b4c9-ae731935cd91.png" width="800"/>
</div>
## Results and Models
### ActivityNet feature
| config | feature | gpus | AR@100 | AUC | AP@0.5 | AP@0.75 | AP@0.95 | mAP | gpu_mem(M) | iter time(s) | ckpt | log | json |
| :-----------------------------------------------------------------------------------------------------------: | :------------: | :--: | :----: | :---: | :----: | :-----: | :-----: | :---: | :--------: | ------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| [bmn_400x100_9e_2x8_activitynet_feature](/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py) | cuhk_mean_100 | 2 | 75.28 | 67.22 | 42.47 | 31.31 | 9.92 | 30.34 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature_20200619-42a3b111.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log.json) |
| | mmaction_video | 2 | 75.43 | 67.22 | 42.62 | 31.56 | 10.86 | 30.77 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809-c9fd14d2.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.json) |
| | mmaction_clip | 2 | 75.35 | 67.38 | 43.08 | 32.19 | 10.73 | 31.15 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809-10d803ce.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.json) |
| [BMN-official](https://github.com/JJBOY/BMN-Boundary-Matching-Network) (for reference)\* | cuhk_mean_100 | - | 75.27 | 67.49 | 42.22 | 30.98 | 9.22 | 30.00 | - | - | - | - | - |
:::{note}
1. The **gpus** column indicates the number of GPUs used to obtain the checkpoint.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use a different number of GPUs or videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu (see the sketch below the notes).
2. For the **feature** column, `cuhk_mean_100` denotes the widely used CUHK ActivityNet feature extracted with [anet2016-cuhk](https://github.com/yjxiong/anet2016-cuhk), while `mmaction_video` and `mmaction_clip` denote features extracted with MMAction, using a video-level or a clip-level ActivityNet-finetuned model, respectively.
3. We evaluate the action detection performance of BMN using the [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) submission for the ActivityNet 2017 Untrimmed Video Classification Track to assign a label to each action proposal.
:::
\*We train BMN with the [official repo](https://github.com/JJBOY/BMN-Boundary-Matching-Network) and evaluate its proposal generation and action detection performance with [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) for label assignment.
For more details on data preparation, you can refer to the ActivityNet feature section in [Data Preparation](/docs/en/data_preparation.md).
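As a quick sketch of the scaling arithmetic from note 1 (the helper below is purely illustrative and not part of MMAction2):
```python
def scaled_lr(base_lr, base_batch, gpus, videos_per_gpu):
    """Linear Scaling Rule: scale the learning rate with the effective batch size."""
    return base_lr * (gpus * videos_per_gpu) / base_batch

# Reference point from the note: lr=0.01 at 4 GPUs x 2 videos/gpu (batch size 8).
print(scaled_lr(0.01, base_batch=8, gpus=16, videos_per_gpu=4))  # -> 0.08
```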
## Train
You can use the following command to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Example: train the BMN model on the ActivityNet features dataset.
```shell
python tools/train.py configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
```
For more details and optional arguments, you can refer to the **Training setting** part in [getting_started](/docs/en/getting_started.md#training-setting).
## Test
You can use the following command to test a model.
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Example: test BMN on the ActivityNet feature dataset.
```shell
# Note: If evaluation is needed, please make sure the annotation file for the test data contains ground truth.
python tools/test.py configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
You can also test the action detection performance of the model with the [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) prediction file and the generated proposal file (`results.json` from the last command).
```shell
python tools/analysis/report_map.py --proposal path/to/proposal_file
```
:::{note}
1. (Optional) You can use the following command to generate a formatted proposal file, which will be fed into the action classifier (currently only SSN and P-GCN are supported, not TSN, I3D, etc.) to get the classification results of the proposals.
```shell
python tools/data/activitynet/convert_proposal_format.py
```
:::
For more details and optional arguments, you can refer to the **Test a dataset** part in [getting_started](/docs/en/getting_started.md#test-a-dataset).
## Citation
```BibTeX
@inproceedings{lin2019bmn,
title={Bmn: Boundary-matching network for temporal action proposal generation},
author={Lin, Tianwei and Liu, Xiao and Li, Xin and Ding, Errui and Wen, Shilei},
booktitle={Proceedings of the IEEE International Conference on Computer Vision},
pages={3889--3898},
year={2019}
}
```
<!-- [DATASET] -->
```BibTeX
@article{zhao2017cuhk,
title={Cuhk \& ethz \& siat submission to activitynet challenge 2017},
author={Zhao, Y and Zhang, B and Wu, Z and Yang, S and Zhou, L and Yan, S and Wang, L and Xiong, Y and Lin, D and Qiao, Y and others},
journal={arXiv preprint arXiv:1710.08011},
volume={8},
year={2017}
}
```
# BMN
## Introduction
<!-- [ALGORITHM] -->
```BibTeX
@inproceedings{lin2019bmn,
title={Bmn: Boundary-matching network for temporal action proposal generation},
author={Lin, Tianwei and Liu, Xiao and Li, Xin and Ding, Errui and Wen, Shilei},
booktitle={Proceedings of the IEEE International Conference on Computer Vision},
pages={3889--3898},
year={2019}
}
```
<!-- [DATASET] -->
```BibTeX
@article{zhao2017cuhk,
title={Cuhk \& ethz \& siat submission to activitynet challenge 2017},
author={Zhao, Y and Zhang, B and Wu, Z and Yang, S and Zhou, L and Yan, S and Wang, L and Xiong, Y and Lin, D and Qiao, Y and others},
journal={arXiv preprint arXiv:1710.08011},
volume={8},
year={2017}
}
```
## Model Zoo
### ActivityNet feature
| config | feature | gpus | AR@100 | AUC | AP@0.5 | AP@0.75 | AP@0.95 | mAP | gpu_mem(M) | inference time (s) | ckpt | log | json |
| :-----------------------------------------------------------------------------------------------------------: | :------------: | :------: | :----: | :---: | :----: | :-----: | :-----: | :---: | :--------------: | ------------ | :----------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------- |
| [bmn_400x100_9e_2x8_activitynet_feature](/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py) | cuhk_mean_100 | 2 | 75.28 | 67.22 | 42.47 | 31.31 | 9.92 | 30.34 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature_20200619-42a3b111.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log.json) |
| | mmaction_video | 2 | 75.43 | 67.22 | 42.62 | 31.56 | 10.86 | 30.77 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809-c9fd14d2.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.json) |
| | mmaction_clip | 2 | 75.35 | 67.38 | 43.08 | 32.19 | 10.73 | 31.15 | 5420 | 3.27 | [ckpt](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809-10d803ce.pth) | [log](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.log) | [json](https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.json) |
| [BMN-official](https://github.com/JJBOY/BMN-Boundary-Matching-Network) (for reference)\* | cuhk_mean_100 | - | 75.27 | 67.49 | 42.22 | 30.98 | 9.22 | 30.00 | - | - | - | - | - |
Note:
1. The **gpus** column indicates the number of GPUs used to obtain the checkpoint. By default, the configs provided by MMAction2 correspond to training with 8 GPUs.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to scale the learning rate in proportion to the batch size when using a different number of GPUs or a different number of videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
2. For the **feature** column, `cuhk_mean_100` denotes the widely used CUHK ActivityNet feature extracted with the [anet2016-cuhk](https://github.com/yjxiong/anet2016-cuhk) codebase,
while `mmaction_video` and `mmaction_clip` denote features extracted with MMAction, using a video-level or a clip-level ActivityNet-finetuned model, respectively.
3. MMAction2 uses the [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) submission for the ActivityNet 2017 Untrimmed Video Classification Track to assign a label to each temporal action proposal for BMN evaluation.
\*MMAction2 trains BMN with the [official repo](https://github.com/JJBOY/BMN-Boundary-Matching-Network) and evaluates its proposal generation and temporal action detection performance with [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) for label assignment.
For details on data preparation, you can refer to the ActivityNet feature section in [Data Preparation](/docs/zh_cn/data_preparation.md).
## Train
You can use the following command to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Example: train BMN on the ActivityNet features dataset.
```shell
python tools/train.py configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
```
For more training details, you can refer to the **Training setting** section in [getting_started](/docs/zh_cn/getting_started.md#训练配置).
## Test
You can use the following command to test a model.
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Example: test the BMN model on the ActivityNet features dataset.
```shell
# Note: If evaluation is needed, please make sure the annotation file for the test data contains ground truth.
python tools/test.py configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
You can also evaluate the temporal action detection performance of the model with the [anet_cuhk_2017](https://download.openmmlab.com/mmaction/localization/cuhk_anet17_pred.json) prediction file and the generated proposal file (`results.json` from the command above).
```shell
python tools/analysis/report_map.py --proposal path/to/proposal_file
```
Note:
1. (Optional) You can use the following command to generate a formatted proposal file, which can be fed into an action classifier (currently only SSN and P-GCN are supported, not TSN, I3D, etc.) to get the classification results of the proposals.
```shell
python tools/data/activitynet/convert_proposal_format.py
```
For more testing details, you can refer to the **Test a dataset** section in [getting_started](/docs/zh_cn/getting_started.md#测试某个数据集).
_base_ = [
'../../_base_/models/bmn_400x100.py', '../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
data_root_val = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_val.json'
test_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(
type='Collect',
keys=['raw_feature'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature']),
]
train_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
val_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
data = dict(
videos_per_gpu=8,
workers_per_gpu=8,
train_dataloader=dict(drop_last=True),
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
evaluation = dict(interval=1, metrics=['AR@AN'])
# optimizer
optimizer = dict(
type='Adam', lr=0.001, weight_decay=0.0001) # this lr is used for 2 gpus
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=7)
total_epochs = 9
# runtime settings
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
output_config = dict(out=f'{work_dir}/results.json', output_format='json')
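# The dumped results.json can be passed to tools/analysis/report_map.py
# (see the BMN README above) to report temporal action detection mAP.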
_base_ = [
'../../_base_/models/bmn_400x100.py', '../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_mmaction_clip/'
data_root_val = 'data/ActivityNet/activitynet_feature_mmaction_clip/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_val.json'
test_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(
type='Collect',
keys=['raw_feature'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature']),
]
train_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
val_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
data = dict(
videos_per_gpu=8,
workers_per_gpu=8,
train_dataloader=dict(drop_last=True),
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
evaluation = dict(interval=1, metrics=['AR@AN'])
# optimizer
optimizer = dict(
type='Adam', lr=0.001, weight_decay=0.0001) # this lr is used for 2 gpus
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=7)
total_epochs = 9
# runtime settings
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
output_config = dict(out=f'{work_dir}/results.json', output_format='json')
_base_ = [
'../../_base_/models/bmn_400x100.py', '../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_mmaction_video/'
data_root_val = 'data/ActivityNet/activitynet_feature_mmaction_video/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_val.json'
test_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(
type='Collect',
keys=['raw_feature'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature']),
]
train_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
val_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(
type='ToDataContainer',
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)])
]
data = dict(
videos_per_gpu=8,
workers_per_gpu=8,
train_dataloader=dict(drop_last=True),
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
evaluation = dict(interval=1, metrics=['AR@AN'])
# optimizer
optimizer = dict(
type='Adam', lr=0.001, weight_decay=0.0001) # this lr is used for 2 gpus
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=7)
total_epochs = 9
# runtime settings
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/'
output_config = dict(out=f'{work_dir}/results.json', output_format='json')
Collections:
- Name: BMN
README: configs/localization/bmn/README.md
Paper:
URL: https://arxiv.org/abs/1907.09702
Title: "BMN: Boundary-Matching Network for Temporal Action Proposal Generation"
Models:
- Config: configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py
In Collection: BMN
Metadata:
Batch Size: 8
Epochs: 9
Training Data: ActivityNet v1.3
Training Resources: 2 GPUs
feature: cuhk_mean_100
Name: bmn_400x100_2x8_9e_activitynet_feature
Results:
- Dataset: ActivityNet v1.3
Metrics:
AP@0.5: 42.47
AP@0.75: 31.31
AP@0.95: 9.92
AR@100: 75.28
AUC: 67.22
mAP: 30.34
Task: Temporal Action Localization
Training Json Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log.json
Training Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature.log
Weights: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_9e_activitynet_feature/bmn_400x100_9e_activitynet_feature_20200619-42a3b111.pth
- Config: configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature_mmaction_video.py
In Collection: BMN
Metadata:
Batch Size: 8
Epochs: 9
Training Data: ActivityNet v1.3
Training Resources: 2 GPUs
feature: mmaction_video
Name: bmn_400x100_2x8_9e_activitynet_feature_mmaction_video
Results:
- Dataset: ActivityNet v1.3
Metrics:
AP@0.5: 42.62
AP@0.75: 31.56
AP@0.95: 10.86
AR@100: 75.43
AUC: 67.22
mAP: 30.77
Task: Temporal Action Localization
Training Json Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.json
Training Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809.log
Weights: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_video/bmn_400x100_2x8_9e_mmaction_video_20200809-c9fd14d2.pth
- Config: configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature_mmaction_clip.py
In Collection: BMN
Metadata:
Batch Size: 8
Epochs: 9
Training Data: ActivityNet v1.3
Training Resources: 2 GPUs
feature: mmaction_clip
Name: bmn_400x100_2x8_9e_activitynet_feature_mmaction_clip
Results:
- Dataset: ActivityNet v1.3
Metrics:
AP@0.5: 43.08
AP@0.75: 32.19
AP@0.95: 10.73
AR@100: 75.35
AUC: 67.38
mAP: 31.15
Task: Temporal Action Localization
Training Json Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.json
Training Log: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809.log
Weights: https://download.openmmlab.com/mmaction/localization/bmn/bmn_400x100_2x8_9e_mmaction_clip/bmn_400x100_2x8_9e_mmaction_clip_20200809-10d803ce.pth
# BSN
[BSN: Boundary Sensitive Network for Temporal Action Proposal Generation](https://openaccess.thecvf.com/content_ECCV_2018/html/Tianwei_Lin_BSN_Boundary_Sensitive_ECCV_2018_paper.html)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
Temporal action proposal generation is an important yet challenging problem, since temporal proposals with rich action content are indispensable for analysing real-world videos with long duration and a high proportion of irrelevant content. This problem requires methods that not only generate proposals with precise temporal boundaries, but also retrieve proposals that cover ground-truth action instances with high recall and high overlap using relatively fewer proposals. To address these difficulties, we introduce an effective proposal generation method, named Boundary-Sensitive Network (BSN), which adopts a "local to global" fashion. Locally, BSN first locates temporal boundaries with high probabilities, then directly combines these boundaries as proposals. Globally, with the Boundary-Sensitive Proposal feature, BSN retrieves proposals by evaluating the confidence of whether a proposal contains an action within its region. We conduct experiments on two challenging datasets: ActivityNet-1.3 and THUMOS14, where BSN outperforms other state-of-the-art temporal action proposal generation methods with high recall and high temporal precision. Finally, further experiments demonstrate that by combining existing action classifiers, our method significantly improves the state-of-the-art temporal action detection performance.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/34324155/143016692-69efafbd-cec6-47f1-af45-371d0ff78a97.png" width="800"/>
</div>
## Results and Models
### ActivityNet feature
| config | feature | gpus | pretrain | AR@100 | AUC | gpu_mem(M) | iter time(s) | ckpt | log | json |
| :--------------------------------------- | :------------: | :--: | :------: | :----: | :---: | :-------------: | :-------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| bsn_400x100_1x16_20e_activitynet_feature | cuhk_mean_100 | 1 | None | 74.66 | 66.45 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature_20200619-cd6accc3.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature_20210203-1c27763d.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log.json) |
| | mmaction_video | 1 | None | 74.93 | 66.74 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809-ad6ec626.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809-aa861b26.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.json) |
| | mmaction_clip | 1 | None | 75.19 | 66.81 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809-0a563554.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809-e32f61e6.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.json) |
:::{note}
1. The **gpus** column indicates the number of GPUs used to obtain the checkpoint.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use a different number of GPUs or videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
2. For the **feature** column, `cuhk_mean_100` denotes the widely used CUHK ActivityNet feature extracted with [anet2016-cuhk](https://github.com/yjxiong/anet2016-cuhk), while `mmaction_video` and `mmaction_clip` denote features extracted with MMAction, using a video-level or a clip-level ActivityNet-finetuned model, respectively.
:::
For more details on data preparation, you can refer to the ActivityNet feature section in [Data Preparation](/docs/en/data_preparation.md).
## Train
You can use the following commands to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Examples:
1. Train BSN(TEM) on the ActivityNet features dataset.
```shell
python tools/train.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
```
2. Train BSN(PEM) on the PGM results.
```shell
python tools/train.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
```
For more details and optional arguments, you can refer to the **Training setting** part in [getting_started](/docs/en/getting_started.md#training-setting).
## Inference
You can use the following commands to run inference with a model.
1. For TEM Inference
```shell
# Note: This step cannot be evaluated.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
2. For PGM Inference
```shell
python tools/misc/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
```
3. For PEM Inference
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Examples:
1. Run BSN(TEM) inference with a pretrained model.
```shell
python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
```
2. Run BSN(PGM) inference with a pretrained model.
```shell
python tools/misc/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode train
```
3. Run BSN(PEM) inference with the evaluation metric 'AR@AN' and output the results.
```shell
# Note: If evaluation is needed, please make sure the annotation file for the test data contains ground truth.
python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
## Test
You can use the following commands to test a model.
1. TEM
```shell
# Note: This step cannot be evaluated.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
2. PGM
```shell
python tools/misc/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
```
3. PEM
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Examples:
1. Test a TEM model on the ActivityNet dataset.
```shell
python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
```
2. Test a PGM model on the ActivityNet dataset.
```shell
python tools/misc/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test
```
3. Test a PEM model with the evaluation metric 'AR@AN' and output the results.
```shell
python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
:::{note}
1. (Optional) You can use the following command to generate a formatted proposal file, which will be fed into the action classifier (currently only SSN and P-GCN are supported, not TSN, I3D, etc.) to get the classification results of the proposals.
```shell
python tools/data/activitynet/convert_proposal_format.py
```
:::
For more details and optional arguments, you can refer to the **Test a dataset** part in [getting_started](/docs/en/getting_started.md#test-a-dataset).
## Citation
```BibTeX
@inproceedings{lin2018bsn,
title={Bsn: Boundary sensitive network for temporal action proposal generation},
author={Lin, Tianwei and Zhao, Xu and Su, Haisheng and Wang, Chongjing and Yang, Ming},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
pages={3--19},
year={2018}
}
```
# BSN
## Introduction
<!-- [ALGORITHM] -->
```BibTeX
@inproceedings{lin2018bsn,
title={Bsn: Boundary sensitive network for temporal action proposal generation},
author={Lin, Tianwei and Zhao, Xu and Su, Haisheng and Wang, Chongjing and Yang, Ming},
booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
pages={3--19},
year={2018}
}
```
## Model Zoo
### ActivityNet feature
| config | feature | gpus | pretrain | AR@100 | AUC | gpu_mem(M) | iter time (s) | ckpt | log | json |
| :--------------------------------------- | :------------: | :------: | :----: | :----: | :---: | :--------------: | :-------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| bsn_400x100_1x16_20e_activitynet_feature | cuhk_mean_100 | 1 | None | 74.66 | 66.45 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature_20200619-cd6accc3.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature_20210203-1c27763d.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log.json) |
| | mmaction_video | 1 | None | 74.93 | 66.74 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809-ad6ec626.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809-aa861b26.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.json) |
| | mmaction_clip | 1 | None | 75.19 | 66.81 | 41(TEM)+25(PEM) | 0.074(TEM)+0.036(PEM) | [ckpt_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809-0a563554.pth) [ckpt_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809-e32f61e6.pth) | [log_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.log) [log_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.log) | [json_tem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.json) [json_pem](https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.json) |
Note:
1. The **gpus** indicates the number of GPUs used to obtain the checkpoint. By default, the configs provided by MMAction2 are written for training with 8 GPUs.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to scale the learning rate in proportion to the batch size when using a different number of GPUs or a different number of videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu (a short sketch of this arithmetic follows these notes).
2. For the **feature** column, `cuhk_mean_100` denotes the widely-used CUHK ActivityNet features extracted with the [anet2016-cuhk](https://github.com/yjxiong/anet2016-cuhk) codebase,
while `mmaction_video` and `mmaction_clip` denote features extracted with MMAction, using a video-level and a clip-level ActivityNet-pretrained model respectively.
For details on data preparation, you can refer to the ActivityNet feature section in [Data Preparation](/docs/zh_cn/data_preparation.md).
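To make the scaling arithmetic in note 1 concrete, here is a minimal sketch; the numbers are the illustrative ones quoted above, not values taken from any particular config.
```shell
# Linear scaling rule: new_lr = base_lr * new_total_batch / base_total_batch
# base: lr=0.01 at 4 GPUs x 2 videos/GPU (batch 8); target: 16 GPUs x 4 videos/GPU (batch 64)
python -c "print(0.01 * (16 * 4) / (4 * 2))"  # -> 0.08
```
The resulting value then replaces `lr` in the `optimizer` dict of the config you train with.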
## Train
You can use the following command to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Examples:
1. train BSN(TEM) model on ActivityNet features dataset.
```shell
python tools/train.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
```
2. train BSN(PEM) model on PGM results.
```shell
python tools/train.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
```
For more details, you can refer to the **Training setting** part in [getting_started](/docs/zh_cn/getting_started.md#训练配置).
## Inference
You can use the following commands to inference a model; a consolidated end-to-end sketch follows the examples below.
1. For TEM Inference
```shell
# Note: This could not be evaluated.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
2. For PGM Inference
```shell
python tools/misc/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
```
3. For PEM Inference
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Examples:
1. Inference BSN(TEM) with pretrained model.
```shell
python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
```
2. Inference BSN(PGM) with pretrained model.
```shell
python tools/misc/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode train
```
3. Inference BSN(PEM) with evaluation metric 'AR@AN' and output the results.
```shell
# Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
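For reference, the three stages can be chained as below on the test split. This is only a sketch: `SOME_TEM_CHECKPOINT.pth` and `SOME_PEM_CHECKPOINT.pth` are placeholders for your own trained weights, and the output directories are those set in the configs above.
```shell
# 1. TEM inference: writes boundary probabilities to the tem_results dir set in the TEM config.
python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_TEM_CHECKPOINT.pth
# 2. PGM: turns TEM outputs into candidate proposals and BSP features (no checkpoint needed).
python tools/misc/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test
# 3. PEM: scores the proposals, evaluates AR@AN and dumps results.json.
python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_PEM_CHECKPOINT.pth --eval AR@AN --out results.json
```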
## Test
You can use the following commands to test a model.
1. TEM
```shell
# Note: This could not be evaluated.
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
2. PGM
```shell
python tools/misc/bsn_proposal_generation.py ${CONFIG_FILE} [--mode ${MODE}]
```
3. PEM
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Examples:
1. Test a TEM model on ActivityNet dataset.
```shell
python tools/test.py configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth
```
2. Test a PGM model on ActivityNet dataset.
```shell
python tools/misc/bsn_proposal_generation.py configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py --mode test
```
3. Test a PEM model with evaluation metric 'AR@AN' and output the results.
```shell
python tools/test.py configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py checkpoints/SOME_CHECKPOINT.pth --eval AR@AN --out results.json
```
Note:
1. (Optional) You can use the following command to generate a formatted proposal file, which can be fed into an action classifier (currently only SSN and P-GCN are supported; TSN, I3D, etc. are not) to get the classification results of the proposals.
```shell
python tools/data/activitynet/convert_proposal_format.py
```
For more details, you can refer to the **Test a dataset** part in [getting_started](/docs/zh_cn/getting_started.md#测试某个数据集).
_base_ = [
'../../_base_/models/bsn_pem.py', '../../_base_/schedules/adam_20e.py',
'../../_base_/default_runtime.py'
]
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
data_root_val = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_val.json'
work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
pgm_features_dir = f'{work_dir}/pgm_features/'
test_pipeline = [
dict(
type='LoadProposals',
top_k=1000,
pgm_proposals_dir=pgm_proposals_dir,
pgm_features_dir=pgm_features_dir),
dict(
type='Collect',
keys=['bsp_feature', 'tmin', 'tmax', 'tmin_score', 'tmax_score'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['bsp_feature'])
]
train_pipeline = [
dict(
type='LoadProposals',
top_k=500,
pgm_proposals_dir=pgm_proposals_dir,
pgm_features_dir=pgm_features_dir),
dict(
type='Collect',
keys=['bsp_feature', 'reference_temporal_iou'],
meta_name='video_meta',
meta_keys=[]),
dict(type='ToTensor', keys=['bsp_feature', 'reference_temporal_iou']),
dict(
type='ToDataContainer',
fields=(dict(key='bsp_feature', stack=False),
dict(key='reference_temporal_iou', stack=False)))
]
val_pipeline = [
dict(
type='LoadProposals',
top_k=1000,
pgm_proposals_dir=pgm_proposals_dir,
pgm_features_dir=pgm_features_dir),
dict(
type='Collect',
keys=['bsp_feature', 'tmin', 'tmax', 'tmin_score', 'tmax_score'],
meta_name='video_meta',
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]),
dict(type='ToTensor', keys=['bsp_feature'])
]
data = dict(
videos_per_gpu=16,
workers_per_gpu=8,
train_dataloader=dict(drop_last=True),
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
evaluation = dict(interval=1, metrics=['AR@AN'])
# runtime settings
checkpoint_config = dict(interval=1, filename_tmpl='pem_epoch_{}.pth')
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
output_config = dict(out=f'{work_dir}/results.json', output_format='json')
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
data_root_val = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_test.json'
work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
tem_results_dir = f'{work_dir}/tem_results/'
pgm_proposals_dir = f'{work_dir}/pgm_proposals/'
pgm_features_dir = f'{work_dir}/pgm_features/'
temporal_scale = 100
pgm_proposals_cfg = dict(
pgm_proposals_thread=8, temporal_scale=temporal_scale, peak_threshold=0.5)
pgm_features_test_cfg = dict(
pgm_features_thread=4,
top_k=1000,
num_sample_start=8,
num_sample_end=8,
num_sample_action=16,
num_sample_interp=3,
bsp_boundary_ratio=0.2)
pgm_features_train_cfg = dict(
pgm_features_thread=4,
top_k=500,
num_sample_start=8,
num_sample_end=8,
num_sample_action=16,
num_sample_interp=3,
bsp_boundary_ratio=0.2)
_base_ = ['../../_base_/models/bsn_tem.py', '../../_base_/default_runtime.py']
# dataset settings
dataset_type = 'ActivityNetDataset'
data_root = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
data_root_val = 'data/ActivityNet/activitynet_feature_cuhk/csv_mean_100/'
ann_file_train = 'data/ActivityNet/anet_anno_train.json'
ann_file_val = 'data/ActivityNet/anet_anno_val.json'
ann_file_test = 'data/ActivityNet/anet_anno_full.json'
test_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(
type='Collect',
keys=['raw_feature'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature'])
]
train_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
]
val_pipeline = [
dict(type='LoadLocalizationFeature'),
dict(type='GenerateLocalizationLabels'),
dict(
type='Collect',
keys=['raw_feature', 'gt_bbox'],
meta_name='video_meta',
meta_keys=['video_name']),
dict(type='ToTensor', keys=['raw_feature', 'gt_bbox']),
dict(type='ToDataContainer', fields=[dict(key='gt_bbox', stack=False)])
]
data = dict(
videos_per_gpu=16,
workers_per_gpu=8,
train_dataloader=dict(drop_last=True),
val_dataloader=dict(videos_per_gpu=1),
test_dataloader=dict(videos_per_gpu=1),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict(
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
# optimizer
optimizer = dict(
    type='Adam', lr=0.001, weight_decay=0.0001)  # this lr is used for 1 gpu
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=7)
total_epochs = 20
# runtime settings
checkpoint_config = dict(interval=1, filename_tmpl='tem_epoch_{}.pth')
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
workflow = [('train', 1), ('val', 1)]
work_dir = 'work_dirs/bsn_400x100_20e_1x16_activitynet_feature/'
tem_results_dir = f'{work_dir}/tem_results/'
output_config = dict(out=tem_results_dir, output_format='csv')
Collections:
- Name: BSN
README: configs/localization/bsn/README.md
Paper:
URL: https://arxiv.org/abs/1806.02964
Title: "BSN: Boundary Sensitive Network for Temporal Action Proposal Generation"
Models:
- Config:
- configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
- configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
- configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
In Collection: BSN
Metadata:
Pretrained: None
Training Data: ActivityNet v1.3
    Training Resources: 1 GPU
feature: cuhk_mean_100
Name: bsn_400x100_1x16_20e_activitynet_feature (cuhk_mean_100)
Results:
- Dataset: ActivityNet v1.3
Metrics:
AR@100: 74.66
AUC: 66.45
Task: Temporal Action Localization
Training Json Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log.json
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log.json
Training Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature.log
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature.log
Weights:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature/bsn_tem_400x100_1x16_20e_activitynet_feature_20200619-cd6accc3.pth
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature/bsn_pem_400x100_1x16_20e_activitynet_feature_20210203-1c27763d.pth
- Config:
- configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
- configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
- configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
In Collection: BSN
Metadata:
Pretrained: None
Training Data: ActivityNet v1.3
    Training Resources: 1 GPU
feature: mmaction_video
Name: bsn_400x100_1x16_20e_activitynet_feature (mmaction_video)
Results:
- Dataset: ActivityNet v1.3
Metrics:
AR@100: 74.93
AUC: 66.74
Task: Temporal Action Localization
Training Json Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.json
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.json
Training Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809.log
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809.log
Weights:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_video/bsn_tem_400x100_1x16_20e_mmaction_video_20200809-ad6ec626.pth
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_video/bsn_pem_400x100_1x16_20e_mmaction_video_20200809-aa861b26.pth
- Config:
- configs/localization/bsn/bsn_pem_400x100_1x16_20e_activitynet_feature.py
- configs/localization/bsn/bsn_pgm_400x100_activitynet_feature.py
- configs/localization/bsn/bsn_tem_400x100_1x16_20e_activitynet_feature.py
In Collection: BSN
Metadata:
Pretrained: None
Training Data: ActivityNet v1.3
    Training Resources: 1 GPU
feature: mmaction_clip
Name: bsn_400x100_1x16_20e_activitynet_feature (mmaction_clip)
Results:
- Dataset: ActivityNet v1.3
Metrics:
AR@100: 75.19
AUC: 66.81
Task: Temporal Action Localization
Training Json Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.json
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.json
Training Log:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809.log
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809.log
Weights:
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_tem_400x100_1x16_20e_mmaction_clip/bsn_tem_400x100_1x16_20e_mmaction_clip_20200809-0a563554.pth
- https://download.openmmlab.com/mmaction/localization/bsn/bsn_pem_400x100_1x16_20e_mmaction_clip/bsn_pem_400x100_1x16_20e_mmaction_clip_20200809-e32f61e6.pth
# SSN
[Temporal Action Detection With Structured Segment Networks](https://openaccess.thecvf.com/content_iccv_2017/html/Zhao_Temporal_Action_Detection_ICCV_2017_paper.html)
<!-- [ALGORITHM] -->
## Abstract
<!-- [ABSTRACT] -->
Detecting actions in untrimmed videos is an important yet challenging task. In this paper, we present the structured segment network (SSN), a novel framework which models the temporal structure of each action instance via a structured temporal pyramid. On top of the pyramid, we further introduce a decomposed discriminative model comprising two classifiers, respectively for classifying actions and determining completeness. This allows the framework to effectively distinguish positive proposals from background or incomplete ones, thus leading to both accurate recognition and localization. These components are integrated into a unified network that can be efficiently trained in an end-to-end fashion. Additionally, a simple yet effective temporal action proposal scheme, dubbed temporal actionness grouping (TAG) is devised to generate high quality action proposals. On two challenging benchmarks, THUMOS14 and ActivityNet, our method remarkably outperforms previous state-of-the-art methods, demonstrating superior accuracy and strong adaptivity in handling actions with various temporal structures.
<!-- [IMAGE] -->
<div align=center>
<img src="https://user-images.githubusercontent.com/34324155/143016899-017893d3-a907-4487-90a2-cb884088266c.png" width="800"/>
</div>
## Results and Models
| config | gpus | backbone | pretrain | mAP@0.3 | mAP@0.4 | mAP@0.5 | reference mAP@0.3 | reference mAP@0.4 | reference mAP@0.5 | gpu_mem(M) | ckpt | log | json | reference ckpt | reference json |
| :---------------------------------------------------------------------------------------: | :--: | :------: | :------: | :-----: | :-----: | :-----: | :---------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :--------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------: |
| [ssn_r50_450e_thumos14_rgb](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py) | 8 | ResNet50 | ImageNet | 29.37 | 22.15 | 15.69 | [27.61](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | [21.28](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | [14.57](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | 6352 | [ckpt](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/ssn_r50_450e_thumos14_rgb_20201012-1920ab16.pth) | [log](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log) | [json](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log.json) | [ckpt](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/ssn_r50_450e_thumos14_rgb_ref_20201014-b6f48f68.pth) | [json](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/20201008_103258.log.json) |
:::{note}
1. The **gpus** indicates the number of GPUs we used to get the checkpoint.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you may set the learning rate proportional to the batch size if you use different GPUs or videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
2. Since SSN utilizes different structured temporal pyramid pooling methods at training and testing, please refer to [ssn_r50_450e_thumos14_rgb_train](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py) at training and [ssn_r50_450e_thumos14_rgb_test](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_test.py) at testing.
3. We evaluate the action detection performance of SSN, using action proposals of TAG. For more details on data preparation, you can refer to thumos14 TAG proposals in [Data Preparation](/docs/en/data_preparation.md).
4. The reference SSN is evaluated with the `ResNet50` backbone in MMAction, which is the same backbone as ours. Note that the original setting of MMAction SSN uses the `BNInception` backbone.
:::
## Train
You can use the following command to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Example: train SSN model on thumos14 dataset.
```shell
python tools/train.py configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py
```
For more details and optional arguments infos, you can refer to **Training setting** part in [getting_started](/docs/en/getting_started.md#training-setting).
## Test
You can use the following command to test a model.
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Example: test SSN model on THUMOS14 dataset.
```shell
# Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
python tools/test.py configs/localization/ssn/ssn_r50_450e_thumos14_rgb_test.py checkpoints/SOME_CHECKPOINT.pth --eval mAP
```
For more details and optional arguments infos, you can refer to **Test a dataset** part in [getting_started](/docs/en/getting_started.md#test-a-dataset).
## Citation
```BibTeX
@InProceedings{Zhao_2017_ICCV,
author = {Zhao, Yue and Xiong, Yuanjun and Wang, Limin and Wu, Zhirong and Tang, Xiaoou and Lin, Dahua},
title = {Temporal Action Detection With Structured Segment Networks},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
month = {Oct},
year = {2017}
}
```
# SSN
## Introduction
<!-- [ALGORITHM] -->
```BibTeX
@InProceedings{Zhao_2017_ICCV,
author = {Zhao, Yue and Xiong, Yuanjun and Wang, Limin and Wu, Zhirong and Tang, Xiaoou and Lin, Dahua},
title = {Temporal Action Detection With Structured Segment Networks},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
month = {Oct},
year = {2017}
}
```
## Model Zoo
| config | gpus | backbone | pretrain | mAP@0.3 | mAP@0.4 | mAP@0.5 | reference mAP@0.3 | reference mAP@0.4 | reference mAP@0.5 | gpu_mem(M) | ckpt | log | json | reference ckpt | reference json |
| :---------------------------------------------------------------------------------------: | :------: | :------: | :------: | :-----: | :-----: | :-----: | :---------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------: | :--------------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------: |
| [ssn_r50_450e_thumos14_rgb](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py) | 8 | ResNet50 | ImageNet | 29.37 | 22.15 | 15.69 | [27.61](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | [21.28](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | [14.57](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started) | 6352 | [ckpt](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/ssn_r50_450e_thumos14_rgb_20201012-1920ab16.pth) | [log](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log) | [json](https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log.json) | [ckpt](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/ssn_r50_450e_thumos14_rgb_ref_20201014-b6f48f68.pth) | [json](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/20201008_103258.log.json) |
Note:
1. The **gpus** indicates the number of GPUs used to obtain the checkpoint. By default, the configs provided by MMAction2 are written for training with 8 GPUs.
According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to scale the learning rate in proportion to the batch size when using a different number of GPUs or a different number of videos per GPU,
e.g., lr=0.01 for 4 GPUs x 2 video/gpu and lr=0.08 for 16 GPUs x 4 video/gpu.
2. Since SSN uses different structured temporal pyramid pooling methods at training and testing, please refer to [ssn_r50_450e_thumos14_rgb_train](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py) for training and [ssn_r50_450e_thumos14_rgb_test](/configs/localization/ssn/ssn_r50_450e_thumos14_rgb_test.py) for testing.
3. MMAction2 evaluates the action detection performance of SSN using TAG action proposals. For more details on data preparation, you can refer to [Data Preparation](/docs/zh_cn/data_preparation.md) to prepare the THUMOS14 TAG proposals.
4. The reference SSN is evaluated with the `ResNet50` backbone in MMAction, which is the same backbone as ours. Note that the original setting of MMAction SSN uses the `BNInception` backbone.
## Train
You can use the following command to train a model.
```shell
python tools/train.py ${CONFIG_FILE} [optional arguments]
```
Example: train SSN model on THUMOS14 dataset.
```shell
python tools/train.py configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py
```
For more details, you can refer to the **Training setting** part in [getting_started](/docs/zh_cn/getting_started.md#训练配置).
## Test
You can use the following command to test a model.
```shell
python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [optional arguments]
```
Example: test SSN model on THUMOS14 dataset.
```shell
# Note: If evaluated, then please make sure the annotation file for test data contains groundtruth.
python tools/test.py configs/localization/ssn/ssn_r50_450e_thumos14_rgb_test.py checkpoints/SOME_CHECKPOINT.pth --eval mAP
```
For more details, you can refer to the **Test a dataset** part in [getting_started](/docs/zh_cn/getting_started.md#测试某个数据集).
Collections:
- Name: SSN
README: configs/localization/ssn/README.md
Paper:
URL: https://arxiv.org/abs/1704.06228
Title: Temporal Action Detection with Structured Segment Networks
Models:
- Config: configs/localization/ssn/ssn_r50_450e_thumos14_rgb_train.py
In Collection: SSN
Metadata:
Architecture: ResNet50
Pretrained: ImageNet
Training Data: THUMOS 14
Training Resources: 8 GPUs
Name: ssn_r50_450e_thumos14_rgb
Results:
- Dataset: THUMOS 14
Metrics:
mAP@0.3: 29.37
mAP@0.4: 22.15
mAP@0.5: 15.69
Task: Temporal Action Localization
Training Json Log: https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log.json
Training Log: https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/20201005_144656.log
Weights: https://download.openmmlab.com/mmaction/localization/ssn/ssn_r50_450e_thumos14_rgb/ssn_r50_450e_thumos14_rgb_20201012-1920ab16.pth
reference mAP@0.3: '[27.61](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started)'
reference mAP@0.4: '[21.28](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started)'
reference mAP@0.5: '[14.57](https://github.com/open-mmlab/mmaction/tree/c7e3b7c11fb94131be9b48a8e3d510589addc3ce#Get%20started)'
reference ckpt: '[ckpt](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/ssn_r50_450e_thumos14_rgb_ref_20201014-b6f48f68.pth)'
reference json: '[json](https://download.openmmlab.com/mmaction/localization/ssn/mmaction_reference/ssn_r50_450e_thumos14_rgb_ref/20201008_103258.log.json)'