Commit 76ccaa54 authored by unknown

Add mmaction2 test cases

parent 44c28b2b
#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import functools as func
import glob
import re
from os.path import basename, splitext
import numpy as np
import titlecase
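# Generate documentation summary pages (modelzoo.md and datasets.md) by scanning
# the *_models.md and supported_datasets.md docs for papers, configs and checkpoints.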
def anchor(name):
return re.sub(r'-+', '-', re.sub(r'[^a-zA-Z0-9]', '-',
name.strip().lower())).strip('-')
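# Example: anchor('Action Recognition Models') -> 'action-recognition-models'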
# Count algorithms
files = sorted(glob.glob('*_models.md'))
# files = sorted(glob.glob('docs/*_models.md'))
stats = []
for f in files:
with open(f, 'r') as content_file:
content = content_file.read()
# title
title = content.split('\n')[0].replace('#', '')
# skip IMAGE and ABSTRACT tags
content = [
x for x in content.split('\n')
if 'IMAGE' not in x and 'ABSTRACT' not in x
]
content = '\n'.join(content)
# count papers
papers = set(
(papertype, titlecase.titlecase(paper.lower().strip()))
for (papertype, paper) in re.findall(
r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
content, re.DOTALL))
# paper links
revcontent = '\n'.join(list(reversed(content.splitlines())))
paperlinks = {}
for _, p in papers:
print(p)
q = p.replace('\\', '\\\\').replace('?', '\\?')
paperlinks[p] = ' '.join(
(f'[->]({splitext(basename(f))[0]}.html#{anchor(paperlink)})'
for paperlink in re.findall(
rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
revcontent, re.DOTALL | re.IGNORECASE)))
print(' ', paperlinks[p])
paperlist = '\n'.join(
sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
# count configs
configs = set(x.lower().strip()
for x in re.findall(r'https.*configs/.*\.py', content))
# count ckpts
ckpts = set(x.lower().strip()
for x in re.findall(r'https://download.*\.pth', content)
if 'mmaction' in x)
statsmsg = f"""
## [{title}]({f})
* Number of checkpoints: {len(ckpts)}
* Number of configs: {len(configs)}
* Number of papers: {len(papers)}
{paperlist}
"""
stats.append((papers, configs, ckpts, statsmsg))
allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _, _ in stats])
allconfigs = func.reduce(lambda a, b: a.union(b), [c for _, c, _, _ in stats])
allckpts = func.reduce(lambda a, b: a.union(b), [c for _, _, c, _ in stats])
msglist = '\n'.join(x for _, _, _, x in stats)
papertypes, papercounts = np.unique([t for t, _ in allpapers],
return_counts=True)
countstr = '\n'.join(
[f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
modelzoo = f"""
# Overview
* Number of checkpoints: {len(allckpts)}
* Number of configs: {len(allconfigs)}
* Number of papers: {len(allpapers)}
{countstr}
For supported datasets, see [datasets overview](datasets.md).
{msglist}
"""
with open('modelzoo.md', 'w') as f:
f.write(modelzoo)
# Count datasets
files = ['supported_datasets.md']
# files = sorted(glob.glob('docs/tasks/*.md'))
datastats = []
for f in files:
with open(f, 'r') as content_file:
content = content_file.read()
# title
title = content.split('\n')[0].replace('#', '')
# count papers
papers = set(
(papertype, titlecase.titlecase(paper.lower().strip()))
for (papertype, paper) in re.findall(
r'<!--\s*\[([A-Z]*?)\]\s*-->\s*\n.*?\btitle\s*=\s*{(.*?)}',
content, re.DOTALL))
# paper links
revcontent = '\n'.join(list(reversed(content.splitlines())))
paperlinks = {}
for _, p in papers:
print(p)
q = p.replace('\\', '\\\\').replace('?', '\\?')
paperlinks[p] = ', '.join(
(f'[{p.strip()} ->]({splitext(basename(f))[0]}.html#{anchor(p)})'
for p in re.findall(
rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
revcontent, re.DOTALL | re.IGNORECASE)))
print(' ', paperlinks[p])
paperlist = '\n'.join(
sorted(f' - [{t}] {x} ({paperlinks[x]})' for t, x in papers))
statsmsg = f"""
## [{title}]({f})
* Number of papers: {len(papers)}
{paperlist}
"""
datastats.append((papers, configs, ckpts, statsmsg))
alldatapapers = func.reduce(lambda a, b: a.union(b),
[p for p, _, _, _ in datastats])
# Summarize
msglist = '\n'.join(x for _, _, _, x in stats)
datamsglist = '\n'.join(x for _, _, _, x in datastats)
papertypes, papercounts = np.unique([t for t, _ in alldatapapers],
return_counts=True)
countstr = '\n'.join(
[f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
modelzoo = f"""
# Overview
* Number of papers: {len(alldatapapers)}
{countstr}
For supported action algorithms, see [modelzoo overview](modelzoo.md).
{datamsglist}
"""
with open('datasets.md', 'w') as f:
f.write(modelzoo)
# Supported Datasets
- Action Recognition
- [UCF101](/tools/data/ucf101/README.md) \[ [Homepage](https://www.crcv.ucf.edu/research/data-sets/ucf101/) \].
- [HMDB51](/tools/data/hmdb51/README.md) \[ [Homepage](https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/) \].
- [Kinetics-\[400/600/700\]](/tools/data/kinetics/README.md) \[ [Homepage](https://deepmind.com/research/open-source/kinetics) \]
- [Something-Something V1](/tools/data/sthv1/README.md) \[ [Homepage](https://20bn.com/datasets/something-something/v1) \]
- [Something-Something V2](/tools/data/sthv2/README.md) \[ [Homepage](https://20bn.com/datasets/something-something) \]
- [Moments in Time](/tools/data/mit/README.md) \[ [Homepage](http://moments.csail.mit.edu/) \]
- [Multi-Moments in Time](/tools/data/mmit/README.md) \[ [Homepage](http://moments.csail.mit.edu/challenge_iccv_2019.html) \]
- [HVU](/tools/data/hvu/README.md) \[ [Homepage](https://github.com/holistic-video-understanding/HVU-Dataset) \]
- [Jester](/tools/data/jester/README.md) \[ [Homepage](https://20bn.com/datasets/jester/v1) \]
- [GYM](/tools/data/gym/README.md) \[ [Homepage](https://sdolivia.github.io/FineGym/) \]
- [ActivityNet](/tools/data/activitynet/README.md) \[ [Homepage](http://activity-net.org/) \]
- [Diving48](/tools/data/diving48/README.md) \[ [Homepage](http://www.svcl.ucsd.edu/projects/resound/dataset.html) \]
- [OmniSource](/tools/data/omnisource/README.md) \[ [Homepage](https://kennymckormick.github.io/omnisource/) \]
- Temporal Action Detection
- [ActivityNet](/tools/data/activitynet/README.md) \[ [Homepage](http://activity-net.org/) \]
- [THUMOS14](/tools/data/thumos14/README.md) \[ [Homepage](https://www.crcv.ucf.edu/THUMOS14/download.html) \]
- Spatial Temporal Action Detection
- [AVA](/tools/data/ava/README.md) \[ [Homepage](https://research.google.com/ava/index.html) \]
- [UCF101-24](/tools/data/ucf101_24/README.md) \[ [Homepage](http://www.thumos.info/download.html) \]
- [JHMDB](/tools/data/jhmdb/README.md) \[ [Homepage](http://jhmdb.is.tue.mpg.de/) \]
- Skeleton-based Action Recognition
- [PoseC3D Skeleton Dataset](/tools/data/skeleton/README.md) \[ [Homepage](https://kennymckormick.github.io/posec3d/) \]
The supported datasets are listed above.
We provide shell scripts for data preparation under the path `$MMACTION2/tools/data/`.
Below are detailed tutorials on data preparation for each dataset.
## <a href='https://mmaction2.readthedocs.io/en/latest/'>English</a>
## <a href='https://mmaction2.readthedocs.io/zh_CN/latest/'>简体中文</a>
# Tutorial 1: Learn about Configs
We use Python files as configs and incorporate modular and inheritance design into our config system, which makes it convenient to conduct various experiments.
You can find all the provided configs under `$MMACTION2/configs`. If you wish to inspect a config file,
you may run `python tools/analysis/print_config.py /PATH/TO/CONFIG` to see the complete config.
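If you prefer to do this programmatically, here is a minimal sketch using the `mmcv.Config` API (assuming `mmcv` is installed; the config path below is only an example):

```python
from mmcv import Config

# Load a config file and print the fully merged result, similar to
# what tools/analysis/print_config.py does.
cfg = Config.fromfile('configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
print(cfg.pretty_text)
```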
<!-- TOC -->
- [Modify config through script arguments](#modify-config-through-script-arguments)
- [Config File Structure](#config-file-structure)
- [Config File Naming Convention](#config-file-naming-convention)
- [Config System for Action Localization](#config-system-for-action-localization)
- [Config System for Action Recognition](#config-system-for-action-recognition)
- [Config System for Spatio-Temporal Action Detection](#config-system-for-spatio-temporal-action-detection)
- [FAQ](#faq)
- [Use intermediate variables in configs](#use-intermediate-variables-in-configs)
<!-- TOC -->
## Modify config through script arguments
When submitting jobs using `tools/train.py` or `tools/test.py`, you may specify `--cfg-options` to modify the config in place; a sketch of how these options are merged is shown after the list below.
- Update config keys of dict.
The config options can be specified following the order of the dict keys in the original config.
For example, `--cfg-options model.backbone.norm_eval=False` changes all BN modules in the model backbone to `train` mode.
- Update keys inside a list of configs.
Some config dicts are composed as a list in your config. For example, the training pipeline `data.train.pipeline` is normally a list
e.g. `[dict(type='SampleFrames'), ...]`. If you want to change `'SampleFrames'` to `'DenseSampleFrames'` in the pipeline,
you may specify `--cfg-options data.train.pipeline.0.type=DenseSampleFrames`.
- Update values of lists/tuples.
Some values in a config are lists or tuples. For example, the config file normally sets `workflow=[('train', 1)]`. If you want to
change this key, you may specify `--cfg-options workflow="[(train,1),(val,1)]"`. Note that the quotation marks " are necessary to
support list/tuple data types, and that **NO** whitespace is allowed inside the quotation marks in the specified value.
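Under the hood, these dotted options are merged into the loaded config before training starts. Here is a minimal sketch of that behaviour using `mmcv.Config.merge_from_dict` (the config path and option value are only illustrative):

```python
from mmcv import Config

# Roughly what --cfg-options model.backbone.norm_eval=False does:
# the dotted key is merged into the nested config dict.
cfg = Config.fromfile('configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
cfg.merge_from_dict({'model.backbone.norm_eval': False})
assert cfg.model.backbone.norm_eval is False
```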
## Config File Structure
There are 3 basic component types under `configs/_base_`: model, schedule, default_runtime.
Many methods, such as TSN, I3D and SlowOnly, can be easily constructed with one component of each type.
The configs that are composed of components from `_base_` are called _primitive_.
For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
For ease of understanding, we recommend contributors inherit from existing methods.
For example, if some modification is made based on TSN, users may first inherit the basic TSN structure by specifying `_base_ = ../tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py`, then modify the necessary fields in the config file.
If you are building an entirely new method that does not share the structure with any of the existing methods, you may create a folder under `configs/TASK`.
Please refer to [mmcv](https://mmcv.readthedocs.io/en/latest/understand_mmcv/config.html) for detailed documentation.
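As a sketch of this inheritance mechanism, a derived config only declares `_base_` and the fields it overrides (the file name and the overridden values below are hypothetical):

```python
# configs/recognition/tsn/my_tsn_ucf101_config.py (hypothetical file name)
_base_ = ['./tsn_r50_1x1x3_100e_kinetics400_rgb.py']

# Override only the fields that differ from the inherited config.
model = dict(cls_head=dict(num_classes=101))
total_epochs = 50
```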
## Config File Naming Convention
We follow the style below to name config files. Contributors are advised to follow the same style.
```
{model}_[model setting]_{backbone}_[misc]_{data setting}_[gpu x batch_per_gpu]_{schedule}_{dataset}_{modality}
```
`{xxx}` is a required field and `[yyy]` is optional.
- `{model}`: model type, e.g. `tsn`, `i3d`, etc.
- `[model setting]`: specific setting for some models.
- `{backbone}`: backbone type, e.g. `r50` (ResNet-50), etc.
- `[misc]`: miscellaneous setting/plugins of model, e.g. `dense`, `320p`, `video`, etc.
- `{data setting}`: frame sample setting in `{clip_len}x{frame_interval}x{num_clips}` format.
- `[gpu x batch_per_gpu]`: GPUs and samples per GPU.
- `{schedule}`: training schedule, e.g. `20e` means 20 epochs.
- `{dataset}`: dataset name, e.g. `kinetics400`, `mmit`, etc.
- `{modality}`: frame modality, e.g. `rgb`, `flow`, etc.
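For example, `tsn_r50_1x1x3_100e_kinetics400_rgb` denotes a TSN model with a ResNet-50 backbone, a `1x1x3` frame sampling setting (`clip_len` x `frame_interval` x `num_clips`), a 100-epoch schedule, trained on Kinetics-400 with RGB input.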
### Config System for Action Localization
We incorporate modular design into our config system,
which is convenient to conduct various experiments.
- An Example of BMN
To help users have a basic idea of a complete config structure and the modules in an action localization system,
we add brief comments to the config of BMN below.
For more detailed usage and alternatives for each parameter in each module, please refer to the [API documentation](https://mmaction2.readthedocs.io/en/latest/api.html).
```python
# model settings
model = dict( # Config of the model
type='BMN', # Type of the localizer
temporal_dim=100, # Total frames selected for each video
boundary_ratio=0.5, # Ratio for determining video boundaries
num_samples=32, # Number of samples for each proposal
num_samples_per_bin=3, # Number of bin samples for each sample
feat_dim=400, # Dimension of feature
soft_nms_alpha=0.4, # Soft NMS alpha
soft_nms_low_threshold=0.5, # Soft NMS low threshold
soft_nms_high_threshold=0.9, # Soft NMS high threshold
post_process_top_k=100) # Top k proposals in post process
# model training and testing settings
train_cfg = None # Config of training hyperparameters for BMN
test_cfg = dict(average_clips='score') # Config for testing hyperparameters for BMN
# dataset settings
dataset_type = 'ActivityNetDataset' # Type of dataset for training, validation and testing
data_root = 'data/activitynet_feature_cuhk/csv_mean_100/' # Root path to data for training
data_root_val = 'data/activitynet_feature_cuhk/csv_mean_100/' # Root path to data for validation and testing
ann_file_train = 'data/ActivityNet/anet_anno_train.json' # Path to the annotation file for training
ann_file_val = 'data/ActivityNet/anet_anno_val.json' # Path to the annotation file for validation
ann_file_test = 'data/ActivityNet/anet_anno_test.json' # Path to the annotation file for testing
train_pipeline = [ # List of training pipeline steps
dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
dict(type='GenerateLocalizationLabels'), # Generate localization labels pipeline
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
keys=['raw_feature', 'gt_bbox'], # Keys of input
meta_name='video_meta', # Meta name
meta_keys=['video_name']), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['raw_feature']), # Keys to be converted from image to tensor
dict( # Config of ToDataContainer
type='ToDataContainer', # Pipeline to convert the data to DataContainer
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)]) # Required fields to be converted with keys and attributes
]
val_pipeline = [ # List of validation pipeline steps
dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
dict(type='GenerateLocalizationLabels'), # Generate localization labels pipeline
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
keys=['raw_feature', 'gt_bbox'], # Keys of input
meta_name='video_meta', # Meta name
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['raw_feature']), # Keys to be converted from image to tensor
dict( # Config of ToDataContainer
type='ToDataContainer', # Pipeline to convert the data to DataContainer
fields=[dict(key='gt_bbox', stack=False, cpu_only=True)]) # Required fields to be converted with keys and attributes
]
test_pipeline = [ # List of testing pipeline steps
dict(type='LoadLocalizationFeature'), # Load localization feature pipeline
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the localizer
keys=['raw_feature'], # Keys of input
meta_name='video_meta', # Meta name
meta_keys=[
'video_name', 'duration_second', 'duration_frame', 'annotations',
'feature_frame'
]), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['raw_feature']), # Keys to be converted from image to tensor
]
data = dict( # Config of data
videos_per_gpu=8, # Batch size of each single GPU
workers_per_gpu=8, # Workers to pre-fetch data for each single GPU
train_dataloader=dict( # Additional config of train dataloader
drop_last=True), # Whether to drop out the last batch of data in training
val_dataloader=dict( # Additional config of validation dataloader
videos_per_gpu=1), # Batch size of each single GPU during evaluation
test_dataloader=dict( # Additional config of test dataloader
videos_per_gpu=2), # Batch size of each single GPU during testing
test=dict( # Testing dataset config
type=dataset_type,
ann_file=ann_file_test,
pipeline=test_pipeline,
data_prefix=data_root_val),
val=dict( # Validation dataset config
type=dataset_type,
ann_file=ann_file_val,
pipeline=val_pipeline,
data_prefix=data_root_val),
train=dict( # Training dataset config
type=dataset_type,
ann_file=ann_file_train,
pipeline=train_pipeline,
data_prefix=data_root))
# optimizer
optimizer = dict(
# Config used to build optimizer, support (1). All the optimizers in PyTorch
# whose arguments are also the same as those in PyTorch. (2). Custom optimizers
# which are built on `constructor`, referring to "tutorials/5_new_modules.md"
# for implementation.
type='Adam', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
lr=0.001, # Learning rate, see detail usages of the parameters in the documentation of PyTorch
weight_decay=0.0001) # Weight decay of Adam
optimizer_config = dict( # Config used to build the optimizer hook
grad_clip=None) # Most of the methods do not use gradient clip
# learning policy
lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
policy='step', # Policy of scheduler, also support CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
step=7) # Steps to decay the learning rate
total_epochs = 9 # Total epochs to train the model
checkpoint_config = dict( # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
interval=1) # Interval to save checkpoint
evaluation = dict( # Config of evaluation during training
interval=1, # Interval to perform evaluation
metrics=['AR@AN']) # Metrics to be performed
log_config = dict( # Config to register logger hook
interval=50, # Interval to print the log
hooks=[ # Hooks to be implemented during training
dict(type='TextLoggerHook'), # The logger used to record the training process
# dict(type='TensorboardLoggerHook'), # The Tensorboard logger is also supported
])
# runtime settings
dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set
log_level = 'INFO' # The level of logging
work_dir = './work_dirs/bmn_400x100_2x8_9e_activitynet_feature/' # Directory to save the model checkpoints and logs for the current experiments
load_from = None # Load a model from the given path as a pre-trained model. This will not resume training
resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint was saved
workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
output_config = dict( # Config of localization output
out=f'{work_dir}/results.json', # Path to output file
output_format='json') # File format of output file
```
### Config System for Action Recognition
We incorporate modular design into our config system,
which is convenient to conduct various experiments.
- An Example of TSN
To help users have a basic idea of a complete config structure and the modules in an action recognition system,
we add brief comments to the config of TSN below.
For more detailed usage and alternatives for each parameter in each module, please refer to the API documentation.
```python
# model settings
model = dict( # Config of the model
type='Recognizer2D', # Type of the recognizer
backbone=dict( # Dict for backbone
type='ResNet', # Name of the backbone
pretrained='torchvision://resnet50', # The url/site of the pretrained model
depth=50, # Depth of ResNet model
norm_eval=False), # Whether to set BN layers to eval mode when training
cls_head=dict( # Dict for classification head
type='TSNHead', # Name of classification head
num_classes=400, # Number of classes to be classified.
in_channels=2048, # The input channels of classification head.
spatial_type='avg', # Type of pooling in spatial dimension
consensus=dict(type='AvgConsensus', dim=1), # Config of consensus module
dropout_ratio=0.4, # Probability in dropout layer
init_std=0.01), # Std value for linear layer initialization
# model training and testing settings
train_cfg=None, # Config of training hyperparameters for TSN
test_cfg=dict(average_clips=None)) # Config for testing hyperparameters for TSN.
# dataset settings
dataset_type = 'RawframeDataset' # Type of dataset for training, validation and testing
data_root = 'data/kinetics400/rawframes_train/' # Root path to data for training
data_root_val = 'data/kinetics400/rawframes_val/' # Root path to data for validation and testing
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt' # Path to the annotation file for training
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt' # Path to the annotation file for validation
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt' # Path to the annotation file for testing
img_norm_cfg = dict( # Config of image normalization used in data pipeline
mean=[123.675, 116.28, 103.53], # Mean values of different channels to normalize
std=[58.395, 57.12, 57.375], # Std values of different channels to normalize
to_bgr=False) # Whether to convert channels from RGB to BGR
train_pipeline = [ # List of training pipeline steps
dict( # Config of SampleFrames
type='SampleFrames', # Sample frames pipeline, sampling frames from video
clip_len=1, # Frames of each sampled output clip
frame_interval=1, # Temporal interval of adjacent sampled frames
num_clips=3), # Number of clips to be sampled
dict( # Config of RawFrameDecode
type='RawFrameDecode'), # Load and decode Frames pipeline, picking raw frames with given indices
dict( # Config of Resize
type='Resize', # Resize pipeline
scale=(-1, 256)), # The scale to resize images
dict( # Config of MultiScaleCrop
type='MultiScaleCrop', # Multi scale crop pipeline, cropping images with a list of randomly selected scales
input_size=224, # Input size of the network
scales=(1, 0.875, 0.75, 0.66), # Scales of width and height to be selected
random_crop=False, # Whether to randomly sample cropping bbox
max_wh_scale_gap=1), # Maximum gap of w and h scale levels
dict( # Config of Resize
type='Resize', # Resize pipeline
scale=(224, 224), # The scale to resize images
keep_ratio=False), # Whether to resize with changing the aspect ratio
dict( # Config of Flip
type='Flip', # Flip Pipeline
flip_ratio=0.5), # Probability of implementing flip
dict( # Config of Normalize
type='Normalize', # Normalize pipeline
**img_norm_cfg), # Config of image normalization
dict( # Config of FormatShape
type='FormatShape', # Format shape pipeline, Format final image shape to the given input_format
input_format='NCHW'), # Final image shape format
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
keys=['imgs', 'label'], # Keys of input
meta_keys=[]), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['imgs', 'label']) # Keys to be converted from image to tensor
]
val_pipeline = [ # List of validation pipeline steps
dict( # Config of SampleFrames
type='SampleFrames', # Sample frames pipeline, sampling frames from video
clip_len=1, # Frames of each sampled output clip
frame_interval=1, # Temporal interval of adjacent sampled frames
num_clips=3, # Number of clips to be sampled
test_mode=True), # Whether to set test mode in sampling
dict( # Config of RawFrameDecode
type='RawFrameDecode'), # Load and decode Frames pipeline, picking raw frames with given indices
dict( # Config of Resize
type='Resize', # Resize pipeline
scale=(-1, 256)), # The scale to resize images
dict( # Config of CenterCrop
type='CenterCrop', # Center crop pipeline, cropping the center area from images
crop_size=224), # The size to crop images
dict( # Config of Flip
type='Flip', # Flip pipeline
flip_ratio=0), # Probability of implementing flip
dict( # Config of Normalize
type='Normalize', # Normalize pipeline
**img_norm_cfg), # Config of image normalization
dict( # Config of FormatShape
type='FormatShape', # Format shape pipeline, Format final image shape to the given input_format
input_format='NCHW'), # Final image shape format
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
keys=['imgs', 'label'], # Keys of input
meta_keys=[]), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['imgs']) # Keys to be converted from image to tensor
]
test_pipeline = [ # List of testing pipeline steps
dict( # Config of SampleFrames
type='SampleFrames', # Sample frames pipeline, sampling frames from video
clip_len=1, # Frames of each sampled output clip
frame_interval=1, # Temporal interval of adjacent sampled frames
num_clips=25, # Number of clips to be sampled
test_mode=True), # Whether to set test mode in sampling
dict( # Config of RawFrameDecode
type='RawFrameDecode'), # Load and decode Frames pipeline, picking raw frames with given indices
dict( # Config of Resize
type='Resize', # Resize pipeline
scale=(-1, 256)), # The scale to resize images
dict( # Config of TenCrop
type='TenCrop', # Ten crop pipeline, cropping ten area from images
crop_size=224), # The size to crop images
dict( # Config of Flip
type='Flip', # Flip pipeline
flip_ratio=0), # Probability of implementing flip
dict( # Config of Normalize
type='Normalize', # Normalize pipeline
**img_norm_cfg), # Config of image normalization
dict( # Config of FormatShape
type='FormatShape', # Format shape pipeline, Format final image shape to the given input_format
input_format='NCHW'), # Final image shape format
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the recognizer
keys=['imgs', 'label'], # Keys of input
meta_keys=[]), # Meta keys of input
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['imgs']) # Keys to be converted from image to tensor
]
data = dict( # Config of data
videos_per_gpu=32, # Batch size of each single GPU
workers_per_gpu=2, # Workers to pre-fetch data for each single GPU
train_dataloader=dict( # Additional config of train dataloader
drop_last=True), # Whether to drop out the last batch of data in training
val_dataloader=dict( # Additional config of validation dataloader
videos_per_gpu=1), # Batch size of each single GPU during evaluation
test_dataloader=dict( # Additional config of test dataloader
videos_per_gpu=2), # Batch size of each single GPU during testing
train=dict( # Training dataset config
type=dataset_type,
ann_file=ann_file_train,
data_prefix=data_root,
pipeline=train_pipeline),
val=dict( # Validation dataset config
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline),
test=dict( # Testing dataset config
type=dataset_type,
ann_file=ann_file_test,
data_prefix=data_root_val,
pipeline=test_pipeline))
# optimizer
optimizer = dict(
# Config used to build optimizer, support (1). All the optimizers in PyTorch
# whose arguments are also the same as those in PyTorch. (2). Custom optimizers
# which are built on `constructor`, referring to "tutorials/5_new_modules.md"
# for implementation.
type='SGD', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
lr=0.01, # Learning rate, see detail usages of the parameters in the documentation of PyTorch
momentum=0.9, # Momentum,
weight_decay=0.0001) # Weight decay of SGD
optimizer_config = dict( # Config used to build the optimizer hook
grad_clip=dict(max_norm=40, norm_type=2)) # Use gradient clip
# learning policy
lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
policy='step', # Policy of scheduler, also support CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
step=[40, 80]) # Steps to decay the learning rate
total_epochs = 100 # Total epochs to train the model
checkpoint_config = dict( # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
interval=5) # Interval to save checkpoint
evaluation = dict( # Config of evaluation during training
interval=5, # Interval to perform evaluation
metrics=['top_k_accuracy', 'mean_class_accuracy'], # Metrics to be performed
metric_options=dict(top_k_accuracy=dict(topk=(1, 3))), # Set top-k accuracy to 1 and 3 during validation
save_best='top_k_accuracy') # set `top_k_accuracy` as key indicator to save best checkpoint
eval_config = dict(
metric_options=dict(top_k_accuracy=dict(topk=(1, 3)))) # Set top-k accuracy to 1 and 3 during testing. You can also use `--eval top_k_accuracy` to assign evaluation metrics
log_config = dict( # Config to register logger hook
interval=20, # Interval to print the log
hooks=[ # Hooks to be implemented during training
dict(type='TextLoggerHook'), # The logger used to record the training process
# dict(type='TensorboardLoggerHook'), # The Tensorboard logger is also supported
])
# runtime settings
dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set
log_level = 'INFO' # The level of logging
work_dir = './work_dirs/tsn_r50_1x1x3_100e_kinetics400_rgb/' # Directory to save the model checkpoints and logs for the current experiments
load_from = None # Load a model from the given path as a pre-trained model. This will not resume training
resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint was saved
workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
```
### Config System for Spatio-Temporal Action Detection
We incorporate modular design into our config system, which is convenient to conduct various experiments.
- An Example of FastRCNN
To help users have a basic idea of a complete config structure and the modules in a spatio-temporal action detection system,
we add brief comments to the config of FastRCNN below.
For more detailed usage and alternatives for each parameter in each module, please refer to the API documentation.
```python
# model setting
model = dict( # Config of the model
type='FastRCNN', # Type of the detector
backbone=dict( # Dict for backbone
type='ResNet3dSlowOnly', # Name of the backbone
depth=50, # Depth of ResNet model
pretrained=None, # The url/site of the pretrained model
pretrained2d=False, # If the pretrained model is 2D
lateral=False, # If the backbone is with lateral connections
num_stages=4, # Stages of ResNet model
conv1_kernel=(1, 7, 7), # Conv1 kernel size
conv1_stride_t=1, # Conv1 temporal stride
pool1_stride_t=1, # Pool1 temporal stride
spatial_strides=(1, 2, 2, 1)), # The spatial stride for each ResNet stage
roi_head=dict( # Dict for roi_head
type='AVARoIHead', # Name of the roi_head
bbox_roi_extractor=dict( # Dict for bbox_roi_extractor
type='SingleRoIExtractor3D', # Name of the bbox_roi_extractor
roi_layer_type='RoIAlign', # Type of the RoI op
output_size=8, # Output feature size of the RoI op
with_temporal_pool=True), # If temporal dim is pooled
bbox_head=dict( # Dict for bbox_head
type='BBoxHeadAVA', # Name of the bbox_head
in_channels=2048, # Number of channels of the input feature
num_classes=81, # Number of action classes + 1
multilabel=True, # If the dataset is multilabel
dropout_ratio=0.5)), # The dropout ratio used
# model training and testing settings
train_cfg=dict( # Training config of FastRCNN
rcnn=dict( # Dict for rcnn training config
assigner=dict( # Dict for assigner
type='MaxIoUAssignerAVA', # Name of the assigner
pos_iou_thr=0.9, # IoU threshold for positive examples, > pos_iou_thr -> positive
neg_iou_thr=0.9, # IoU threshold for negative examples, < neg_iou_thr -> negative
min_pos_iou=0.9), # Minimum acceptable IoU for positive examples
sampler=dict( # Dict for sample
type='RandomSampler', # Name of the sampler
num=32, # Batch Size of the sampler
pos_fraction=1, # Positive bbox fraction of the sampler
neg_pos_ub=-1, # Upper bound of the ratio of num negative to num positive
add_gt_as_proposals=True), # Add gt bboxes as proposals
pos_weight=1.0, # Loss weight of positive examples
debug=False)), # Debug mode
test_cfg=dict( # Testing config of FastRCNN
rcnn=dict( # Dict for rcnn testing config
action_thr=0.002))) # The threshold of an action
# dataset settings
dataset_type = 'AVADataset' # Type of dataset for training, validation and testing
data_root = 'data/ava/rawframes' # Root path to data
anno_root = 'data/ava/annotations' # Root path to annotations
ann_file_train = f'{anno_root}/ava_train_v2.1.csv' # Path to the annotation file for training
ann_file_val = f'{anno_root}/ava_val_v2.1.csv' # Path to the annotation file for validation
exclude_file_train = f'{anno_root}/ava_train_excluded_timestamps_v2.1.csv' # Path to the exclude annotation file for training
exclude_file_val = f'{anno_root}/ava_val_excluded_timestamps_v2.1.csv' # Path to the exclude annotation file for validation
label_file = f'{anno_root}/ava_action_list_v2.1_for_activitynet_2018.pbtxt' # Path to the label file
proposal_file_train = f'{anno_root}/ava_dense_proposals_train.FAIR.recall_93.9.pkl' # Path to the human detection proposals for training examples
proposal_file_val = f'{anno_root}/ava_dense_proposals_val.FAIR.recall_93.9.pkl' # Path to the human detection proposals for validation examples
img_norm_cfg = dict( # Config of image normalization used in data pipeline
mean=[123.675, 116.28, 103.53], # Mean values of different channels to normalize
std=[58.395, 57.12, 57.375], # Std values of different channels to normalize
to_bgr=False) # Whether to convert channels from RGB to BGR
train_pipeline = [ # List of training pipeline steps
dict( # Config of SampleFrames
type='AVASampleFrames', # Sample frames pipeline, sampling frames from video
clip_len=4, # Frames of each sampled output clip
frame_interval=16), # Temporal interval of adjacent sampled frames
dict( # Config of RawFrameDecode
type='RawFrameDecode'), # Load and decode Frames pipeline, picking raw frames with given indices
dict( # Config of RandomRescale
type='RandomRescale', # Randomly rescale the short edge within a given range
scale_range=(256, 320)), # The short-edge size range of RandomRescale
dict( # Config of RandomCrop
type='RandomCrop', # Randomly crop a patch with the given size
size=256), # The size of the cropped patch
dict( # Config of Flip
type='Flip', # Flip Pipeline
flip_ratio=0.5), # Probability of implementing flip
dict( # Config of Normalize
type='Normalize', # Normalize pipeline
**img_norm_cfg), # Config of image normalization
dict( # Config of FormatShape
type='FormatShape', # Format shape pipeline, Format final image shape to the given input_format
input_format='NCTHW', # Final image shape format
collapse=True), # Collapse the dim N if N == 1
dict( # Config of Rename
type='Rename', # Rename keys
mapping=dict(imgs='img')), # The old name to new name mapping
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']), # Keys to be converted from image to tensor
dict( # Config of ToDataContainer
type='ToDataContainer', # Convert other types to DataContainer type pipeline
fields=[ # Fields to convert to DataContainer
dict( # Dict of fields
key=['proposals', 'gt_bboxes', 'gt_labels'], # Keys to Convert to DataContainer
stack=False)]), # Whether to stack these tensor
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the detector
keys=['img', 'proposals', 'gt_bboxes', 'gt_labels'], # Keys of input
meta_keys=['scores', 'entity_ids']), # Meta keys of input
]
val_pipeline = [ # List of validation pipeline steps
dict( # Config of SampleFrames
type='AVASampleFrames', # Sample frames pipeline, sampling frames from video
clip_len=4, # Frames of each sampled output clip
frame_interval=16), # Temporal interval of adjacent sampled frames
dict( # Config of RawFrameDecode
type='RawFrameDecode'), # Load and decode Frames pipeline, picking raw frames with given indices
dict( # Config of Resize
type='Resize', # Resize pipeline
scale=(-1, 256)), # The scale to resize images
dict( # Config of Normalize
type='Normalize', # Normalize pipeline
**img_norm_cfg), # Config of image normalization
dict( # Config of FormatShape
type='FormatShape', # Format shape pipeline, Format final image shape to the given input_format
input_format='NCTHW', # Final image shape format
collapse=True), # Collapse the dim N if N == 1
dict( # Config of Rename
type='Rename', # Rename keys
mapping=dict(imgs='img')), # The old name to new name mapping
dict( # Config of ToTensor
type='ToTensor', # Convert other types to tensor type pipeline
keys=['img', 'proposals']), # Keys to be converted from image to tensor
dict( # Config of ToDataContainer
type='ToDataContainer', # Convert other types to DataContainer type pipeline
fields=[ # Fields to convert to DataContainer
dict( # Dict of fields
key=['proposals'], # Keys to Convert to DataContainer
stack=False)]), # Whether to stack these tensor
dict( # Config of Collect
type='Collect', # Collect pipeline that decides which keys in the data should be passed to the detector
keys=['img', 'proposals'], # Keys of input
meta_keys=['scores', 'entity_ids'], # Meta keys of input
nested=True) # Whether to wrap the data in a nested list
]
data = dict( # Config of data
videos_per_gpu=16, # Batch size of each single GPU
workers_per_gpu=2, # Workers to pre-fetch data for each single GPU
val_dataloader=dict( # Additional config of validation dataloader
videos_per_gpu=1), # Batch size of each single GPU during evaluation
train=dict( # Training dataset config
type=dataset_type,
ann_file=ann_file_train,
exclude_file=exclude_file_train,
pipeline=train_pipeline,
label_file=label_file,
proposal_file=proposal_file_train,
person_det_score_thr=0.9,
data_prefix=data_root),
val=dict( # Validation dataset config
type=dataset_type,
ann_file=ann_file_val,
exclude_file=exclude_file_val,
pipeline=val_pipeline,
label_file=label_file,
proposal_file=proposal_file_val,
person_det_score_thr=0.9,
data_prefix=data_root))
data['test'] = data['val'] # Set test_dataset as val_dataset
# optimizer
optimizer = dict(
# Config used to build optimizer, support (1). All the optimizers in PyTorch
# whose arguments are also the same as those in PyTorch. (2). Custom optimizers
# which are built on `constructor`, referring to "tutorials/5_new_modules.md"
# for implementation.
type='SGD', # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
lr=0.2, # Learning rate, see detail usages of the parameters in the documentation of PyTorch (for 8gpu)
momentum=0.9, # Momentum,
weight_decay=0.00001) # Weight decay of SGD
optimizer_config = dict( # Config used to build the optimizer hook
grad_clip=dict(max_norm=40, norm_type=2)) # Use gradient clip
lr_config = dict( # Learning rate scheduler config used to register LrUpdater hook
policy='step', # Policy of scheduler, also support CosineAnnealing, Cyclic, etc. Refer to details of supported LrUpdater from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9
step=[40, 80], # Steps to decay the learning rate
warmup='linear', # Warmup strategy
warmup_by_epoch=True, # Whether warmup_iters counts epochs (True) or iterations (False)
warmup_iters=5, # Number of iters or epochs for warmup
warmup_ratio=0.1) # The initial learning rate is warmup_ratio * lr
total_epochs = 20 # Total epochs to train the model
checkpoint_config = dict( # Config to set the checkpoint hook, Refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation
interval=1) # Interval to save checkpoint
workflow = [('train', 1)] # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once
evaluation = dict( # Config of evaluation during training
interval=1, save_best='mAP@0.5IOU') # Interval to perform evaluation and the key for saving best checkpoint
log_config = dict( # Config to register logger hook
interval=20, # Interval to print the log
hooks=[ # Hooks to be implemented during training
dict(type='TextLoggerHook'), # The logger used to record the training process
])
# runtime settings
dist_params = dict(backend='nccl') # Parameters to setup distributed training, the port can also be set
log_level = 'INFO' # The level of logging
work_dir = ('./work_dirs/ava/' # Directory to save the model checkpoints and logs for the current experiments
'slowonly_kinetics_pretrained_r50_4x16x1_20e_ava_rgb')
load_from = ('https://download.openmmlab.com/mmaction/recognition/slowonly/' # load models as a pre-trained model from a given path. This will not resume training
'slowonly_r50_4x16x1_256e_kinetics400_rgb/'
'slowonly_r50_4x16x1_256e_kinetics400_rgb_20200704-a69556c6.pth')
resume_from = None # Resume checkpoints from a given path, the training will be resumed from the epoch when the checkpoint was saved
```
## FAQ
### Use intermediate variables in configs
Some intermediate variables are used in the config files, like `train_pipeline`/`val_pipeline`/`test_pipeline`,
`ann_file_train`/`ann_file_val`/`ann_file_test`, `img_norm_cfg` etc.
For example, we would like to first define `train_pipeline`/`val_pipeline`/`test_pipeline` and pass them into `data`.
Thus, `train_pipeline`/`val_pipeline`/`test_pipeline` are intermediate variables.
We also define `ann_file_train`/`ann_file_val`/`ann_file_test` and `data_root`/`data_root_val` to provide the data pipeline with some
basic information.
In addition, we use `img_norm_cfg` as an intermediate variable to construct data augmentation components.
```python
...
dataset_type = 'RawframeDataset'
data_root = 'data/kinetics400/rawframes_train'
data_root_val = 'data/kinetics400/rawframes_val'
ann_file_train = 'data/kinetics400/kinetics400_train_list_rawframes.txt'
ann_file_val = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
ann_file_test = 'data/kinetics400/kinetics400_val_list_rawframes.txt'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.8),
random_crop=False,
max_wh_scale_gap=0),
dict(type='Resize', scale=(224, 224), keep_ratio=False),
dict(type='Flip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
dict(
type='SampleFrames',
clip_len=32,
frame_interval=2,
num_clips=1,
test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
dict(
type='SampleFrames',
clip_len=32,
frame_interval=2,
num_clips=10,
test_mode=True),
dict(type='RawFrameDecode'),
dict(type='Resize', scale=(-1, 256)),
dict(type='ThreeCrop', crop_size=256),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
data = dict(
videos_per_gpu=8,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=ann_file_train,
data_prefix=data_root,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=val_pipeline),
test=dict(
type=dataset_type,
ann_file=ann_file_val,
data_prefix=data_root_val,
pipeline=test_pipeline))
```
# Tutorial 2: Finetuning Models
This tutorial provides instructions on how to use pre-trained models and finetune them
on other datasets, so that better performance can be achieved.
<!-- TOC -->
- [Outline](#outline)
- [Modify Head](#modify-head)
- [Modify Dataset](#modify-dataset)
- [Modify Training Schedule](#modify-training-schedule)
- [Use Pre-Trained Model](#use-pre-trained-model)
<!-- TOC -->
## Outline
There are two steps to finetune a model on a new dataset.
1. Add support for the new dataset. See [Tutorial 3: Adding New Dataset](3_new_dataset.md).
2. Modify the configs. This will be discussed in this tutorial.
For example, if users want to finetune a model pre-trained on the Kinetics-400 dataset on another dataset, say UCF101,
then four parts of the config (see [here](1_config.md)) need attention.
## Modify Head
The `num_classes` in the `cls_head` needs to be changed to the class number of the new dataset.
The weights of the pre-trained model are reused except for the final prediction layer,
so it is safe to change the class number.
In our case, UCF101 has 101 classes,
so we change it from 400 (the class number of Kinetics-400) to 101.
```python
model = dict(
type='Recognizer2D',
backbone=dict(
type='ResNet',
pretrained='torchvision://resnet50',
depth=50,
norm_eval=False),
cls_head=dict(
type='TSNHead',
num_classes=101, # change from 400 to 101
in_channels=2048,
spatial_type='avg',
consensus=dict(type='AvgConsensus', dim=1),
dropout_ratio=0.4,
init_std=0.01),
train_cfg=None,
test_cfg=dict(average_clips=None))
```
Note that the `pretrained='torchvision://resnet50'` setting is used for initializing the backbone.
If you are training a new model from ImageNet-pretrained weights, this is for you.
However, this setting is not related to our task at hand.
What we need is `load_from`, which will be discussed later.
## Modify Dataset
MMAction2 supports UCF101, Kinetics-400, Moments in Time, Multi-Moments in Time, THUMOS14,
Something-Something V1&V2, and the ActivityNet dataset.
Users may need to adapt one of the above datasets to fit their own dataset.
In our case, UCF101 is already supported by various dataset types, like `RawframeDataset`,
so we change the config as follows.
```python
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'data/ucf101/rawframes_train/'
data_root_val = 'data/ucf101/rawframes_val/'
ann_file_train = 'data/ucf101/ucf101_train_list.txt'
ann_file_val = 'data/ucf101/ucf101_val_list.txt'
ann_file_test = 'data/ucf101/ucf101_val_list.txt'
```
## Modify Training Schedule
Finetuning usually requires a smaller learning rate and fewer training epochs.
```python
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) # change from 0.01 to 0.005
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='step', step=[20, 40])
total_epochs = 50 # change from 100 to 50
checkpoint_config = dict(interval=5)
```
## Use Pre-Trained Model
To use the pre-trained model for the whole network, the new config adds the link to the pre-trained model in `load_from`.
We set `load_from=None` as the default in `configs/_base_/default_runtime.py` and, owing to the [inheritance design](/docs/tutorials/1_config.md), users can directly change it by setting `load_from` in their configs.
```python
# use the pre-trained model for the whole TSN network
load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth' # model path can be found in model zoo
```
# Tutorial 3: Adding New Dataset
In this tutorial, we will introduce how to customize your own dataset for the project by reorganizing data and mixing datasets.
<!-- TOC -->
- [Customize Datasets by Reorganizing Data](#customize-datasets-by-reorganizing-data)
- [Reorganize datasets to existing format](#reorganize-datasets-to-existing-format)
- [An example of a custom dataset](#an-example-of-a-custom-dataset)
- [Customize Dataset by Mixing Dataset](#customize-dataset-by-mixing-dataset)
- [Repeat dataset](#repeat-dataset)
<!-- TOC -->
## Customize Datasets by Reorganizing Data
### Reorganize datasets to existing format
The simplest way is to convert your dataset to an existing dataset format (`RawframeDataset` or `VideoDataset`).
There are three kinds of annotation files.
- rawframe annotation
The annotation of a rawframe dataset is a text file with multiple lines,
where each line indicates the `frame_directory` (relative path) of a video,
the `total_frames` of the video and the `label` of the video, separated by whitespace.
Here is an example.
```
some/directory-1 163 1
some/directory-2 122 1
some/directory-3 258 2
some/directory-4 234 2
some/directory-5 295 3
some/directory-6 121 3
```
- video annotation
The annotation of a video dataset is a text file with multiple lines,
where each line indicates a sample video with its `filepath` (relative path) and `label`,
separated by whitespace.
Here is an example.
```
some/path/000.mp4 1
some/path/001.mp4 1
some/path/002.mp4 2
some/path/003.mp4 2
some/path/004.mp4 3
some/path/005.mp4 3
```
- ActivityNet annotation
The annotation of ActivityNet dataset is a json file. Each key is a video name
and the corresponding value is the meta data and annotation for the video.
Here is an example.
```
{
"video1": {
"duration_second": 211.53,
"duration_frame": 6337,
"annotations": [
{
"segment": [
30.025882995319815,
205.2318595943838
],
"label": "Rock climbing"
}
],
"feature_frame": 6336,
"fps": 30.0,
"rfps": 29.9579255898
},
"video2": {
"duration_second": 26.75,
"duration_frame": 647,
"annotations": [
{
"segment": [
2.578755070202808,
24.914101404056165
],
"label": "Drinking beer"
}
],
"feature_frame": 624,
"fps": 24.0,
"rfps": 24.1869158879
}
}
```
There are two ways to work with custom datasets.
- online conversion
You can write a new Dataset class inherited from [BaseDataset](/mmaction/datasets/base.py), and overwrite three methods
`load_annotations(self)`, `evaluate(self, results, metrics, logger)` and `dump_results(self, results, out)`,
like [RawframeDataset](/mmaction/datasets/rawframe_dataset.py), [VideoDataset](/mmaction/datasets/video_dataset.py) or [ActivityNetDataset](/mmaction/datasets/activitynet_dataset.py).
- offline conversion
You can convert the annotation format to the expected format above and save it to
a pickle or json file, then simply use `RawframeDataset`, `VideoDataset` or `ActivityNetDataset` (a minimal conversion sketch is shown below).
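For the offline route, here is a minimal conversion sketch; the source annotation format (`my_annotations.json` with `video`/`label` records) is only hypothetical, and the output follows the video-dataset list format described above.

```python
import json

# Hypothetical source: a JSON list of {"video": "some/path/000.mp4", "label": 1} records.
with open('my_annotations.json', 'r') as f:
    records = json.load(f)

# Write the whitespace-separated "filepath label" list expected by VideoDataset.
with open('my_video_list.txt', 'w') as f:
    for rec in records:
        f.write(f"{rec['video']} {rec['label']}\n")
```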
After the data pre-processing, the users need to further modify the config files to use the dataset.
Here is an example of using a custom dataset in rawframe format.
In `configs/task/method/my_custom_config.py`:
```python
...
# dataset settings
dataset_type = 'RawframeDataset'
data_root = 'path/to/your/root'
data_root_val = 'path/to/your/root_val'
ann_file_train = 'data/custom/custom_train_list.txt'
ann_file_val = 'data/custom/custom_val_list.txt'
ann_file_test = 'data/custom/custom_val_list.txt'
...
data = dict(
videos_per_gpu=32,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=ann_file_train,
...),
val=dict(
type=dataset_type,
ann_file=ann_file_val,
...),
test=dict(
type=dataset_type,
ann_file=ann_file_test,
...))
...
```
We use this approach to support the rawframe dataset.
### An example of a custom dataset
Assume the annotations are in a new format in text files, and the image file names follow a template like `img_00005.jpg`.
The video annotations are stored in the text file `annotation.txt` as follows.
```
directory,total frames,class
D32_1gwq35E,299,66
-G-5CJ0JkKY,249,254
T4h1bvOd9DA,299,33
4uZ27ivBl00,299,341
0LfESFkfBSw,249,186
-YIsNpBEx6c,299,169
```
We can create a new dataset in `mmaction/datasets/my_dataset.py` to load the data.
```python
import copy
import os.path as osp
import mmcv
from .base import BaseDataset
from .builder import DATASETS
@DATASETS.register_module()
class MyDataset(BaseDataset):
def __init__(self,
ann_file,
pipeline,
data_prefix=None,
test_mode=False,
filename_tmpl='img_{:05}.jpg'):
super().__init__(ann_file, pipeline, data_prefix=data_prefix, test_mode=test_mode)
self.filename_tmpl = filename_tmpl
def load_annotations(self):
video_infos = []
with open(self.ann_file, 'r') as fin:
for line in fin:
if line.startswith("directory"):
continue
frame_dir, total_frames, label = line.split(',')
if self.data_prefix is not None:
frame_dir = osp.join(self.data_prefix, frame_dir)
video_infos.append(
dict(
frame_dir=frame_dir,
total_frames=int(total_frames),
label=int(label)))
return video_infos
def prepare_train_frames(self, idx):
results = copy.deepcopy(self.video_infos[idx])
results['filename_tmpl'] = self.filename_tmpl
return self.pipeline(results)
def prepare_test_frames(self, idx):
results = copy.deepcopy(self.video_infos[idx])
results['filename_tmpl'] = self.filename_tmpl
return self.pipeline(results)
def evaluate(self,
results,
metrics='top_k_accuracy',
topk=(1, 5),
logger=None):
pass
```
Then in the config, to use `MyDataset`, you can modify the config as follows.
```python
dataset_A_train = dict(
type='MyDataset',
ann_file=ann_file_train,
pipeline=train_pipeline
)
```
## Customize Dataset by Mixing Dataset
MMAction2 also supports mixing datasets for training. Currently it supports repeating a dataset.
### Repeat dataset
We use `RepeatDataset` as a wrapper to repeat the dataset. For example, suppose the original dataset is `Dataset_A`;
to repeat it, the config looks like the following.
```python
dataset_A_train = dict(
type='RepeatDataset',
times=N,
dataset=dict( # This is the original config of Dataset_A
type='Dataset_A',
...
pipeline=train_pipeline
)
)
```
# Tutorial 4: Customize Data Pipelines
In this tutorial, we will introduce the design of data pipelines, and how to customize and extend your own data pipelines for the project.
<!-- TOC -->
- [Tutorial 4: Customize Data Pipelines](#tutorial-4-customize-data-pipelines)
- [Design of Data Pipelines](#design-of-data-pipelines)
- [Data loading](#data-loading)
- [Pre-processing](#pre-processing)
- [Formatting](#formatting)
- [Extend and Use Custom Pipelines](#extend-and-use-custom-pipelines)
<!-- TOC -->
## Design of Data Pipelines
Following typical conventions, we use `Dataset` and `DataLoader` for data loading
with multiple workers. `Dataset` returns a dict of data items corresponding to
the arguments of the model's forward method.
Since the data in action recognition & localization may not be the same size (image size, gt bbox size, etc.),
the `DataContainer` in MMCV is used to help collect and distribute data of different sizes.
See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
The data preparation pipeline and the dataset are decoupled. Usually a dataset
defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict.
A pipeline consists of a sequence of operations. Each operation takes a dict as input and outputs a dict for the next operation.
We present a typical pipeline in the following figure. The blue blocks are pipeline operations.
As the pipeline proceeds, each operator can add new keys (marked in green) to the result dict or update the existing keys (marked in orange).
![pipeline figure](https://github.com/open-mmlab/mmaction2/raw/master/resources/data_pipeline.png)
The operations are categorized into data loading, pre-processing and formatting.
Here is a pipeline example for TSN.
```python
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),
dict(type='RawFrameDecode', io_backend='disk'),
dict(type='Resize', scale=(-1, 256)),
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.875, 0.75, 0.66),
random_crop=False,
max_wh_scale_gap=1),
dict(type='Resize', scale=(224, 224), keep_ratio=False),
dict(type='Flip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=3,
test_mode=True),
dict(type='RawFrameDecode', io_backend='disk'),
dict(type='Resize', scale=(-1, 256)),
dict(type='CenterCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
dict(
type='SampleFrames',
clip_len=1,
frame_interval=1,
num_clips=25,
test_mode=True),
dict(type='RawFrameDecode', io_backend='disk'),
dict(type='Resize', scale=(-1, 256)),
dict(type='TenCrop', crop_size=224),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs'])
]
```
We also support some lazy operators and encourage users to apply them.
Lazy ops record how the data should be processed, but postpone the actual processing of the raw data until the data reaches the `Fuse` stage.
Specifically, lazy ops avoid frequent read and modification operations on the raw data and instead process the raw data once in the final `Fuse` stage, thus accelerating data preprocessing.
Here is a pipeline example applying lazy ops.
```python
train_pipeline = [
dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
dict(type='RawFrameDecode', decoding_backend='turbojpeg'),
# The following three lazy ops only process the bbox of frames without
# modifying the raw data.
dict(type='Resize', scale=(-1, 256), lazy=True),
dict(
type='MultiScaleCrop',
input_size=224,
scales=(1, 0.8),
random_crop=False,
max_wh_scale_gap=0,
lazy=True),
dict(type='Resize', scale=(224, 224), keep_ratio=False, lazy=True),
# The lazy operator `Flip` only records whether a frame should be flipped and
# the flip direction.
dict(type='Flip', flip_ratio=0.5, lazy=True),
# Process the raw data once in the `Fuse` stage.
dict(type='Fuse'),
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
```
For each operation, we list the related dict fields that are added/updated/removed below, where `*` means the key may not be affected.
### Data loading
`SampleFrames`
- add: frame_inds, clip_len, frame_interval, num_clips, \*total_frames
`DenseSampleFrames`
- add: frame_inds, clip_len, frame_interval, num_clips, \*total_frames
`PyAVDecode`
- add: imgs, original_shape
- update: \*frame_inds
`DecordDecode`
- add: imgs, original_shape
- update: \*frame_inds
`OpenCVDecode`
- add: imgs, original_shape
- update: \*frame_inds
`RawFrameDecode`
- add: imgs, original_shape
- update: \*frame_inds
### Pre-processing
`RandomCrop`
- add: crop_bbox, img_shape
- update: imgs
`RandomResizedCrop`
- add: crop_bbox, img_shape
- update: imgs
`MultiScaleCrop`
- add: crop_bbox, img_shape, scales
- update: imgs
`Resize`
- add: img_shape, keep_ratio, scale_factor
- update: imgs
`Flip`
- add: flip, flip_direction
- update: imgs, label
`Normalize`
- add: img_norm_cfg
- update: imgs
`CenterCrop`
- add: crop_bbox, img_shape
- update: imgs
`ThreeCrop`
- add: crop_bbox, img_shape
- update: imgs
`TenCrop`
- add: crop_bbox, img_shape
- update: imgs
### Formatting
`ToTensor`
- update: specified by `keys`.
`ImageToTensor`
- update: specified by `keys`.
`Transpose`
- update: specified by `keys`.
`Collect`
- add: img_metas (the keys of img_metas is specified by `meta_keys`)
- remove: all other keys except for those specified by `keys`
It is **noteworthy** that the first key, commonly `imgs`, will be used as the main key to calculate the batch size.
`FormatShape`
- add: input_shape
- update: imgs
## Extend and Use Custom Pipelines
1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and returns a dict.
```python
from mmaction.datasets import PIPELINES


@PIPELINES.register_module()
class MyTransform:

    def __call__(self, results):
        # add or update fields of the result dict here
        results['my_key'] = 'my_value'
        return results
```
2. Import the new class.
```python
from .my_pipeline import MyTransform
```
3. Use it in config files.
```python
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='DenseSampleFrames', clip_len=8, frame_interval=8, num_clips=1),
dict(type='RawFrameDecode', io_backend='disk'),
dict(type='MyTransform'), # use a custom pipeline
dict(type='Normalize', **img_norm_cfg),
dict(type='FormatShape', input_format='NCTHW'),
dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
dict(type='ToTensor', keys=['imgs', 'label'])
]
```
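Since each pipeline operation is just a callable on a dict, a custom transform can also be checked in isolation before wiring it into a config. A minimal sketch, assuming the `MyTransform` class above lives in a module named `my_pipeline.py` (a hypothetical path):
```python
from my_pipeline import MyTransform  # hypothetical module from step 1

transform = MyTransform()
results = dict(imgs=[], label=0)  # a dummy result dict
results = transform(results)
print(results['my_key'])  # -> 'my_value'
```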
# Tutorial 5: Adding New Modules
In this tutorial, we will introduce how to customize the optimizer, develop new components, and add a new learning rate scheduler for this project.
<!-- TOC -->
- [Customize Optimizer](#customize-optimizer)
- [Customize Optimizer Constructor](#customize-optimizer-constructor)
- [Develop New Components](#develop-new-components)
- [Add new backbones](#add-new-backbones)
- [Add new heads](#add-new-heads)
- [Add new loss](#add-new-loss)
- [Add new learning rate scheduler (updater)](#add-new-learning-rate-scheduler--updater-)
<!-- TOC -->
## Customize Optimizer
An example of a customized optimizer is [CopyOfSGD](/mmaction/core/optimizer/copy_of_sgd.py), defined in `mmaction/core/optimizer/copy_of_sgd.py`.
More generally, a customized optimizer could be defined as follows.
Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b` and `c`.
You need to first implement the new optimizer in a file, e.g., in `mmaction/core/optimizer/my_optimizer.py`:
```python
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer


@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):

    def __init__(self, a, b, c):
        # implement the optimizer here
        pass
```
Then add this module in `mmaction/core/optimizer/__init__.py` so that the registry can find the new module and register it:
```python
from .my_optimizer import MyOptimizer
```
Then you can use `MyOptimizer` in the `optimizer` field of config files.
In the configs, the optimizers are defined by the field `optimizer` like the following:
```python
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
```
To use your own optimizer, the field can be changed to
```python
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
```
We already support all the optimizers implemented by PyTorch; the only modification needed is to change the `optimizer` field of config files.
For example, if you want to use `Adam` (though the performance may drop a lot), the modification could be as follows.
```python
optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
```
The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
## Customize Optimizer Constructor
Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
Users can do such fine-grained parameter tuning by customizing the optimizer constructor.
You can write a new optimizer constructor that inherits from [DefaultOptimizerConstructor](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py)
and overwrites the `add_params(self, params, module)` method.
An example of a customized optimizer constructor is [TSMOptimizerConstructor](/mmaction/core/optimizer/tsm_optimizer_constructor.py).
More generally, a customized optimizer constructor could be defined as follows.
In `mmaction/core/optimizer/my_optimizer_constructor.py`:
```python
from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor


@OPTIMIZER_BUILDERS.register_module()
class MyOptimizerConstructor(DefaultOptimizerConstructor):

    def add_params(self, params, module, **kwargs):
        pass  # assign parameter-specific options here
```
In `mmaction/core/optimizer/__init__.py`:
```python
from .my_optimizer_constructor import MyOptimizerConstructor
```
Then you can use `MyOptimizerConstructor` in the `optimizer` field of config files.
```python
# optimizer
optimizer = dict(
type='SGD',
constructor='MyOptimizerConstructor',
paramwise_cfg=dict(fc_lr5=True),
lr=0.02,
momentum=0.9,
weight_decay=0.0001)
```
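For illustration only, a hedged sketch of what such an `add_params` override might look like is shown below. This is not the actual `TSMOptimizerConstructor`; the class name and the 5x learning-rate rule for fc-layer parameters are assumed purely as an example.
```python
from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor


@OPTIMIZER_BUILDERS.register_module()
class FcLr5OptimizerConstructor(DefaultOptimizerConstructor):
    """Hypothetical constructor: give fc-layer parameters a 5x learning rate."""

    def add_params(self, params, module, **kwargs):
        # Note: `add_params` is only invoked when a non-empty `paramwise_cfg`
        # is given in the optimizer config, e.g. paramwise_cfg=dict(fc_lr5=True).
        for name, param in module.named_parameters():
            if not param.requires_grad:
                continue
            group = {'params': [param]}
            # `self.base_lr` and `self.paramwise_cfg` are populated by
            # DefaultOptimizerConstructor.__init__
            if self.paramwise_cfg.get('fc_lr5', False) and 'fc' in name:
                group['lr'] = self.base_lr * 5
            params.append(group)
```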
## Develop New Components
We basically categorize model components into 4 types.
- recognizer: the whole recognizer model pipeline, usually contains a backbone and cls_head.
- backbone: usually an FCN network to extract feature maps, e.g., ResNet, BNInception.
- cls_head: the component for classification task, usually contains an FC layer with some pooling layers.
- localizer: the model for temporal localization task, currently available: BSN, BMN, SSN.
### Add new backbones
Here we show how to develop new components, using TSN as an example.
1. Create a new file `mmaction/models/backbones/resnet.py`.
```python
import torch.nn as nn

from ..builder import BACKBONES


@BACKBONES.register_module()
class ResNet(nn.Module):

    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):  # should return a tuple
        pass

    def init_weights(self, pretrained=None):
        pass
```
2. Import the module in `mmaction/models/backbones/__init__.py`.
```python
from .resnet import ResNet
```
3. Use it in your config file.
```python
model = dict(
...
backbone=dict(
type='ResNet',
arg1=xxx,
arg2=xxx),
)
```
### Add new heads
Here we show how to develop a new head, using TSNHead as an example.
1. Create a new file `mmaction/models/heads/tsn_head.py`.
You can write a new classification head inheriting from [BaseHead](/mmaction/models/heads/base.py),
and overwrite the `init_weights(self)` and `forward(self, x)` methods.
```python
from ..builder import HEADS
from .base import BaseHead


@HEADS.register_module()
class TSNHead(BaseHead):

    def __init__(self, arg1, arg2):
        pass

    def forward(self, x):
        pass

    def init_weights(self):
        pass
```
2. Import the module in `mmaction/models/heads/__init__.py`
```python
from .tsn_head import TSNHead
```
3. Use it in your config file
```python
model = dict(
...
cls_head=dict(
type='TSNHead',
num_classes=400,
in_channels=2048,
arg1=xxx,
        arg2=xxx))
```
### Add new loss
Assume you want to add a new loss as `MyLoss`. To add a new loss function, users need to implement it in `mmaction/models/losses/my_loss.py`.
```python
import torch
import torch.nn as nn

from ..builder import LOSSES


def my_loss(pred, target):
    assert pred.size() == target.size() and target.numel() > 0
    loss = torch.abs(pred - target)
    return loss


@LOSSES.register_module()
class MyLoss(nn.Module):

    def forward(self, pred, target):
        loss = my_loss(pred, target)
        return loss
```
Then the users need to add it in the `mmaction/models/losses/__init__.py`
```python
from .my_loss import MyLoss, my_loss
```
To use it, modify the `loss_xxx` field. Since MyLoss is for regression, we can use it for the bbox loss `loss_bbox`.
```python
loss_bbox=dict(type='MyLoss'))
```
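To sanity-check the new loss in isolation (assuming the two files above have been added as described), a quick sketch is:
```python
import torch
from mmaction.models.losses import my_loss  # exported by the __init__.py above

pred = torch.zeros(2, 3)
target = torch.ones(2, 3)
print(my_loss(pred, target))  # element-wise L1 distance, a tensor of ones
```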
## Add new learning rate scheduler (updater)
The default way of constructing an lr updater (called a 'scheduler' in PyTorch convention) is to modify the config, for example:
```python
...
lr_config = dict(policy='step', step=[20, 40])
...
```
In the API of [`train.py`](/mmaction/apis/train.py), the learning rate updater hook is registered based on the config at:
```python
...
runner.register_training_hooks(
cfg.lr_config,
optimizer_config,
cfg.checkpoint_config,
cfg.log_config,
cfg.get('momentum_config', None))
...
```
So far, the supported updaters can be found in [mmcv](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), but if you want to customize a new learning rate updater, you may follow the steps below:
1. First, write your own LrUpdaterHook in `$MMAction2/mmaction/core/scheduler`. The snippet below is an example of a customized lr updater that uses a list of learning rates `lrs`, switching to the next value at each milestone in `steps`:
```python
from mmcv.runner import HOOKS
from mmcv.runner.hooks import LrUpdaterHook


# Register the new updater here
@HOOKS.register_module()
class RelativeStepLrUpdaterHook(LrUpdaterHook):
    # The updater should inherit from mmcv's LrUpdaterHook

    def __init__(self, steps, lrs, **kwargs):
        super().__init__(**kwargs)
        assert len(steps) == len(lrs)
        self.steps = steps
        self.lrs = lrs

    def get_lr(self, runner, base_lr):
        # Only this method needs to be overridden. It is called before each
        # training epoch (or iteration) and returns the learning rate to use.
        progress = runner.epoch if self.by_epoch else runner.iter
        for i in range(len(self.steps)):
            if progress < self.steps[i]:
                return self.lrs[i]
        # keep the last learning rate once all milestones have passed
        return self.lrs[-1]
```
2. Modify your config:
In your config file, replace the original `lr_config` with:
```python
lr_config = dict(policy='RelativeStep', steps=[20, 40, 60], lrs=[0.1, 0.01, 0.001])
```
More examples can be found in [mmcv](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py).
# Tutorial 6: Exporting a model to ONNX
Open Neural Network Exchange [(ONNX)](https://onnx.ai/) is an open ecosystem that empowers AI developers to choose the right tools as their project evolves.
<!-- TOC -->
- [Supported Models](#supported-models)
- [Usage](#usage)
- [Prerequisite](#prerequisite)
- [Recognizers](#recognizers)
- [Localizers](#localizers)
<!-- TOC -->
## Supported Models
So far, our codebase supports ONNX export for PyTorch models trained with MMAction2. The supported models are:
- I3D
- TSN
- TIN
- TSM
- R(2+1)D
- SLOWFAST
- SLOWONLY
- BMN
- BSN(tem, pem)
## Usage
For simple exporting, you can use the [script](/tools/deployment/pytorch2onnx.py) here. Note that the packages `onnx` and `onnxruntime` are required for verification after exporting.
### Prerequisite
First, install onnx and onnxruntime.
```shell
pip install onnx onnxruntime
```
We provide a Python script to export the PyTorch model trained by MMAction2 to ONNX.
```shell
python tools/deployment/pytorch2onnx.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--shape ${SHAPE}] \
[--verify] [--show] [--output-file ${OUTPUT_FILE}] [--is-localizer] [--opset-version ${VERSION}]
```
Optional arguments:
- `--shape`: The shape of the input tensor to the model. For a 2D recognizer (e.g. TSN), the input should be `$batch $clip $channel $height $width` (e.g. `1 1 3 224 224`); for a 3D recognizer (e.g. I3D), the input should be `$batch $clip $channel $time $height $width` (e.g. `1 1 3 32 224 224`); for a localizer such as BSN, the input of each module is different, please check its `forward` function. If not specified, it will be set to `1 1 3 224 224`.
- `--verify`: Determines whether to verify the exported model, i.e. check that it runs and that its outputs match the PyTorch model numerically. If not specified, it will be set to `False`.
- `--show`: Determines whether to print the architecture of the exported model. If not specified, it will be set to `False`.
- `--output-file`: The output onnx model name. If not specified, it will be set to `tmp.onnx`.
- `--is-localizer`: Determines whether the model to be exported is a localizer. If not specified, it will be set to `False`.
- `--opset-version`: Determines the opset version of ONNX; we recommend a recent version such as 11 for compatibility. If not specified, it will be set to `11`.
- `--softmax`: Determines whether to add a softmax layer at the end of recognizers. If not specified, it will be set to `False`. For now, localizers are not supported.
### Recognizers
For recognizers, please run:
```shell
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --shape $SHAPE --verify
```
### Localizers
For localizers, please run:
```shell
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --is-localizer --shape $SHAPE --verify
```
Please file an issue if you discover any checkpoints that are not exported correctly or suffer some loss in accuracy.
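Beyond the built-in `--verify` flag, you can also load the exported file yourself with `onnxruntime`. A minimal sketch, assuming the default output name `tmp.onnx` and a 2D-recognizer input shape of `1 1 3 224 224`:
```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('tmp.onnx')
input_name = sess.get_inputs()[0].name
dummy = np.random.rand(1, 1, 3, 224, 224).astype(np.float32)
scores = sess.run(None, {input_name: dummy})[0]
print(scores.shape)  # e.g. (1, num_classes)
```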
# Tutorial 7: Customize Runtime Settings
In this tutorial, we will introduce how to customize optimization methods, training schedules, workflow and hooks when running your own settings for the project.
<!-- TOC -->
- [Customize Optimization Methods](#customize-optimization-methods)
- [Customize optimizer supported by PyTorch](#customize-optimizer-supported-by-pytorch)
- [Customize self-implemented optimizer](#customize-self-implemented-optimizer)
- [1. Define a new optimizer](#1-define-a-new-optimizer)
- [2. Add the optimizer to registry](#2-add-the-optimizer-to-registry)
- [3. Specify the optimizer in the config file](#3-specify-the-optimizer-in-the-config-file)
- [Customize optimizer constructor](#customize-optimizer-constructor)
- [Additional settings](#additional-settings)
- [Customize Training Schedules](#customize-training-schedules)
- [Customize Workflow](#customize-workflow)
- [Customize Hooks](#customize-hooks)
- [Customize self-implemented hooks](#customize-self-implemented-hooks)
- [1. Implement a new hook](#1-implement-a-new-hook)
- [2. Register the new hook](#2-register-the-new-hook)
- [3. Modify the config](#3-modify-the-config)
- [Use hooks implemented in MMCV](#use-hooks-implemented-in-mmcv)
- [Modify default runtime hooks](#modify-default-runtime-hooks)
- [Checkpoint config](#checkpoint-config)
- [Log config](#log-config)
- [Evaluation config](#evaluation-config)
<!-- TOC -->
## Customize Optimization Methods
### Customize optimizer supported by PyTorch
We already support all the optimizers implemented by PyTorch; the only modification needed is to change the `optimizer` field of config files.
For example, if you want to use `Adam`, the modification could be as follows.
```python
optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001)
```
To modify the learning rate of the model, users only need to modify the `lr` field of the optimizer config.
The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch.
For example, if you want to use `Adam` with settings like `torch.optim.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)` in PyTorch,
the modification could be set as follows.
```python
optimizer = dict(type='Adam', lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False)
```
### Customize self-implemented optimizer
#### 1. Define a new optimizer
A customized optimizer could be defined as follows.
Assume you want to add an optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`.
You need to create a new directory named `mmaction/core/optimizer`,
and then implement the new optimizer in a file, e.g., in `mmaction/core/optimizer/my_optimizer.py`:
```python
from mmcv.runner import OPTIMIZERS
from torch.optim import Optimizer


@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):

    def __init__(self, a, b, c):
        # implement the optimizer here
        pass
```
#### 2. Add the optimizer to registry
To make the registry find the module defined above, it should first be imported into the main namespace. There are two ways to achieve this.
- Modify `mmaction/core/optimizer/__init__.py` to import it.
The newly defined module should be imported in `mmaction/core/optimizer/__init__.py` so that the registry will
find the new module and add it:
```python
from .my_optimizer import MyOptimizer
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmaction.core.optimizer.my_optimizer'], allow_failed_imports=False)
```
The module `mmaction.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered.
Note that only the package containing the class `MyOptimizer` should be imported. `mmaction.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly.
#### 3. Specify the optimizer in the config file
Then you can use `MyOptimizer` in `optimizer` field of config files.
In the configs, the optimizers are defined by the field `optimizer` like the following:
```python
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
```
To use your own optimizer, the field can be changed to
```python
optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value)
```
### Customize optimizer constructor
Some models may have parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
Users can do such fine-grained parameter tuning by customizing the optimizer constructor.
```python
import torch
from mmcv.runner.optimizer import OPTIMIZER_BUILDERS


@OPTIMIZER_BUILDERS.register_module()
class MyOptimizerConstructor:

    def __init__(self, optimizer_cfg, paramwise_cfg=None):
        self.optimizer_cfg = optimizer_cfg
        self.paramwise_cfg = {} if paramwise_cfg is None else paramwise_cfg

    def __call__(self, model):
        # minimal example: look up the optimizer class in torch.optim
        cfg = self.optimizer_cfg.copy()
        optimizer_cls = getattr(torch.optim, cfg.pop('type'))
        return optimizer_cls(model.parameters(), **cfg)
```
The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11),
which could also serve as a template for a new optimizer constructor.
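In the config, a custom constructor is then selected through the `constructor` key of the `optimizer` field, for example (a sketch; the `paramwise_cfg` contents depend on what your constructor expects):
```python
optimizer = dict(
    type='SGD',
    constructor='MyOptimizerConstructor',
    paramwise_cfg=dict(),  # placeholder: options consumed by the constructor
    lr=0.01,
    momentum=0.9,
    weight_decay=0.0001)
```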
### Additional settings
Tricks not implemented by the optimizer should be implemented through the optimizer constructor (e.g., setting parameter-wise learning rates) or through hooks.
We list some common settings that could stabilize or accelerate training. Feel free to create a PR or an issue for more settings.
- __Use gradient clip to stabilize training__:
Some models need gradient clipping to stabilize the training process. An example is shown below:
```python
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
```
- __Use momentum schedule to accelerate model convergence__:
We support a momentum scheduler that modifies the model's momentum according to the learning rate, which can make the model converge faster.
The momentum scheduler is usually used together with the LR scheduler; for example, the following config is used in 3D detection to accelerate convergence.
For more details, please refer to the implementation of [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327)
and [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130).
```python
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
```
## Customize Training Schedules
We use a step learning rate schedule with default values in config files; this calls [`StepLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L153) in MMCV.
We support many other learning rate schedules [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), such as the `CosineAnnealing` and `Poly` schedules. Here are some examples:
- Poly schedule:
```python
lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
```
- CosineAnnealing schedule:
```python
lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
min_lr_ratio=1e-5)
```
## Customize Workflow
By default, we recommend using `EvalHook` to do evaluation after each training epoch, but you can still use the `val` workflow as an alternative.
Workflow is a list of (phase, epochs) to specify the running order and epochs. By default it is set to be
```python
workflow = [('train', 1)]
```
which means running 1 epoch for training.
Sometimes users may want to check some metrics (e.g. loss, accuracy) of the model on the validation set.
In such a case, we can set the workflow as
```python
[('train', 1), ('val', 1)]
```
so that 1 epoch for training and 1 epoch for validation will be run iteratively.
:::{note}
1. The parameters of the model will not be updated during the val epoch.
2. The keyword `total_epochs` in the config only controls the number of training epochs and will not affect the validation workflow.
3. Workflows `[('train', 1), ('val', 1)]` and `[('train', 1)]` will not change the behavior of `EvalHook`, because `EvalHook` is called by `after_train_epoch` and the validation workflow only affects hooks that are called through `after_val_epoch`.
Therefore, the only difference between `[('train', 1), ('val', 1)]` and `[('train', 1)]` is that the runner will calculate losses on validation set after each training epoch.
:::
## Customize Hooks
### Customize self-implemented hooks
#### 1. Implement a new hook
Here we give an example of creating a new hook in MMAction2 and using it in training.
```python
from mmcv.runner import HOOKS, Hook


@HOOKS.register_module()
class MyHook(Hook):

    def __init__(self, a, b):
        pass

    def before_run(self, runner):
        pass

    def after_run(self, runner):
        pass

    def before_epoch(self, runner):
        pass

    def after_epoch(self, runner):
        pass

    def before_iter(self, runner):
        pass

    def after_iter(self, runner):
        pass
```
Depending on the functionality of the hook, the users need to specify what the hook will do at each stage of the training in `before_run`, `after_run`, `before_epoch`, `after_epoch`, `before_iter`, and `after_iter`.
#### 2. Register the new hook
Then we need to make sure `MyHook` is imported. Assuming the file is `mmaction/core/utils/my_hook.py`, there are two ways to do that:
- Modify `mmaction/core/utils/__init__.py` to import it.
The newly defined module should be imported in `mmaction/core/utils/__init__.py` so that the registry will
find the new module and add it:
```python
from .my_hook import MyHook
```
- Use `custom_imports` in the config to manually import it
```python
custom_imports = dict(imports=['mmaction.core.utils.my_hook'], allow_failed_imports=False)
```
#### 3. Modify the config
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value)
]
```
You can also set the priority of the hook by adding the key `priority` with value `'NORMAL'` or `'HIGHEST'`, as below
```python
custom_hooks = [
dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
]
```
By default the hook's priority is set as `NORMAL` during registration.
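As a concrete, purely illustrative example, the sketch below registers a hypothetical hook that logs the current learning rate after every training epoch:
```python
from mmcv.runner import HOOKS, Hook


@HOOKS.register_module()
class LrLoggerHook(Hook):
    """Hypothetical hook: log the learning rate after each training epoch."""

    def after_train_epoch(self, runner):
        # `runner.current_lr()` returns one value per parameter group
        runner.logger.info(
            f'epoch {runner.epoch + 1}: lr = {runner.current_lr()}')
```
Once its module is imported as described above, it is enabled like any other custom hook, e.g. `custom_hooks = [dict(type='LrLoggerHook')]`.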
### Use hooks implemented in MMCV
If the hook is already implemented in MMCV, you can directly modify the config to use the hook as below
```python
custom_hooks = [
dict(type='MMCVHook', a=a_value, b=b_value, priority='NORMAL')
]
```
### Modify default runtime hooks
There are some common hooks that are not registered through `custom_hooks` but are registered by default when importing MMCV. They are:
- log_config
- checkpoint_config
- evaluation
- lr_config
- optimizer_config
- momentum_config
Among these hooks, only the logger hook has `VERY_LOW` priority; the others have `NORMAL` priority.
The above-mentioned tutorials already cover how to modify `optimizer_config`, `momentum_config`, and `lr_config`.
Here we show what we can do with `log_config`, `checkpoint_config`, and `evaluation`.
#### Checkpoint config
The MMCV runner will use `checkpoint_config` to initialize [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9).
```python
checkpoint_config = dict(interval=1)
```
Users can set `max_keep_ckpts` to save only a small number of checkpoints, or decide whether to store the optimizer's state dict via `save_optimizer`.
More details of the arguments are [here](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook)
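For instance, a checkpoint config that keeps only the three latest checkpoints and skips the optimizer state could look like the following sketch (the values are examples):
```python
checkpoint_config = dict(interval=1, max_keep_ckpts=3, save_optimizer=False)
```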
#### Log config
The `log_config` wraps multiple logger hooks and enables setting intervals. Currently MMCV supports `WandbLoggerHook`, `MlflowLoggerHook`, and `TensorboardLoggerHook`.
The detail usages can be found in the [doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook).
```python
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
```
#### Evaluation config
The config of `evaluation` will be used to initialize the [`EvalHook`](https://github.com/open-mmlab/mmaction2/blob/master/mmaction/core/evaluation/eval_hooks.py#L12).
Except for the key `interval`, other arguments such as `metrics` will be passed to `dataset.evaluate()`.
```python
evaluation = dict(interval=1, metrics=['top_k_accuracy', 'mean_class_accuracy'])
```
Apart from training/testing scripts, we provide lots of useful tools under the `tools/` directory.
## Useful Tools Link
<!-- TOC -->
- [Useful Tools Link](#useful-tools-link)
- [Log Analysis](#log-analysis)
- [Model Complexity](#model-complexity)
- [Model Conversion](#model-conversion)
- [MMAction2 model to ONNX (experimental)](#mmaction2-model-to-onnx-experimental)
- [Prepare a model for publishing](#prepare-a-model-for-publishing)
- [Model Serving](#model-serving)
- [1. Convert model from MMAction2 to TorchServe](#1-convert-model-from-mmaction2-to-torchserve)
- [2. Build `mmaction-serve` docker image](#2-build-mmaction-serve-docker-image)
- [3. Launch `mmaction-serve`](#3-launch-mmaction-serve)
- [4. Test deployment](#4-test-deployment)
- [Miscellaneous](#miscellaneous)
- [Evaluating a metric](#evaluating-a-metric)
- [Print the entire config](#print-the-entire-config)
- [Check videos](#check-videos)
<!-- TOC -->
## Log Analysis
`tools/analysis/analyze_logs.py` plots loss/top-k acc curves given a training log file. Run `pip install seaborn` first to install the dependency.
![acc_curve_image](https://github.com/open-mmlab/mmaction2/raw/master/resources/acc_curve.png)
```shell
python tools/analysis/analyze_logs.py plot_curve ${JSON_LOGS} [--keys ${KEYS}] [--title ${TITLE}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}]
```
Examples:
- Plot the classification loss of some run.
```shell
python tools/analysis/analyze_logs.py plot_curve log.json --keys loss_cls --legend loss_cls
```
- Plot the top-1 acc and top-5 acc of some run, and save the figure to a pdf.
```shell
python tools/analysis/analyze_logs.py plot_curve log.json --keys top1_acc top5_acc --out results.pdf
```
- Compare the top-1 acc of two runs in the same figure.
```shell
python tools/analysis/analyze_logs.py plot_curve log1.json log2.json --keys top1_acc --legend run1 run2
```
You can also compute the average training speed.
```shell
python tools/analysis/analyze_logs.py cal_train_time ${JSON_LOGS} [--include-outliers]
```
- Compute the average training speed for a config file.
```shell
python tools/analysis/analyze_logs.py cal_train_time work_dirs/some_exp/20200422_153324.log.json
```
The output is expected to be like the following.
```text
-----Analyze train time of work_dirs/some_exp/20200422_153324.log.json-----
slowest epoch 60, average time is 0.9736
fastest epoch 18, average time is 0.9001
time std over epochs is 0.0177
average iter time: 0.9330 s/iter
```
## Model Complexity
`/tools/analysis/get_flops.py` is a script adapted from [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) to compute the FLOPs and params of a given model.
```shell
python tools/analysis/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
```
You will get a result like this
```text
==============================
Input shape: (1, 3, 32, 340, 256)
Flops: 37.1 GMac
Params: 28.04 M
==============================
```
:::{note}
This tool is still experimental and we do not guarantee that the number is absolutely correct.
You may use the result for simple comparisons, but double check it before you adopt it in technical reports or papers.
(1) FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 340, 256) for 2D recognizer, (1, 3, 32, 340, 256) for 3D recognizer.
(2) Some operators are not counted into FLOPs like GN and custom operators. Refer to [`mmcv.cnn.get_model_complexity_info()`](https://github.com/open-mmlab/mmcv/blob/master/mmcv/cnn/utils/flops_counter.py) for details.
:::
## Model Conversion
### MMAction2 model to ONNX (experimental)
`/tools/deployment/pytorch2onnx.py` is a script to convert model to [ONNX](https://github.com/onnx/onnx) format.
It also supports comparing the outputs of the PyTorch and ONNX models for verification.
Run `pip install onnx onnxruntime` first to install the dependency.
Please note that a softmax layer could be added for recognizers by `--softmax` option, in order to get predictions in range `[0, 1]`.
- For recognizers, please run:
```shell
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --shape $SHAPE --verify
```
- For localizers, please run:
```shell
python tools/deployment/pytorch2onnx.py $CONFIG_PATH $CHECKPOINT_PATH --is-localizer --shape $SHAPE --verify
```
### Prepare a model for publishing
`tools/deployment/publish_model.py` helps users prepare their model for publishing.
Before you upload a model to AWS, you may want to:
(1) convert the model weights to CPU tensors,
(2) delete the optimizer states, and
(3) compute the hash of the checkpoint file and append the hash id to the filename.
```shell
python tools/deployment/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME}
```
E.g.,
```shell
python tools/deployment/publish_model.py work_dirs/tsn_r50_1x1x3_100e_kinetics400_rgb/latest.pth tsn_r50_1x1x3_100e_kinetics400_rgb.pth
```
The final output filename will be `tsn_r50_1x1x3_100e_kinetics400_rgb-{hash id}.pth`.
## Model Serving
To serve an `MMAction2` model with [`TorchServe`](https://pytorch.org/serve/), you can follow these steps:
### 1. Convert model from MMAction2 to TorchServe
```shell
python tools/deployment/mmaction2torchserve.py ${CONFIG_FILE} ${CHECKPOINT_FILE} \
--output_folder ${MODEL_STORE} \
--model-name ${MODEL_NAME} \
--label-file ${LABEL_FILE}
```
### 2. Build `mmaction-serve` docker image
```shell
DOCKER_BUILDKIT=1 docker build -t mmaction-serve:latest docker/serve/
```
### 3. Launch `mmaction-serve`
Check the official docs for [running TorchServe with docker](https://github.com/pytorch/serve/blob/master/docker/README.md#running-torchserve-in-a-production-docker-environment).
Example:
```shell
docker run --rm \
--cpus 8 \
--gpus device=0 \
-p8080:8080 -p8081:8081 -p8082:8082 \
--mount type=bind,source=$MODEL_STORE,target=/home/model-server/model-store \
mmaction-serve:latest
```
**Note**: ${MODEL_STORE} needs to be an absolute path.
[Read the docs](https://github.com/pytorch/serve/blob/072f5d088cce9bb64b2a18af065886c9b01b317b/docs/rest_api.md) about the Inference (8080), Management (8081) and Metrics (8082) APIs.
### 4. Test deployment
```shell
# Assume you are under the directory `mmaction2`
curl http://127.0.0.1:8080/predictions/${MODEL_NAME} -T demo/demo.mp4
```
You should obtain a response similar to:
```json
{
"arm wrestling": 1.0,
"rock scissors paper": 4.962051880497143e-10,
"shaking hands": 3.9761663406245873e-10,
"massaging feet": 1.1924419784925533e-10,
"stretching leg": 1.0601879096849842e-10
}
```
## Miscellaneous
### Evaluating a metric
`tools/analysis/eval_metric.py` evaluates certain metrics of the results saved in a file according to a config file.
The result file is produced by `tools/test.py` with the argument `--out ${RESULT_FILE}`, and stores the final output of the whole model.
```shell
python tools/analysis/eval_metric.py ${CONFIG_FILE} ${RESULT_FILE} [--eval ${EVAL_METRICS}] [--cfg-options ${CFG_OPTIONS}] [--eval-options ${EVAL_OPTIONS}]
```
### Print the entire config
`tools/analysis/print_config.py` prints the whole config verbatim, expanding all its imports.
```shell
python tools/analysis/print_config.py ${CONFIG} [-h] [--options ${OPTIONS [OPTIONS...]}]
```
### Check videos
`tools/analysis/check_videos.py` uses the specified video decoder to iterate over all samples specified by the input configuration file, looks for invalid videos (corrupted or missing), and saves the corresponding file paths to an output file. Please note that after deleting invalid videos, users need to regenerate the video file list.
```shell
python tools/analysis/check_videos.py ${CONFIG} [-h] [--options OPTIONS [OPTIONS ...]] [--cfg-options CFG_OPTIONS [CFG_OPTIONS ...]] [--output-file OUTPUT_FILE] [--split SPLIT] [--decoder DECODER] [--num-processes NUM_PROCESSES] [--remove-corrupted-videos]
```
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
mmaction.apis
-------------
.. automodule:: mmaction.apis
:members:
mmaction.core
-------------
optimizer
^^^^^^^^^
.. automodule:: mmaction.core.optimizer
:members:
evaluation
^^^^^^^^^^
.. automodule:: mmaction.core.evaluation
:members:
scheduler
^^^^^^^^^
.. automodule:: mmaction.core.scheduler
:members:
mmaction.localization
---------------------
localization
^^^^^^^^^^^^
.. automodule:: mmaction.localization
:members:
mmaction.models
---------------
models
^^^^^^
.. automodule:: mmaction.models
:members:
recognizers
^^^^^^^^^^^
.. automodule:: mmaction.models.recognizers
:members:
localizers
^^^^^^^^^^
.. automodule:: mmaction.models.localizers
:members:
common
^^^^^^
.. automodule:: mmaction.models.common
:members:
backbones
^^^^^^^^^
.. automodule:: mmaction.models.backbones
:members:
heads
^^^^^
.. automodule:: mmaction.models.heads
:members:
necks
^^^^^
.. automodule:: mmaction.models.necks
:members:
losses
^^^^^^
.. automodule:: mmaction.models.losses
:members:
mmaction.datasets
-----------------
datasets
^^^^^^^^
.. automodule:: mmaction.datasets
:members:
pipelines
^^^^^^^^^
.. automodule:: mmaction.datasets.pipelines
:members:
samplers
^^^^^^^^
.. automodule:: mmaction.datasets.samplers
:members:
mmaction.utils
--------------
.. automodule:: mmaction.utils
:members:
# Benchmark
Here we compare the training speed of MMAction2 with other popular frameworks and official open-source implementations.
## Settings
### Hardware
- 8 NVIDIA Tesla V100 (32G) GPUs
- Intel(R) Xeon(R) Gold 6146 CPU @ 3.20GHz
### Software environment
- Python 3.7
- PyTorch 1.4
- CUDA 10.1
- CUDNN 7.6.03
- NCCL 2.4.08
### Metrics
The time we measure is the average training time for one iteration, including data processing and model training.
The training speed is measured in s/iter; the lower, the better. Note that we skip the first 50 iterations, since they may include device warm-up time.
### Comparison rules
We compare the average training time per iteration of MMAction2 with other video understanding toolboxes, using the same data and model settings. The other codebases included are:
- MMAction: commit id [7f3490d](https://github.com/open-mmlab/mmaction/tree/7f3490d3db6a67fe7b87bfef238b757403b670e3)(1/5/2020)
- Temporal-Shift-Module: commit id [8d53d6f](https://github.com/mit-han-lab/temporal-shift-module/tree/8d53d6fda40bea2f1b37a6095279c4b454d672bd)(5/5/2020)
- PySlowFast: commit id [8299c98](https://github.com/facebookresearch/SlowFast/tree/8299c9862f83a067fa7114ce98120ae1568a83ec)(7/7/2020)
- BSN(boundary sensitive network): commit id [f13707f](https://github.com/wzmsltw/BSN-boundary-sensitive-network/tree/f13707fbc362486e93178c39f9c4d398afe2cb2f)(12/12/2018)
- BMN(boundary matching network): commit id [45d0514](https://github.com/JJBOY/BMN-Boundary-Matching-Network/tree/45d05146822b85ca672b65f3d030509583d0135a)(17/10/2019)
To ensure a fair comparison, all experiments are conducted with the same hardware environment and data.
The raw-frame dataset used is generated by the [data preparation tool](/tools/data/kinetics/README.md);
the video dataset used is generated by [this script](/tools/data/resize_videos.py), which produces videos that are fast to decode ("short side 256, dense-keyframe encoded").
As shown in the tables below, a significant speed-up can be observed compared with normal short-side-256 videos, especially when frames are sampled very sparsely, as in [TSN](/configs/recognition/tsn/tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py).
## Main Results
### Recognizers
| Model | Input | IO backend | Batch size x GPUs | MMAction2 (s/iter) | GPU memory (GB) | MMAction (s/iter) | GPU memory (GB) | Temporal-Shift-Module (s/iter) | GPU memory (GB) | PySlowFast (s/iter) | GPU memory (GB) |
| :------------------------------------------------------------------------------------------ | :----------------------: | :-------: | :---------------: | :-------------------------------------------------------------------------------------------------------------------------: | :---------------: | :------------------------------------------------------------------------------------------------------------------: | :---------------: | :-------------------------------------------------------------------------------------------------------------------------------: | :---------------: | :--------------------------------------------------------------------------------------------------------------------: | :---------------: |
| [TSN](/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py) | 256p rawframes | Memcached | 32x8 | **[0.32](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/tsn_256p_rawframes_memcahed_32x8.zip)** | 8.1 | [0.38](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction/tsn_256p_rawframes_memcached_32x8.zip) | 8.1 | [0.42](https://download.openmmlab.com/mmaction/benchmark/recognition/temporal_shift_module/tsn_256p_rawframes_memcached_32x8.zip) | 10.5 | x | x |
| [TSN](/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py) | 256p videos | Disk | 32x8 | **[1.42](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/tsn_256p_videos_disk_32x8.zip)** | 8.1 | x | x | x | x | TODO | TODO |
| [TSN](/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py) | 256p dense-encoded video | Disk | 32x8 | **[0.61](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/tsn_256p_fast_videos_disk_32x8.zip)** | 8.1 | x | x | x | x | TODO | TODO |
| [I3D heavy](/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py) | 256p videos | Disk | 8x8 | **[0.34](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/i3d_heavy_256p_videos_disk_8x8.zip)** | 4.6 | x | x | x | x | [0.44](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_i3d_r50_8x8_video.log) | 4.6 |
| [I3D heavy](/configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py) | 256p dense-encoded video | Disk | 8x8 | **[0.35](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/i3d_heavy_256p_fast_videos_disk_8x8.zip)** | 4.6 | x | x | x | x | [0.36](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_i3d_r50_8x8_fast_video.log) | 4.6 |
| [I3D](/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py) | 256p rawframes | Memcached | 8x8 | **[0.43](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/i3d_256p_rawframes_memcahed_8x8.zip)** | 5.0 | [0.56](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction/i3d_256p_rawframes_memcached_8x8.zip) | 5.0 | x | x | x | x |
| [TSM](/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py) | 256p rawframes | Memcached | 8x8 | **[0.31](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/tsm_256p_rawframes_memcahed_8x8.zip)** | 6.9 | x | x | [0.41](https://download.openmmlab.com/mmaction/benchmark/recognition/temporal_shift_module/tsm_256p_rawframes_memcached_8x8.zip) | 9.1 | x | x |
| [Slowonly](/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py) | 256p videos | Disk | 8x8 | **[0.32](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/slowonly_256p_videos_disk_8x8.zip)** | 3.1 | TODO | TODO | x | x | [0.34](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_slowonly_r50_4x16_video.log) | 3.4 |
| [Slowonly](/configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py) | 256p dense-encoded video | Disk | 8x8 | **[0.25](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/slowonly_256p_fast_videos_disk_8x8.zip)** | 3.1 | TODO | TODO | x | x | [0.28](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_slowonly_r50_4x16_fast_video.log) | 3.4 |
| [Slowfast](/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py) | 256p videos | Disk | 8x8 | **[0.69](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/slowfast_256p_videos_disk_8x8.zip)** | 6.1 | x | x | x | x | [1.04](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_slowfast_r50_4x16_video.log) | 7.0 |
| [Slowfast](/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py) | 256p dense-encoded video | Disk | 8x8 | **[0.68](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/slowfast_256p_fast_videos_disk_8x8.zip)** | 6.1 | x | x | x | x | [0.96](https://download.openmmlab.com/mmaction/benchmark/recognition/pyslowfast/pysf_slowfast_r50_4x16_fast_video.log) | 7.0 |
| [R(2+1)D](/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py) | 256p videos | Disk | 8x8 | **[0.45](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/r2plus1d_256p_videos_disk_8x8.zip)** | 5.1 | x | x | x | x | x | x |
| [R(2+1)D](/configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py) | 256p dense-encoded video | Disk | 8x8 | **[0.44](https://download.openmmlab.com/mmaction/benchmark/recognition/mmaction2/r2plus1d_256p_fast_videos_disk_8x8.zip)** | 5.1 | x | x | x | x | x | x |
### Temporal Action Localizers
| Model | MMAction2 (s/iter) | BSN(boundary sensitive network) (s/iter) | BMN(boundary matching network) (s/iter) |
| :------------------------------------------------------------------------------------------------------------------ | :-----------------------: | :--------------------------------------: | :-------------------------------------: |
| BSN ([TEM + PEM + PGM](/configs/localization/bsn)) | **0.074(TEM)+0.040(PEM)** | 0.101(TEM)+0.040(PEM) | x |
| BMN ([bmn_400x100_2x8_9e_activitynet_feature](/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py)) | **3.27** | x | 3.30 |
## Details of Comparison
### TSN
- **MMAction2**
```shell
# train with raw frames as input
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_tsn configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py --work-dir work_dirs/benchmark_tsn_rawframes
# train with videos as input
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_tsn configs/recognition/tsn/tsn_r50_video_1x1x3_100e_kinetics400_rgb.py --work-dir work_dirs/benchmark_tsn_video
```
- **MMAction**
```shell
python -u tools/train_recognizer.py configs/TSN/tsn_kinetics400_2d_rgb_r50_seg3_f1s1.py
```
- **Temporal-Shift-Module**
```shell
python main.py kinetics RGB --arch resnet50 --num_segments 3 --gd 20 --lr 0.02 --wd 1e-4 --lr_steps 20 40 --epochs 1 --batch-size 256 -j 32 --dropout 0.5 --consensus_type=avg --eval-freq=10 --npb --print-freq 1
```
### I3D
- **MMAction2**
```shell
# train with raw frames as input
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_i3d configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py --work-dir work_dirs/benchmark_i3d_rawframes
# train with videos as input
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_i3d configs/recognition/i3d/i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py --work-dir work_dirs/benchmark_i3d_video
```
- **MMAction**
```shell
python -u tools/train_recognizer.py configs/I3D_RGB/i3d_kinetics400_3d_rgb_r50_c3d_inflate3x1x1_seg1_f32s2.py
```
- **PySlowFast**
```shell
python tools/run_net.py --cfg configs/Kinetics/I3D_8x8_R50.yaml DATA.PATH_TO_DATA_DIR ${DATA_ROOT} NUM_GPUS 8 TRAIN.BATCH_SIZE 64 TRAIN.AUTO_RESUME False LOG_PERIOD 1 SOLVER.MAX_EPOCH 1 > pysf_i3d_r50_8x8_video.log
```
The corresponding result can be reproduced by writing a simple script to parse the 'time_diff' field of the log file.
### SlowFast
- **MMAction2**
```shell
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_slowfast configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py --work-dir work_dirs/benchmark_slowfast_video
```
- **PySlowFast**
```shell
python tools/run_net.py --cfg configs/Kinetics/SLOWFAST_4x16_R50.yaml DATA.PATH_TO_DATA_DIR ${DATA_ROOT} NUM_GPUS 8 TRAIN.BATCH_SIZE 64 TRAIN.AUTO_RESUME False LOG_PERIOD 1 SOLVER.MAX_EPOCH 1 > pysf_slowfast_r50_4x16_video.log
```
The corresponding result can be reproduced by writing a simple script to parse the 'time_diff' field of the log file.
### SlowOnly
- **MMAction2**
```shell
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_slowonly configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py --work-dir work_dirs/benchmark_slowonly_video
```
- **PySlowFast**
```shell
python tools/run_net.py --cfg configs/Kinetics/SLOW_4x16_R50.yaml DATA.PATH_TO_DATA_DIR ${DATA_ROOT} NUM_GPUS 8 TRAIN.BATCH_SIZE 64 TRAIN.AUTO_RESUME False LOG_PERIOD 1 SOLVER.MAX_EPOCH 1 > pysf_slowonly_r50_4x16_video.log
```
The corresponding result can be reproduced by writing a simple script to parse the 'time_diff' field of the log file.
### R2plus1D
- **MMAction2**
```shell
bash tools/slurm_train.sh ${PARTATION_NAME} benchmark_r2plus1d configs/recognition/r2plus1d/r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py --work-dir work_dirs/benchmark_r2plus1d_video
```
# Copyright (c) OpenMMLab. All rights reserved.
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import subprocess
import sys
import pytorch_sphinx_theme
sys.path.insert(0, os.path.abspath('..'))
# -- Project information -----------------------------------------------------
project = 'MMAction2'
copyright = '2020, OpenMMLab'
author = 'MMAction2 Authors'
version_file = '../mmaction/version.py'
def get_version():
    with open(version_file, 'r') as f:
        exec(compile(f.read(), version_file, 'exec'))
    return locals()['__version__']
# The full version, including alpha/beta/rc tags
release = get_version()
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode',
'sphinx_markdown_tables', 'sphinx_copybutton', 'myst_parser'
]
# numpy and torch are required
autodoc_mock_imports = ['mmaction.version', 'PIL']
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
source_suffix = {'.rst': 'restructuredtext', '.md': 'markdown'}
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'pytorch_sphinx_theme'
html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
html_theme_options = {
# 'logo_url': 'https://mmocr.readthedocs.io/en/latest/',
'menu': [
{
'name':
'教程',
'url':
'https://colab.research.google.com/github/'
'open-mmlab/mmaction2/blob/master/demo/'
'mmaction2_tutorial_zh-CN.ipynb'
},
{
'name': 'GitHub',
'url': 'https://github.com/open-mmlab/mmaction2'
},
{
'name':
'上游代码库',
'children': [
{
'name': 'MMCV',
'url': 'https://github.com/open-mmlab/mmcv',
'description': '计算机视觉基础库'
},
{
'name': 'MMClassification',
'url': 'https://github.com/open-mmlab/mmclassification',
'description': '图像分类代码库'
},
{
'name': 'MMDetection',
'url': 'https://github.com/open-mmlab/mmdetection',
'description': '物体检测代码库'
},
]
},
],
# Specify the language of shared menu
'menu_lang':
'cn'
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = ['css/readthedocs.css']
myst_enable_extensions = ['colon_fence']
myst_heading_anchors = 3
language = 'zh_CN'
master_doc = 'index'
def builder_inited_handler(app):
    subprocess.run(['./merge_docs.sh'])
    subprocess.run(['./stat.py'])


def setup(app):
    app.connect('builder-inited', builder_inited_handler)
# Data Preparation
This document provides some guidelines on data preparation for MMAction2.
<!-- TOC -->
- [Notes on Video Data Format](#notes-on-video-data-format)
- [Getting Data](#getting-data)
- [Prepare videos](#prepare-videos)
- [Extract frames](#extract-frames)
- [Alternative to denseflow](#alternative-to-denseflow)
- [Generate file list](#generate-file-list)
- [Prepare audio](#prepare-audio)
<!-- TOC -->
## Notes on Video Data Format
MMAction2 supports two types of data formats: raw frames and videos. The former was widely used in earlier projects such as TSN.
Processing raw frames is very fast when they can be stored on SSDs, but for large-scale datasets the raw frames take up a huge amount of disk space.
(For example, the newest edition of [Kinetics](https://deepmind.com/research/open-source/open-source-datasets/kinetics/) has 650K videos, whose raw frames take up several TBs of disk space.)
Videos save a lot of space, but they must be decoded at run time, which is computationally expensive.
To accelerate video decoding, MMAction2 supports several efficient video loading libraries, such as [decord](https://github.com/zhreshold/decord), [PyAV](https://github.com/PyAV-Org/PyAV), etc.
## Getting Data
This section introduces how to build a custom dataset.
Similar to the datasets above, it is recommended to put the data under `$MMACTION2/data/$DATASET`.
### Prepare videos
Please refer to the official website and/or the official scripts to prepare the videos.
Note that the videos should be arranged in one of the following two folder structures:
(1) A two-level directory of the form `${CLASS_NAME}/${VIDEO_ID}`, which is recommended for action recognition datasets (such as UCF101 and Kinetics).
(2) A single-level directory, which is recommended for action detection datasets and multi-label datasets (such as THUMOS14).
### Extract frames
To extract both frames and optical flow, you can use the [denseflow](https://github.com/open-mmlab/denseflow) tool provided by OpenMMLab.
Since different frame extraction tools may produce different numbers of frames, it is recommended to use the same tool to extract both RGB frames and optical flow, to avoid a mismatch between them.
```shell
python build_rawframes.py ${SRC_FOLDER} ${OUT_FOLDER} [--task ${TASK}] [--level ${LEVEL}] \
[--num-worker ${NUM_WORKER}] [--flow-type ${FLOW_TYPE}] [--out-format ${OUT_FORMAT}] \
[--ext ${EXT}] [--new-width ${NEW_WIDTH}] [--new-height ${NEW_HEIGHT}] [--new-short ${NEW_SHORT}] \
[--resume] [--use-opencv] [--mixed-ext]
```
- `SRC_FOLDER`: the folder of the source videos
- `OUT_FOLDER`: the root folder where the extracted frames and optical flow are stored
- `TASK`: the extraction task, indicating whether to extract frames, optical flow, or both; allowed choices are `rgb`, `flow`, `both`
- `LEVEL`: the directory level; 1 for a single-level directory, 2 for a two-level directory
- `NUM_WORKER`: the number of workers used to extract raw frames
- `FLOW_TYPE`: the type of optical flow to extract, e.g. `None`, `tvl1`, `warp_tvl1`, `farn`, `brox`
- `OUT_FORMAT`: the output format of the extracted frames, e.g. `jpg`, `h5`, `png`
- `EXT`: the video file extension, e.g. `avi`, `mp4`
- `NEW_WIDTH`: the width of the resized output images
- `NEW_HEIGHT`: the height of the resized output images
- `NEW_SHORT`: the short side of the resized output images (keeping the aspect ratio)
- `--resume`: whether to resume a previous optical flow extraction task or to overwrite previous outputs and extract again
- `--use-opencv`: whether to use OpenCV to extract RGB frames
- `--mixed-ext`: whether to handle video files with mixed extensions
Based on practical experience, the recommended settings are:
1. Set `$OUT_FOLDER` to a folder located on an SSD.
2. Symlink `$OUT_FOLDER` to `$MMACTION2/data/$DATASET/rawframes`.
3. Resize with `new-short` instead of `new-width` and `new-height`.
```shell
ln -s ${YOUR_FOLDER} $MMACTION2/data/$DATASET/rawframes
```
#### Alternative to denseflow
If you cannot install [denseflow](https://github.com/open-mmlab/denseflow) because of its requirements (e.g. the NVIDIA driver version),
or if you just need a quick demo of optical flow extraction, the Python script `tools/misc/flow_extraction.py` can be used as an alternative.
It extracts RGB frames and optical flow from one or more videos. Note that since the optical flow algorithm runs on the CPU, it is much slower than denseflow.
```shell
python tools/misc/flow_extraction.py --input ${INPUT} [--prefix ${PREFIX}] [--dest ${DEST}] [--rgb-tmpl ${RGB_TMPL}] \
[--flow-tmpl ${FLOW_TMPL}] [--start-idx ${START_IDX}] [--method ${METHOD}] [--bound ${BOUND}] [--save-rgb]
```
- `INPUT`: the video(s) to extract frames from; either a single video or a video list. The video list should be a txt file containing only video filenames, without directories
- `PREFIX`: the prefix of the input videos, used when the input is a video list
- `DEST`: the location where the extracted frames are saved
- `RGB_TMPL`: the filename template of RGB frames
- `FLOW_TMPL`: the filename template of optical flow
- `START_IDX`: the start index of the extracted frames
- `METHOD`: the method used to generate optical flow
- `BOUND`: the maximum value of the optical flow
- `SAVE_RGB`: also save the extracted RGB frames
### Generate file list
MMAction2 provides convenient scripts to generate file lists. After downloading the videos (and optionally extracting the frames), you can generate the file lists with the following script.
```shell
cd $MMACTION2
python tools/data/build_file_list.py ${DATASET} ${SRC_FOLDER} [--rgb-prefix ${RGB_PREFIX}] \
[--flow-x-prefix ${FLOW_X_PREFIX}] [--flow-y-prefix ${FLOW_Y_PREFIX}] [--num-split ${NUM_SPLIT}] \
[--subset ${SUBSET}] [--level ${LEVEL}] [--format ${FORMAT}] [--out-root-path ${OUT_ROOT_PATH}] \
[--seed ${SEED}] [--shuffle]
```
- `DATASET`: the dataset to be prepared, e.g. `ucf101`, `kinetics400`, `thumos14`, `sthv1`, `sthv2`, etc.
- `SRC_FOLDER`: the directory containing the data in the corresponding format:
  - "$MMACTION2/data/$DATASET/rawframes" if `--format rawframes` is set
  - "$MMACTION2/data/$DATASET/videos" if `--format videos` is set
- `RGB_PREFIX`: the filename prefix of RGB frames.
- `FLOW_X_PREFIX`: the filename prefix of flow-x frames.
- `FLOW_Y_PREFIX`: the filename prefix of flow-y frames.
- `NUM_SPLIT`: the number of splits of the dataset.
- `SUBSET`: the subset for which the file list is generated; allowed choices are `train`, `val`, `test`.
- `LEVEL`: the number of directory levels; 1 means all videos or frame folders are located in a single directory, 2 means they are organized in per-class subdirectories.
- `FORMAT`: the source data format for which the file list is generated; allowed choices are `rawframes`, `videos`.
- `OUT_ROOT_PATH`: the root directory of the generated file lists.
- `SEED`: the random seed.
- `--shuffle`: whether to shuffle the generated file list.
Now you can refer to the [getting started guide](getting_started.md) to train and test models.
### Prepare audio
MMAction2 also provides the following script to extract audio waveforms and generate mel spectrograms.
```shell
cd $MMACTION2
python tools/data/extract_audio.py ${ROOT} ${DST_ROOT} [--ext ${EXT}] [--num-workers ${N_WORKERS}] \
[--level ${LEVEL}]
```
- `ROOT`: the root directory of the videos.
- `DST_ROOT`: the root directory where the extracted audio files are stored.
- `EXT`: the video extension, e.g. `mp4`.
- `N_WORKERS`: the number of processes used.
After the audio is successfully extracted, you can refer to the [config file](/configs/recognition_audio/resnet/tsn_r50_64x1x1_100e_kinetics400_audio.py) to decode audio on the fly and generate mel spectrograms. If the directory structure of the audio files is consistent with that of the frame folders, you can directly reuse the annotation files of the frame data for the audio data. The downside of on-the-fly decoding is its slow speed, so MMAction2 also provides the following script to generate mel spectrograms offline.
```shell
cd $MMACTION2
python tools/data/build_audio_features.py ${AUDIO_HOME_PATH} ${SPECTROGRAM_SAVE_PATH} [--level ${LEVEL}] \
[--ext $EXT] [--num-workers $N_WORKERS] [--part $PART]
```
- `AUDIO_HOME_PATH`: the root directory of the audio files.
- `SPECTROGRAM_SAVE_PATH`: the root directory where the generated audio features are stored.
- `EXT`: the audio extension, e.g. `m4a`.
- `N_WORKERS`: the number of processes used.
- `PART`: split the whole decoding task into several parts and execute one of them, e.g. `2/5` means splitting all the data into 5 parts and decoding the 2nd one. This is useful when you have multiple machines.
The annotation files for the mel spectrogram features are the same as those for the frame folders: you can simply copy `dataset_[train/val]_list_rawframes.txt` and rename it to `dataset_[train/val]_list_audio_feature.txt`.
# Demo
## Table of Contents
- [Demo](#demo)
- [Table of Contents](#table-of-contents)
- [Predicting the Action Label of a Video](#predicting-the-action-label-of-a-video)
- [Spatio-temporal Action Detection for Videos](#spatio-temporal-action-detection-for-videos)
- [Visualizing GradCAM for an Input Video](#visualizing-gradcam-for-an-input-video)
- [Real-time Action Recognition with a Webcam](#real-time-action-recognition-with-a-webcam)
- [Sliding-window Action Recognition for Long Videos](#sliding-window-action-recognition-for-long-videos)
- [Real-time Spatio-temporal Action Detection with a Webcam](#real-time-spatio-temporal-action-detection-with-a-webcam)
- [Predicting Action Labels from Human Pose](#predicting-action-labels-from-human-pose)
- [Video Structuralization](#video-structuralization)
- [Audio-based Action Recognition](#audio-based-action-recognition)
## Predicting the Action Label of a Video
MMAction2 provides the following script to predict the action label of a video. To get action scores in the \[0, 1\] range, make sure to set `model['test_cfg'] = dict(average_clips='prob')` in the config file.
```shell
python demo/demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} {LABEL_FILE} [--use-frames] \
[--device ${DEVICE_TYPE}] [--fps {FPS}] [--font-scale {FONT_SCALE}] [--font-color {FONT_COLOR}] \
[--target-resolution ${TARGET_RESOLUTION}] [--resize-algorithm {RESIZE_ALGORITHM}] [--out-filename {OUT_FILE}]
```
Optional arguments:
- `--use-frames`: If specified, a frame directory is used as input; otherwise a video is used as input.
- `DEVICE_TYPE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `FPS`: Frame rate of the input when a frame directory is used as input. Defaults to 30.
- `FONT_SCALE`: Font scale of the text drawn on the output video. Defaults to 0.5.
- `FONT_COLOR`: Font color of the text drawn on the output video. Defaults to `white`.
- `TARGET_RESOLUTION`: Resolution of the output video; if not specified, the resolution of the input video is used.
- `RESIZE_ALGORITHM`: Interpolation method used when resizing the video. Defaults to `bicubic`.
- `OUT_FILE`: Path of the output video; if not specified, no output video is generated.
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Recognize a video with a TSN model on a cuda device:
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt
```
2. Recognize a video with a TSN model on a cuda device, loading the checkpoint from a URL:
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt
```
3. Recognize a video with a TSN model on CPU, using pre-extracted frames as input:
```shell
python demo/demo.py configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
PATH_TO_FRAMES/ LABEL_FILE --use-frames --device cpu
```
4. Recognize a video with a TSN model and save the result as an MP4 file:
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt --out-filename demo/demo_out.mp4
```
5. Recognize a video with a TSN model, using pre-extracted frames as input, and save the result as a GIF file:
```shell
python demo/demo.py configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
PATH_TO_FRAMES/ LABEL_FILE --use-frames --out-filename demo/demo_out.gif
```
6. Recognize a video with a TSN model, save the result as an MP4 file, and specify the output resolution and the interpolation method used for resizing:
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt --target-resolution 340 256 --resize-algorithm bilinear \
--out-filename demo/demo_out.mp4
```
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
# If either dimension of TARGET_RESOLUTION is set to -1, the aspect ratio is kept when resizing frames
# e.g. with --target-resolution 170 -1, frames of original size (340, 256) are resized to (170, 128)
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt --target-resolution 170 -1 --resize-algorithm bilinear \
--out-filename demo/demo_out.mp4
```
7. Recognize a video with a TSN model, save the result as an MP4 file, and draw the text in red with a font size of 10 pixels:
```shell
# demo.mp4 and label_map_k400.txt are both from the Kinetics-400 dataset
python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
demo/demo.mp4 tools/data/kinetics/label_map_k400.txt --font-size 10 --font-color red \
--out-filename demo/demo_out.mp4
```
8. Recognize a video with a TSN model, using pre-extracted frames as input, and save the result as a GIF file with the frame rate set to 24 fps:
```shell
python demo/demo.py configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
PATH_TO_FRAMES/ LABEL_FILE --use-frames --fps 24 --out-filename demo/demo_out.gif
```
## Spatio-temporal Action Detection for Videos
MMAction2 provides the following script to predict the spatio-temporal action detection result of a video.
```shell
python demo/demo_spatiotemporal_det.py --video ${VIDEO_FILE} \
[--config ${SPATIOTEMPORAL_ACTION_DETECTION_CONFIG_FILE}] \
[--checkpoint ${SPATIOTEMPORAL_ACTION_DETECTION_CHECKPOINT}] \
[--det-config ${HUMAN_DETECTION_CONFIG_FILE}] \
[--det-checkpoint ${HUMAN_DETECTION_CHECKPOINT}] \
[--det-score-thr ${HUMAN_DETECTION_SCORE_THRESHOLD}] \
[--action-score-thr ${ACTION_DETECTION_SCORE_THRESHOLD}] \
[--label-map ${LABEL_MAP}] \
[--device ${DEVICE}] \
[--out-filename ${OUTPUT_FILENAME}] \
[--predict-stepsize ${PREDICT_STEPSIZE}] \
[--output-stepsize ${OUTPUT_STEPSIZE}] \
[--output-fps ${OUTPUT_FPS}]
```
Optional arguments:
- `SPATIOTEMPORAL_ACTION_DETECTION_CONFIG_FILE`: Path to the spatio-temporal action detection config file.
- `SPATIOTEMPORAL_ACTION_DETECTION_CHECKPOINT`: Path to the spatio-temporal action detection checkpoint.
- `HUMAN_DETECTION_CONFIG_FILE`: Path to the human detection config file.
- `HUMAN_DETECTION_CHECKPOINT`: Path to the human detection checkpoint.
- `HUMAN_DETECTION_SCORE_THRESHOLD`: Human detection score threshold. Defaults to 0.9.
- `ACTION_DETECTION_SCORE_THRESHOLD`: Action detection score threshold. Defaults to 0.5.
- `LABEL_MAP`: Label map file used. Defaults to `tools/data/ava/label_map.txt`.
- `DEVICE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `OUTPUT_FILENAME`: Path of the output video. Defaults to `demo/stdet_demo.mp4`.
- `PREDICT_STEPSIZE`: Make a prediction every N frames (to save computation). Defaults to 8.
- `OUTPUT_STEPSIZE`: Output 1 frame to the output video for every N frames of the input video. Defaults to 4. Note that `PREDICT_STEPSIZE % OUTPUT_STEPSIZE == 0` must hold.
- `OUTPUT_FPS`: Frame rate of the output video. Defaults to 6.
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Use Faster RCNN as the human detector and SlowOnly-8x8-R101 as the action detector. Make a prediction every 8 frames, output 1 frame for every 4 frames of the original video, and set the frame rate of the output video to 6.
```shell
python demo/demo_spatiotemporal_det.py --video demo/demo.mp4 \
--config configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py \
--checkpoint https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201217-16378594.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--det-score-thr 0.9 \
--action-score-thr 0.5 \
--label-map tools/data/ava/label_map.txt \
--predict-stepsize 8 \
--output-stepsize 4 \
--output-fps 6
```
## Visualizing GradCAM for an Input Video
MMAction2 provides the following script to visualize GradCAM for an input video.
```shell
python demo/demo_gradcam.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} [--use-frames] \
[--device ${DEVICE_TYPE}] [--target-layer-name ${TARGET_LAYER_NAME}] [--fps {FPS}] \
[--target-resolution ${TARGET_RESOLUTION}] [--resize-algorithm {RESIZE_ALGORITHM}] [--out-filename {OUT_FILE}]
```
Optional arguments:
- `--use-frames`: If specified, a frame directory is used as input; otherwise a video is used as input.
- `DEVICE_TYPE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `TARGET_LAYER_NAME`: Name of the network layer to generate the GradCAM visualization for.
- `FPS`: Frame rate of the input when a frame directory is used as input. Defaults to 30.
- `TARGET_RESOLUTION`: Resolution of the output video; if not specified, the resolution of the input video is used.
- `RESIZE_ALGORITHM`: Interpolation method used when resizing the video. Defaults to `bilinear`.
- `OUT_FILE`: Path of the output video; if not specified, no output video is generated.
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Visualize GradCAM for an I3D model, using a video as input, and output a GIF file with a frame rate of 10:
```shell
python demo/demo_gradcam.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
checkpoints/i3d_r50_video_32x2x1_100e_kinetics400_rgb_20200826-e31c6f52.pth demo/demo.mp4 \
--target-layer-name backbone/layer4/1/relu --fps 10 \
--out-filename demo/demo_gradcam.gif
```
2. Visualize GradCAM for a TSM model, using a video as input, and output a GIF file; this example loads the checkpoint from a URL:
```shell
python demo/demo_gradcam.py configs/recognition/tsm/tsm_r50_video_inference_1x1x8_100e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsm/tsm_r50_video_1x1x8_100e_kinetics400_rgb/tsm_r50_video_1x1x8_100e_kinetics400_rgb_20200702-a77f4328.pth \
demo/demo.mp4 --target-layer-name backbone/layer4/1/relu --out-filename demo/demo_gradcam_tsm.gif
```
## Real-time Action Recognition with a Webcam
MMAction2 provides the following script for real-time action recognition with a webcam. To get action scores in the \[0, 1\] range, make sure to set `model['test_cfg'] = dict(average_clips='prob')` in the config file.
```shell
python demo/webcam_demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${LABEL_FILE} \
[--device ${DEVICE_TYPE}] [--camera-id ${CAMERA_ID}] [--threshold ${THRESHOLD}] \
[--average-size ${AVERAGE_SIZE}] [--drawing-fps ${DRAWING_FPS}] [--inference-fps ${INFERENCE_FPS}]
```
Optional arguments:
- `DEVICE_TYPE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `CAMERA_ID`: ID of the camera device. Defaults to 0.
- `THRESHOLD`: Score threshold for action recognition; only actions with a score above the threshold are displayed. Defaults to 0.
- `AVERAGE_SIZE`: Use the average result of the latest N clips as the prediction. Defaults to 1.
- `DRAWING_FPS`: Upper bound of the frame rate for rendering results. Defaults to 20.
- `INFERENCE_FPS`: Upper bound of the frame rate for inference. Defaults to 4.
**Note**: If your hardware is good enough, increase the drawing and inference frame rates for a better experience.
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Real-time action recognition from a webcam with a TSN model on CPU, averaging the latest 5 clips as the prediction and showing action classes with a score above 0.2:
```shell
python demo/webcam_demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth tools/data/kinetics/label_map_k400.txt --average-size 5 \
--threshold 0.2 --device cpu
```
2. Real-time action recognition from a webcam with a TSN model on CPU, averaging the latest 5 clips as the prediction, showing action classes with a score above 0.2, and loading the checkpoint from a URL:
```shell
python demo/webcam_demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
tools/data/kinetics/label_map_k400.txt --average-size 5 --threshold 0.2 --device cpu
```
3. Real-time action recognition from a webcam with an I3D model on GPU, averaging the latest 5 clips as the prediction and showing action classes with a score above 0.2:
```shell
python demo/webcam_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
checkpoints/i3d_r50_32x2x1_100e_kinetics400_rgb_20200614-c25ef9a4.pth tools/data/kinetics/label_map_k400.txt \
--average-size 5 --threshold 0.2
```
**Note:** Since inference devices vary in performance, the following changes may give a better experience on your device (a lighter pipeline sketch follows the list):
1) Change the `SampleFrames` step in `test_pipeline` of the config file (especially `clip_len` and `num_clips`).
2) Change the crop type in `test_pipeline` of the config file (options include `TenCrop`, `ThreeCrop`, `CenterCrop`).
3) Lower `AVERAGE_SIZE` to speed up inference.
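Below is a hedged sketch of what a lighter `test_pipeline` for a TSN-style 2D recognizer might look like; the transform names follow MMAction2, while the concrete `clip_len`, `num_clips` and crop values are assumptions to tune for your own hardware.
```python
# A hedged sketch, not the exact pipeline of any shipped config.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
test_pipeline = [
    # Fewer clips than the default test setting to cut inference cost.
    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3, test_mode=True),
    dict(type='Resize', scale=(-1, 256)),
    # CenterCrop is cheaper than TenCrop / ThreeCrop.
    dict(type='CenterCrop', crop_size=224),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW'),
    dict(type='Collect', keys=['imgs'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
```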
## Sliding-window Action Recognition for Long Videos
MMAction2 provides the following script to predict different action classes in a long video. To get action scores in the \[0, 1\] range, make sure to set `model['test_cfg'] = dict(average_clips='prob')` in the config file.
```shell
python demo/long_video_demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} ${LABEL_FILE} \
${OUT_FILE} [--input-step ${INPUT_STEP}] [--device ${DEVICE_TYPE}] [--threshold ${THRESHOLD}]
```
Optional arguments:
- `OUT_FILE`: Path of the output video.
- `INPUT_STEP`: Select one frame out of every N frames of the video as input. Defaults to 1.
- `DEVICE_TYPE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `THRESHOLD`: Score threshold for action recognition; only actions with a score above the threshold are displayed. Defaults to 0.01.
- `STRIDE`: By default, the script makes a separate prediction for every frame, which is slow. Setting `STRIDE` speeds this up: the script then makes one prediction every `STRIDE x sample_length` frames, where `sample_length` is the temporal window the model samples from, equal to `clip_len x frame_interval`. For example, if `sample_length` is 64 frames and `STRIDE` is set to 0.5, a prediction is made every 32 frames. If `STRIDE` is 0, a prediction is made for every frame. The ideal range of `STRIDE` is (0, 1\], but values larger than 1 also work. Defaults to 0 (see the short worked example below).
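The arithmetic behind `STRIDE`, using the assumed sampling values from the bullet above:
```python
# Worked example with assumed values: clip_len=8, frame_interval=8 gives
# sample_length = 64 frames; STRIDE = 0.5 then predicts every 32 frames.
clip_len, frame_interval, stride = 8, 8, 0.5
sample_length = clip_len * frame_interval          # 64
prediction_interval = int(stride * sample_length)  # 32
print(sample_length, prediction_interval)
```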
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Predict different action classes in a long video with a TSN model on CPU, setting `INPUT_STEP` to 3 (i.e. one frame is sampled out of every 3 frames as input) and showing action classes with a score above 0.2:
```shell
python demo/long_video_demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth PATH_TO_LONG_VIDEO tools/data/kinetics/label_map_k400.txt PATH_TO_SAVED_VIDEO \
--input-step 3 --device cpu --threshold 0.2
```
2. Predict different action classes in a long video with a TSN model on CPU, setting `INPUT_STEP` to 3, showing action classes with a score above 0.2, and loading the checkpoint from a URL:
```shell
python demo/long_video_demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
PATH_TO_LONG_VIDEO tools/data/kinetics/label_map_k400.txt PATH_TO_SAVED_VIDEO --input-step 3 --device cpu --threshold 0.2
```
3. Predict different action classes in a long web video (read from a URL) with a TSN model on CPU, setting `INPUT_STEP` to 3, showing action classes with a score above 0.2, and loading the checkpoint from a URL:
```shell
python demo/long_video_demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
https://www.learningcontainer.com/wp-content/uploads/2020/05/sample-mp4-file.mp4 \
tools/data/kinetics/label_map_k400.txt PATH_TO_SAVED_VIDEO --input-step 3 --device cpu --threshold 0.2
```
4. Predict different action classes in a long video with an I3D model on GPU, using the default `INPUT_STEP` (1) and the default score threshold (0.01):
```shell
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO tools/data/kinetics/label_map_k400.txt PATH_TO_SAVED_VIDEO
```
## Real-time Spatio-temporal Action Detection with a Webcam
MMAction2 provides the following script for real-time spatio-temporal action detection with a webcam.
```shell
python demo/webcam_demo_spatiotemporal_det.py \
[--config ${SPATIOTEMPORAL_ACTION_DETECTION_CONFIG_FILE}] \
[--checkpoint ${SPATIOTEMPORAL_ACTION_DETECTION_CHECKPOINT}] \
[--action-score-thr ${ACTION_DETECTION_SCORE_THRESHOLD}] \
[--det-config ${HUMAN_DETECTION_CONFIG_FILE}] \
[--det-checkpoint ${HUMAN_DETECTION_CHECKPOINT}] \
[--det-score-thr ${HUMAN_DETECTION_SCORE_THRESHOLD}] \
[--input-video] ${INPUT_VIDEO} \
[--label-map ${LABEL_MAP}] \
[--device ${DEVICE}] \
[--output-fps ${OUTPUT_FPS}] \
[--out-filename ${OUTPUT_FILENAME}] \
[--show] \
[--display-height] ${DISPLAY_HEIGHT} \
[--display-width] ${DISPLAY_WIDTH} \
[--predict-stepsize ${PREDICT_STEPSIZE}] \
[--clip-vis-length] ${CLIP_VIS_LENGTH}
```
Optional arguments:
- `SPATIOTEMPORAL_ACTION_DETECTION_CONFIG_FILE`: Path to the spatio-temporal action detection config file.
- `SPATIOTEMPORAL_ACTION_DETECTION_CHECKPOINT`: Path to the spatio-temporal action detection checkpoint.
- `ACTION_DETECTION_SCORE_THRESHOLD`: Action detection score threshold. Defaults to 0.4.
- `HUMAN_DETECTION_CONFIG_FILE`: Path to the human detection config file.
- `HUMAN_DETECTION_CHECKPOINT`: Path to the human detection checkpoint.
- `HUMAN_DETECTION_SCORE_THRESHOLD`: Human detection score threshold. Defaults to 0.9.
- `INPUT_VIDEO`: Webcam ID or path to a local video file. Defaults to `0`.
- `LABEL_MAP`: Label map file used. Defaults to `tools/data/ava/label_map.txt`.
- `DEVICE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `OUTPUT_FPS`: Frame rate of the output video. Defaults to 15.
- `OUTPUT_FILENAME`: Path of the output video. Defaults to `None`.
- `--show`: Whether to show predictions with `cv2.imshow`.
- `DISPLAY_HEIGHT`: Height of the displayed output images. Defaults to 0.
- `DISPLAY_WIDTH`: Width of the displayed output images. Defaults to 0. If `DISPLAY_HEIGHT <= 0 and DISPLAY_WIDTH <= 0`, the output images keep the shape of the input video.
- `PREDICT_STEPSIZE`: Make a prediction every N frames (to control the computational load). Defaults to 8.
- `CLIP_VIS_LENGTH`: Number of frames over which each prediction is visualized, i.e. each prediction is drawn onto `CLIP_VIS_LENGTH` frames. Defaults to 8.
Tips:
- How to choose the value of `--output-fps`?
  - `--output-fps` should be set to the frame rate of the video reading thread.
  - The frame rate of the video reading thread is logged in the format `DEBUG:__main__:Read Thread: {duration} ms, {fps} fps`.
- How to choose the value of `--predict-stepsize`?
  - It depends on the chosen model.
  - The time to build one model input (the video reading thread) should be greater than or equal to the model inference time (the main thread).
  - Both the input building time and the inference time are logged.
  - A larger `--predict-stepsize` means a longer input building time.
  - Lower `--predict-stepsize` to increase the inference frequency and make full use of the compute resources.
Examples:
The following examples assume that the current working directory is `$MMACTION2` and the required checkpoints have been downloaded to `checkpoints/`. Users can also load the checkpoints directly from the provided URLs; the files will be downloaded to `$HOME/.cache/torch/checkpoints` by default.
1. Use Faster RCNN as the human detector and SlowOnly-8x8-R101 as the action detector. Make a prediction every 8 frames, set the frame rate of the output video to 20, and show predictions with `cv2.imshow`.
```shell
python demo/webcam_demo_spatiotemporal_det.py \
--input-video 0 \
--config configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py \
--checkpoint https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201217-16378594.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--det-score-thr 0.9 \
--action-score-thr 0.5 \
--label-map tools/data/ava/label_map.txt \
--predict-stepsize 40 \
--output-fps 20 \
--show
```
## Predicting Action Labels from Human Pose
MMAction2 provides the following script to predict action labels based on human pose.
```shell
python demo/demo_skeleton.py ${VIDEO_FILE} ${OUT_FILENAME} \
[--config ${SKELETON_BASED_ACTION_RECOGNITION_CONFIG_FILE}] \
[--checkpoint ${SKELETON_BASED_ACTION_RECOGNITION_CHECKPOINT}] \
[--det-config ${HUMAN_DETECTION_CONFIG_FILE}] \
[--det-checkpoint ${HUMAN_DETECTION_CHECKPOINT}] \
[--det-score-thr ${HUMAN_DETECTION_SCORE_THRESHOLD}] \
[--pose-config ${HUMAN_POSE_ESTIMATION_CONFIG_FILE}] \
[--pose-checkpoint ${HUMAN_POSE_ESTIMATION_CHECKPOINT}] \
[--label-map ${LABEL_MAP}] \
[--device ${DEVICE}] \
[--short-side] ${SHORT_SIDE}
```
Optional arguments:
- `SKELETON_BASED_ACTION_RECOGNITION_CONFIG_FILE`: Path to the skeleton-based action recognition config file.
- `SKELETON_BASED_ACTION_RECOGNITION_CHECKPOINT`: Path to the skeleton-based action recognition checkpoint.
- `HUMAN_DETECTION_CONFIG_FILE`: Path to the human detection config file.
- `HUMAN_DETECTION_CHECKPOINT`: Path to the human detection checkpoint.
- `HUMAN_DETECTION_SCORE_THRESHOLD`: Human detection score threshold. Defaults to 0.9.
- `HUMAN_POSE_ESTIMATION_CONFIG_FILE`: Path to the human pose estimation config file (trained on COCO-keypoint).
- `HUMAN_POSE_ESTIMATION_CHECKPOINT`: Path to the human pose estimation checkpoint (trained on COCO-keypoint).
- `LABEL_MAP`: Label map file used. Defaults to `tools/data/skeleton/label_map_ntu120.txt`.
- `DEVICE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `SHORT_SIDE`: Short-side length used when extracting frames from the video. Defaults to 480.
Examples:
The following examples assume that the current working directory is `$MMACTION2`.
1. Use Faster RCNN as the human detector, HRNetw32 as the pose estimator, and PoseC3D-NTURGB+D-120-Xsub-keypoint as the skeleton-based action recognizer.
```shell
python demo/demo_skeleton.py demo/ntu_sample.avi demo/skeleton_demo.mp4 \
--config configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py \
--checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint/slowonly_r50_u48_240e_ntu120_xsub_keypoint-6736b03f.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--det-score-thr 0.9 \
--pose-config demo/hrnet_w32_coco_256x192.py \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--label-map tools/data/skeleton/label_map_ntu120.txt
```
2. Use Faster RCNN as the human detector, HRNetw32 as the pose estimator, and STGCN-NTURGB+D-60-Xsub-keypoint as the skeleton-based action recognizer.
```shell
python demo/demo_skeleton.py demo/ntu_sample.avi demo/skeleton_demo.mp4 \
--config configs/skeleton/stgcn/stgcn_80e_ntu60_xsub_keypoint.py \
--checkpoint https://download.openmmlab.com/mmaction/skeleton/stgcn/stgcn_80e_ntu60_xsub_keypoint/stgcn_80e_ntu60_xsub_keypoint-e7bb9653.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--det-score-thr 0.9 \
--pose-config demo/hrnet_w32_coco_256x192.py \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--label-map tools/data/skeleton/label_map_ntu120.txt
```
## Video Structuralization
MMAction2 provides the following script for video structuralization based on human pose and RGB.
```shell
python demo/demo_video_structuralize.py \
[--rgb-stdet-config ${RGB_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CONFIG_FILE}] \
[--rgb-stdet-checkpoint ${RGB_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CHECKPOINT}] \
[--skeleton-stdet-checkpoint ${SKELETON_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CHECKPOINT}] \
[--det-config ${HUMAN_DETECTION_CONFIG_FILE}] \
[--det-checkpoint ${HUMAN_DETECTION_CHECKPOINT}] \
[--pose-config ${HUMAN_POSE_ESTIMATION_CONFIG_FILE}] \
[--pose-checkpoint ${HUMAN_POSE_ESTIMATION_CHECKPOINT}] \
[--skeleton-config ${SKELETON_BASED_ACTION_RECOGNITION_CONFIG_FILE}] \
[--skeleton-checkpoint ${SKELETON_BASED_ACTION_RECOGNITION_CHECKPOINT}] \
[--rgb-config ${RGB_BASED_ACTION_RECOGNITION_CONFIG_FILE}] \
[--rgb-checkpoint ${RGB_BASED_ACTION_RECOGNITION_CHECKPOINT}] \
[--use-skeleton-stdet ${USE_SKELETON_BASED_SPATIO_TEMPORAL_DETECTION_METHOD}] \
[--use-skeleton-recog ${USE_SKELETON_BASED_ACTION_RECOGNITION_METHOD}] \
[--det-score-thr ${HUMAN_DETECTION_SCORE_THRE}] \
[--action-score-thr ${ACTION_DETECTION_SCORE_THRE}] \
[--video ${VIDEO_FILE}] \
[--label-map-stdet ${LABEL_MAP_FOR_SPATIO_TEMPORAL_ACTION_DETECTION}] \
[--device ${DEVICE}] \
[--out-filename ${OUTPUT_FILENAME}] \
[--predict-stepsize ${PREDICT_STEPSIZE}] \
[--output-stepsize ${OUTPUT_STEPSIZE}] \
[--output-fps ${OUTPUT_FPS}] \
[--cfg-options]
```
Optional arguments:
- `RGB_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CONFIG_FILE`: Path to the RGB-based spatio-temporal action detection config file.
- `RGB_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CHECKPOINT`: Path to the RGB-based spatio-temporal action detection checkpoint.
- `SKELETON_BASED_SPATIO_TEMPORAL_ACTION_DETECTION_CHECKPOINT`: Path to the skeleton-based spatio-temporal action detection checkpoint.
- `HUMAN_DETECTION_CONFIG_FILE`: Path to the human detection config file.
- `HUMAN_DETECTION_CHECKPOINT`: Path to the human detection checkpoint.
- `HUMAN_POSE_ESTIMATION_CONFIG_FILE`: Path to the human pose estimation config file (trained on COCO-keypoint).
- `HUMAN_POSE_ESTIMATION_CHECKPOINT`: Path to the human pose estimation checkpoint (trained on COCO-keypoint).
- `SKELETON_BASED_ACTION_RECOGNITION_CONFIG_FILE`: Path to the skeleton-based action recognition config file.
- `SKELETON_BASED_ACTION_RECOGNITION_CHECKPOINT`: Path to the skeleton-based action recognition checkpoint.
- `RGB_BASED_ACTION_RECOGNITION_CONFIG_FILE`: Path to the RGB-based action recognition config file.
- `RGB_BASED_ACTION_RECOGNITION_CHECKPOINT`: Path to the RGB-based action recognition checkpoint.
- `USE_SKELETON_BASED_SPATIO_TEMPORAL_DETECTION_METHOD`: Use the skeleton-based spatio-temporal action detection method.
- `USE_SKELETON_BASED_ACTION_RECOGNITION_METHOD`: Use the skeleton-based action recognition method.
- `HUMAN_DETECTION_SCORE_THRE`: Human detection score threshold. Defaults to 0.9.
- `ACTION_DETECTION_SCORE_THRE`: Action detection score threshold. Defaults to 0.5.
- `LABEL_MAP_FOR_SPATIO_TEMPORAL_ACTION_DETECTION`: Label map file used for spatio-temporal action detection. Defaults to `tools/data/ava/label_map.txt`.
- `LABEL_MAP`: Label map file used for action recognition. Defaults to `tools/data/kinetics/label_map_k400.txt`.
- `DEVICE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
- `OUTPUT_FILENAME`: Path of the output video. Defaults to `demo/test_stdet_recognition_output.mp4`.
- `PREDICT_STEPSIZE`: Make a prediction every N frames (to save computation). Defaults to 8.
- `OUTPUT_STEPSIZE`: Output 1 frame to the output video for every N frames of the input video. Defaults to 1. Note that `PREDICT_STEPSIZE % OUTPUT_STEPSIZE == 0` must hold.
- `OUTPUT_FPS`: Frame rate of the output video. Defaults to 24.
Examples:
The following examples assume that the current working directory is `$MMACTION2`.
1. Use Faster RCNN as the human detector, HRNetw32 as the pose estimator, and PoseC3D as both the skeleton-based action recognizer and the skeleton-based spatio-temporal action detector. Make a prediction every 8 frames, output every frame of the original video to the output video, and set the frame rate of the output video to 24.
```shell
python demo/demo_video_structuralize.py \
--skeleton-stdet-checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/posec3d_ava.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--pose-config demo/hrnet_w32_coco_256x192.py \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--skeleton-config configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py \
--skeleton-checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/posec3d_k400.pth \
--use-skeleton-stdet \
--use-skeleton-recog \
--label-map-stdet tools/data/ava/label_map.txt \
--label-map tools/data/kinetics/label_map_k400.txt
```
2. Use Faster RCNN as the human detector, TSN-R50-1x1x3 as the action recognizer, and SlowOnly-8x8-R101 as the spatio-temporal action detector. Make a prediction every 8 frames, output every frame of the original video to the output video, and set the frame rate of the output video to 24.
```shell
python demo/demo_video_structuralize.py \
--rgb-stdet-config configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py \
--rgb-stdet-checkpoint https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201217-16378594.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--rgb-config configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
--rgb-checkpoint https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
--label-map-stdet tools/data/ava/label_map.txt \
--label-map tools/data/kinetics/label_map_k400.txt
```
3. Use Faster RCNN as the human detector, HRNetw32 as the pose estimator, PoseC3D as the skeleton-based action recognizer, and SlowOnly-8x8-R101 as the spatio-temporal action detector. Make a prediction every 8 frames, output every frame of the original video to the output video, and set the frame rate of the output video to 24.
```shell
python demo/demo_video_structuralize.py \
--rgb-stdet-config configs/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb.py \
--rgb-stdet-checkpoint https://download.openmmlab.com/mmaction/detection/ava/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb/slowonly_omnisource_pretrained_r101_8x8x1_20e_ava_rgb_20201217-16378594.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--pose-config demo/hrnet_w32_coco_256x192.py \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--skeleton-config configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py \
--skeleton-checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/posec3d_k400.pth \
--use-skeleton-recog \
--label-map-stdet tools/data/ava/label_map.txt \
--label-map tools/data/kinetics/label_map_k400.txt
```
4. Use Faster RCNN as the human detector, HRNetw32 as the pose estimator, TSN-R50-1x1x3 as the action recognizer, and PoseC3D as the skeleton-based spatio-temporal action detector. Make a prediction every 8 frames, output every frame of the original video to the output video, and set the frame rate of the output video to 24.
```shell
python demo/demo_video_structuralize.py \
--skeleton-stdet-checkpoint https://download.openmmlab.com/mmaction/skeleton/posec3d/posec3d_ava.pth \
--det-config demo/faster_rcnn_r50_fpn_2x_coco.py \
--det-checkpoint http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth \
--pose-config demo/hrnet_w32_coco_256x192.py \
--pose-checkpoint https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w32_coco_256x192-c78dce93_20200708.pth \
--skeleton-config configs/skeleton/posec3d/slowonly_r50_u48_240e_ntu120_xsub_keypoint.py \
--rgb-config configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py \
--rgb-checkpoint https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \
--use-skeleton-stdet \
--label-map-stdet tools/data/ava/label_map.txt \
--label-map tools/data/kinetics/label_map_k400.txt
```
## Audio-based Action Recognition
The following script can be used for action recognition based on audio features.
Use `extract_audio.py` to extract audio from videos, and `build_audio_features.py` to extract audio features from the audio files.
```shell
python demo/demo_audio.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${AUDIO_FILE} {LABEL_FILE} [--device ${DEVICE}]
```
Optional arguments:
- `DEVICE`: Device to run the script on; supports cuda devices (e.g. `cuda:0`) or cpu (`cpu`). Defaults to `cuda:0`.
Examples:
The following examples assume that the current working directory is `$MMACTION2`.
1. Recognize an action from audio features with a TSN model on GPU.
```shell
python demo/demo_audio.py \
configs/recognition_audio/resnet/tsn_r18_64x1x1_100e_kinetics400_audio_feature.py \
https://download.openmmlab.com/mmaction/recognition/audio_recognition/tsn_r18_64x1x1_100e_kinetics400_audio_feature/tsn_r18_64x1x1_100e_kinetics400_audio_feature_20201012-bf34df6c.pth \
audio_feature.npy label_map_k400.txt
```
# FAQ
This page lists some common issues reported by users, together with their solutions.
Feel free to enrich this document if you find a frequent issue in the community and have a solution for it.
If your issue is not covered here, please open an issue using the provided [template](/.github/ISSUE_TEMPLATE/error-report.md) and make sure to fill in all the required information in the template.
## Installation
- **"No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'"**
1. Uninstall the existing `mmcv` in your environment with `pip uninstall mmcv`.
2. Install `mmcv-full` following the [MMCV installation documentation](https://mmcv.readthedocs.io/en/latest/#installation).
- **"OSError: MoviePy Error: creation of None failed because of the following error"**
Refer to the [MMAction2 installation documentation](https://github.com/open-mmlab/mmaction2/blob/master/docs_zh_CN/install.md#%E5%AE%89%E8%A3%85%E4%BE%9D%E8%B5%96%E5%8C%85).
1. For Windows users, [ImageMagick](https://www.imagemagick.org/script/index.php) is no longer detected automatically by MoviePy. You need to point `IMAGEMAGICK_BINARY` in `moviepy/config_defaults.py` to the ImageMagick binary named `magick`, e.g. `IMAGEMAGICK_BINARY = "C:\\Program Files\\ImageMagick_VERSION\\magick.exe"`.
2. For Linux users, if ImageMagick is not detected by MoviePy, comment out `<policy domain="path" rights="none" pattern="@*" />` in `/etc/ImageMagick-6/policy.xml`, i.e. change it to `<!-- <policy domain="path" rights="none" pattern="@*" /> -->`.
- **"Please install XXCODEBASE to use XXX"**
The error message "Please install XXCODEBASE to use XXX" means MMAction2 fails to import XXX from XXCODEBASE. You can run the corresponding import statement to locate the cause.
One possible reason is that, for some codebases in OpenMMLab, `mmcv-full` needs to be installed before they are installed.
## Data
- **FileNotFound errors such as `No such file or directory: xxx/xxx/img_00300.jpg`**
In MMAction2, the default value of `start_index` is 1 for rawframe datasets and 0 for video datasets.
If a FileNotFound error occurs on the first or last frame of a video, adjust the `start_index` used by the data pipeline in the config according to the offset of the first frame file, i.e. whether it is `xxx_00000.jpg` or `xxx_00001.jpg` (a hedged config sketch is shown below).
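A hedged sketch of how this could look in a config; the dataset type and exact keys depend on your own setup:
```python
# A hedged example: frame files start at img_00000.jpg instead of img_00001.jpg,
# so set start_index=0 for the rawframe datasets (keys depend on your own config).
data = dict(
    train=dict(type='RawframeDataset', start_index=0),
    val=dict(type='RawframeDataset', start_index=0),
    test=dict(type='RawframeDataset', start_index=0))
```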
- **How should input videos be handled in a dataset? Resize all videos to a fixed size such as "340x256", or resize them so that all short sides have the same length (256 or 320 pixels)?**
From our benchmarks, the latter (resizing the short side of all videos to the same length) generally works better, so "resize so that the short side is 256 pixels" is the default preprocessing. See the [TSN data benchmark](https://github.com/open-mmlab/mmaction2/tree/master/configs/recognition/tsn) and the [SlowOnly data benchmark](https://github.com/open-mmlab/mmaction2/tree/master/configs/recognition/tsn) for the related results.
- **The input data format (videos or frames) does not match the data pipeline, causing errors such as `KeyError: 'total_frames'`**
There are dedicated pipelines for both videos and frames.
**For videos**, they have to be decoded on the fly. The available decoding steps include `DecordInit & DecordDecode`, `OpenCVInit & OpenCVDecode`, `PyAVInit & PyAVDecode`, etc. See [this example](https://github.com/open-mmlab/mmaction2/blob/023777cfd26bb175f85d78c455f6869673e0aa09/configs/recognition/slowfast/slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py#L47-L49).
**For frames**, they have already been decoded offline, so `RawFrameDecode` is enough. See [this example](https://github.com/open-mmlab/mmaction2/blob/023777cfd26bb175f85d78c455f6869673e0aa09/configs/recognition/slowfast/slowfast_r50_8x8x1_256e_kinetics400_rgb.py#L49).
`KeyError: 'total_frames'` is caused by wrongly using `RawFrameDecode` on videos: with video input, `total_frames` cannot be known in advance. A sketch of the two pipeline heads is shown below.
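A hedged sketch of the two pipeline heads; the transform names follow MMAction2, while the sampling parameters are illustrative:
```python
# For video files: initialize a decoder, sample frame indices, then decode.
video_pipeline_head = [
    dict(type='DecordInit'),
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1, test_mode=True),
    dict(type='DecordDecode'),
]
# For pre-extracted frames: the images are already on disk, so no video decoder is needed.
rawframe_pipeline_head = [
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1, test_mode=True),
    dict(type='RawFrameDecode'),
]
```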
## Training
- **How to use a trained recognizer as the pretrained backbone of another model?**
See [Use Pre-Trained Model](https://github.com/open-mmlab/mmaction2/blob/master/docs_zh_CN/tutorials/2_finetune.md#%E4%BD%BF%E7%94%A8%E9%A2%84%E8%AE%AD%E7%BB%83%E6%A8%A1%E5%9E%8B).
To use the pretrained model for the whole network, set `load_from` in the config to the link of the pretrained model.
To use the pretrained model for the backbone only, set `pretrained` under `backbone` in the config to the path or link of the pretrained model (a hedged sketch follows below).
During training, the parameters of the pretrained model that do not match the backbone are ignored.
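A hedged sketch of the two options; the URL below is a placeholder, not a real checkpoint:
```python
# Option 1: initialize the whole network from a checkpoint (placeholder URL).
load_from = 'https://download.openmmlab.com/mmaction/some_recognizer.pth'  # placeholder

# Option 2: initialize only the backbone (placeholder URL); non-matching keys are ignored.
model = dict(
    backbone=dict(
        pretrained='https://download.openmmlab.com/mmaction/some_recognizer.pth'))  # placeholder
```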
- **How to visualize the accuracy/loss curves of the training and validation sets in real time?**
Use `TensorboardLoggerHook` in `log_config`, e.g.:
```python
log_config=dict(
interval=20,
hooks=[
dict(type='TensorboardLoggerHook')
]
)
```
See [Tutorial 1: How to Write Config Files](tutorials/1_config.md), [Tutorial 7: How to Customize Runtime Settings](tutorials/7_customize_runtime.md#log-config) and [this example](https://github.com/open-mmlab/mmaction2/blob/master/configs/recognition/tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py#L118) for more details.
- **Error raised in batchnorm.py: Expected more than 1 value per channel when training**
BatchNorm layers require a batch size larger than 1. When building the dataset, if `drop_last` is set to `False`, the last batch of an epoch may sometimes have a batch size of 1, which raises this error during training. Set `drop_last=True` to avoid it, e.g.:
```python
train_dataloader=dict(drop_last=True)
```
- **How to freeze some parameters of the backbone when fine-tuning a model?**
See [`def _freeze_stages()`](https://github.com/open-mmlab/mmaction2/blob/0149a0e8c1e0380955db61680c0006626fd008e9/mmaction/models/backbones/x3d.py#L458) and [`frozen_stages`](https://github.com/open-mmlab/mmaction2/blob/0149a0e8c1e0380955db61680c0006626fd008e9/mmaction/models/backbones/x3d.py#L183-L184). Remember to set `find_unused_parameters = True` for distributed training and testing.
In fact, apart from a few models such as C3D, users can freeze backbone parameters by setting `frozen_stages`, since most backbones inherit from `ResNet` or `ResNet3D`, both of which support the `_freeze_stages()` method (a hedged config sketch is shown below).
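A hedged sketch of what this could look like in a config; the number of frozen stages is illustrative:
```python
# Freeze the stem and the first residual stage of a ResNet-style backbone.
model = dict(backbone=dict(frozen_stages=1))
# Needed for distributed training/testing when some parameters receive no gradients.
find_unused_parameters = True
```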
- **How to set `load_from` in the config for fine-tuning?**
MMAction2 sets `load_from=None` by default in `configs/_base_/default_runtime.py`. Thanks to config inheritance, users can simply override `load_from` in a downstream config, for example as sketched below.
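A hedged sketch; the `_base_` relative path and the checkpoint path are illustrative:
```python
# Inherit the default runtime, then override load_from with your own checkpoint.
_base_ = ['../../_base_/default_runtime.py']  # illustrative relative path
load_from = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'  # illustrative
```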
## Testing
- **How to normalize the predicted scores to the \[0, 1\] range with softmax?**
Set `model['test_cfg'] = dict(average_clips='prob')`, for instance as below.
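In config syntax this is simply:
```python
# Apply softmax to clip scores before averaging them, so the output lies in [0, 1].
model = dict(test_cfg=dict(average_clips='prob'))
```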
- **What if the model is too large to fit even a single testing sample into GPU memory?**
By default, 3D models are tested with the `10 clips x 3 crops` setting, i.e. 10 clips are sampled and 3 crops are taken per frame, 30 views in total.
For very large models, GPU memory may not hold even a single video. In that case, set `max_testing_views=n` in `model['test_cfg']` of the config.
With this setting, at most n views are processed per batch during inference, saving GPU memory (see the sketch below).
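A hedged sketch; the value 4 is illustrative:
```python
# Process at most 4 of the 30 test views per forward pass to reduce peak GPU memory.
model = dict(test_cfg=dict(max_testing_views=4))
```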
- **How to save test results?**
Pass the optional argument `--out xxx.json/pkl/yaml` to the test command to save the results to a file for later inspection. The output results keep the same order as the test set.
In addition, MMAction2 provides an analysis tool in [`tools/analysis/eval_metric.py`](/tools/analysis/eval_metric.py) for evaluating a model from such a result file.
## Deployment
- **Why do ONNX models converted by MMAction2 throw errors when being converted to other frameworks such as TensorRT?**
For now, we can only make sure that models in MMAction2 are ONNX-compatible. However, some operators in ONNX may not be supported by other frameworks, e.g. TensorRT in [this issue](https://github.com/open-mmlab/mmaction2/issues/414). When this happens, and if `pytorch2onnx.py` ran without problems and the converted ONNX model passed the numerical check, feel free to open an issue and ask the community for help.
# Feature Extraction
MMAction2 provides easy-to-use scripts for feature extraction.
## Clip-level Feature Extraction
Clip-level feature extraction extracts deep features from clips, which usually last a few seconds to tens of seconds. The feature extracted from each clip is an n-dimensional vector. When multi-view feature extraction is used, e.g. n clips x m crops, the extracted feature is the average over the n\*m views.
Before applying clip-level feature extraction, prepare a video list containing all the videos to extract features from. For example, a video list of the videos in UCF101 looks like:
```
ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi
ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c02.avi
ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c03.avi
ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c04.avi
ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c05.avi
...
YoYo/v_YoYo_g25_c01.avi
YoYo/v_YoYo_g25_c02.avi
YoYo/v_YoYo_g25_c03.avi
YoYo/v_YoYo_g25_c04.avi
YoYo/v_YoYo_g25_c05.avi
```
Assuming the UCF101 videos are located at `data/ucf101/videos` and the video list file is named `ucf101.txt`, clip-level features can be extracted from UCF101 with TSN (pretrained on Kinetics-400) using the following script:
```shell
python tools/misc/clip_feature_extraction.py \
configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_320p_1x1x3_100e_kinetics400_rgb_20200702-cc665e2a.pth \
--video-list ucf101.txt \
--video-root data/ucf101/videos \
--out ucf101_feature.pkl
```
The extracted features are stored in `ucf101_feature.pkl`.
Distributed clip-level feature extraction is also supported. Below is an example for a compute node with 8 GPUs.
```shell
bash tools/misc/dist_clip_feature_extraction.sh \
configs/recognition/tsn/tsn_r50_clip_feature_extraction_1x1x3_rgb.py \
https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_320p_1x1x3_100e_kinetics400_rgb/tsn_r50_320p_1x1x3_100e_kinetics400_rgb_20200702-cc665e2a.pth \
8 \
--video-list ucf101.txt \
--video-root data/ucf101/videos \
--out ucf101_feature.pkl
```
To extract clip-level features from UCF101 with SlowOnly (pretrained on Kinetics-400), use the following script:
```shell
python tools/misc/clip_feature_extraction.py \
configs/recognition/slowonly/slowonly_r50_clip_feature_extraction_4x16x1_rgb.py \
https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb_20201014-c9cdc656.pth \
--video-list ucf101.txt \
--video-root data/ucf101/videos \
--out ucf101_feature.pkl
```
These two config files show the minimal configuration needed for feature extraction. Other existing config files can also be used, as long as they train and test on video data rather than raw frames.
```shell
python tools/misc/clip_feature_extraction.py \
configs/recognition/slowonly/slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py \
https://download.openmmlab.com/mmaction/recognition/slowonly/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb/slowonly_r50_video_320p_4x16x1_256e_kinetics400_rgb_20201014-c9cdc656.pth \
--video-list ucf101.txt \
--video-root data/ucf101/videos \
--out ucf101_feature.pkl
```