# Copyright (c) OpenMMLab. All rights reserved.
import mmcv
import numpy as np
import pytest
import torch
import torch.nn as nn

from mmaction.apis import inference_recognizer, init_recognizer

video_config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'  # noqa: E501
frame_config_file = 'configs/recognition/tsn/tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py'  # noqa: E501
flow_frame_config_file = 'configs/recognition/tsn/tsn_r50_320p_1x1x3_110e_kinetics400_flow.py'  # noqa: E501
video_path = 'demo/demo.mp4'
frames_path = 'tests/data/imgs'


def test_init_recognizer():
    with pytest.raises(TypeError):
        # config must be a filename or Config object
        init_recognizer(dict(config_file=None))

    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    model = init_recognizer(video_config_file, None, device)

    config = mmcv.Config.fromfile(video_config_file)
    config.model.backbone.pretrained = None

    assert isinstance(model, nn.Module)
    if torch.cuda.is_available():
        assert next(model.parameters()).is_cuda is True
    else:
        assert next(model.parameters()).is_cuda is False
    assert model.cfg.model.backbone.pretrained is None


def test_video_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    model = init_recognizer(video_config_file, None, device)

    with pytest.raises(RuntimeError):
        # video path doesn't exist
        inference_recognizer(model, 'missing.mp4')

    for ops in model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'

    top5_label = inference_recognizer(model, video_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    _, feat = inference_recognizer(
        model, video_path, outputs=('backbone', 'cls_head'), as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    _, feat = inference_recognizer(
        model,
        video_path,
        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)

    cfg_file = 'configs/recognition/slowfast/slowfast_r50_video_inference_4x16x1_256e_kinetics400_rgb.py'  # noqa: E501
    sf_model = init_recognizer(cfg_file, None, device)
    for ops in sf_model.cfg.data.test.pipeline:
        # Changes to reduce memory in order to pass CI
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            ops['type'] = 'CenterCrop'
        if ops['type'] == 'SampleFrames':
            ops['num_clips'] = 1
    _, feat = inference_recognizer(
        sf_model, video_path, outputs=('backbone', 'cls_head'))
    assert isinstance(feat, dict) and isinstance(feat['backbone'], tuple)
    assert 'backbone' in feat and 'cls_head' in feat
    assert len(feat['backbone']) == 2
    assert isinstance(feat['backbone'][0], torch.Tensor)
    assert isinstance(feat['backbone'][1], torch.Tensor)
    assert feat['backbone'][0].size() == (1, 2048, 4, 8, 8)
    assert feat['backbone'][1].size() == (1, 256, 32, 8, 8)
    assert feat['cls_head'].size() == (1, 400)


def test_frames_inference_recognizer():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    rgb_model = init_recognizer(frame_config_file, None, device)
    flow_model = init_recognizer(flow_frame_config_file, None, device)

    with pytest.raises(RuntimeError):
        # video path doesn't exist
        inference_recognizer(rgb_model, 'missing_path')

    for ops in rgb_model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'
            ops['crop_size'] = 224
    for ops in flow_model.cfg.data.test.pipeline:
        if ops['type'] in ('TenCrop', 'ThreeCrop'):
            # Use CenterCrop to reduce memory in order to pass CI
            ops['type'] = 'CenterCrop'
            ops['crop_size'] = 224

    top5_label = inference_recognizer(rgb_model, frames_path)
    scores = [item[1] for item in top5_label]
    assert len(top5_label) == 5
    assert scores == sorted(scores, reverse=True)

    _, feat = inference_recognizer(
        flow_model,
        frames_path,
        outputs=('backbone', 'cls_head'),
        as_tensor=False)
    assert isinstance(feat, dict)
    assert 'backbone' in feat and 'cls_head' in feat
    assert isinstance(feat['backbone'], np.ndarray)
    assert isinstance(feat['cls_head'], np.ndarray)
    assert feat['backbone'].shape == (25, 2048, 7, 7)
    assert feat['cls_head'].shape == (1, 400)

    _, feat = inference_recognizer(
        rgb_model,
        frames_path,
        outputs=('backbone.layer3', 'backbone.layer3.1.conv1'))
    assert 'backbone.layer3.1.conv1' in feat and 'backbone.layer3' in feat
    assert isinstance(feat['backbone.layer3.1.conv1'], torch.Tensor)
    assert isinstance(feat['backbone.layer3'], torch.Tensor)
    assert feat['backbone.layer3'].size() == (25, 1024, 14, 14)
    assert feat['backbone.layer3.1.conv1'].size() == (25, 256, 14, 14)