# Copyright (c) OpenMMLab. All rights reserved.
import torch

from mmaction.models import build_recognizer

from ..base import generate_recognizer_demo_inputs, get_recognizer_cfg


def test_tsn():
    config = get_recognizer_cfg('tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

        # Test forward dummy
        recognizer.forward_dummy(imgs, softmax=False)
        res = recognizer.forward_dummy(imgs, softmax=True)[0]
        assert torch.min(res) >= 0
        assert torch.max(res) <= 1

    # Test an mmcls backbone
    mmcls_backbone = dict(
        type='mmcls.ResNeXt',
        depth=101,
        num_stages=4,
        out_indices=(3, ),
        groups=32,
        width_per_group=4,
        style='pytorch')
    config.model['backbone'] = mmcls_backbone

    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # Test mixup forward
    config = get_recognizer_cfg(
        'tsn/tsn_r50_video_mixup_1x1x8_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None
    recognizer = build_recognizer(config.model)
    input_shape = (2, 8, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)
    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']
    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test a torchvision backbone
    tv_backbone = dict(type='torchvision.densenet161', pretrained=True)
    config.model['backbone'] = tv_backbone
    config.model['cls_head']['in_channels'] = 2208
    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # Test a timm backbone
    timm_backbone = dict(type='timm.efficientnet_b0', pretrained=False)
    config.model['backbone'] = timm_backbone
    config.model['cls_head']['in_channels'] = 1280
    recognizer = build_recognizer(config.model)

    input_shape = (1, 3, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)
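
# ``generate_recognizer_demo_inputs`` (imported from ``..base``) is assumed
# to build random inputs shaped like real pipeline output. A minimal sketch
# of such a helper, with illustrative names and defaults rather than the
# real implementation:
#
#     def _demo_inputs(input_shape, num_classes=400):
#         imgs = torch.rand(input_shape)  # (N, num_segments, C, H, W)
#         gt_labels = torch.randint(0, num_classes, (input_shape[0], ))
#         return {'imgs': imgs, 'gt_labels': gt_labels}
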

def test_tsm():
    config = get_recognizer_cfg('tsm/tsm_r50_1x1x8_50e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 8, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # Test twice sample + 3 crops
    input_shape = (2, 48, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)
    imgs = demo_inputs['imgs']

    config.model.test_cfg = dict(average_clips='prob')
    recognizer = build_recognizer(config.model)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)


def test_trn():
    config = get_recognizer_cfg('trn/trn_r50_1x1x8_50e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 8, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # Test twice sample + 3 crops
    input_shape = (2, 48, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)
    imgs = demo_inputs['imgs']

    config.model.test_cfg = dict(average_clips='prob')
    recognizer = build_recognizer(config.model)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)


def test_tpn():
    config = get_recognizer_cfg('tpn/tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 8, 3, 224, 224)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)
    assert 'loss_aux' in losses and 'loss_cls' in losses

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)

    # Test forward dummy
    with torch.no_grad():
        _recognizer = build_recognizer(config.model)
        img_list = [img[None, :] for img in imgs]
        if hasattr(_recognizer, 'forward_dummy'):
            _recognizer.forward = _recognizer.forward_dummy
            for one_img in img_list:
                _recognizer(one_img)


def test_tanet():
    config = get_recognizer_cfg(
        'tanet/tanet_r50_dense_1x1x8_100e_kinetics400_rgb.py')
    config.model['backbone']['pretrained'] = None

    recognizer = build_recognizer(config.model)

    input_shape = (1, 8, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)

    imgs = demo_inputs['imgs']
    gt_labels = demo_inputs['gt_labels']

    losses = recognizer(imgs, gt_labels)
    assert isinstance(losses, dict)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

    # Test twice sample + 3 crops
    input_shape = (2, 48, 3, 32, 32)
    demo_inputs = generate_recognizer_demo_inputs(input_shape)
    imgs = demo_inputs['imgs']
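    # 48 frames per video = 2 temporal samples ("twice sample") x 3 spatial
    # crops x 8 segments; average_clips='prob' below makes the recognizer
    # average softmax scores over the resulting views of each video.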
    config.model.test_cfg = dict(average_clips='prob')
    recognizer = build_recognizer(config.model)

    # Test forward test
    with torch.no_grad():
        img_list = [img[None, :] for img in imgs]
        for one_img in img_list:
            recognizer(one_img, None, return_loss=False)

        # Test forward gradcam
        recognizer(imgs, gradcam=True)
        for one_img in img_list:
            recognizer(one_img, gradcam=True)
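
# These tests are plain pytest functions and can be run directly, e.g.
# (the path is illustrative; use the actual location of this module):
#
#     pytest tests/test_models/test_recognizers/test_recognizer2d.py -v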