Unverified commit 2e5628b4 authored by q.yao, committed by GitHub

[Refactor]: Remove deployment for dev-2.x (#2225)

* remove deploy for 2.0

* update onnx ut
parent 961373ad
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os
import torch
if torch.__version__ == 'parrots':
@@ -22,15 +19,3 @@ else:
def get_compiling_cuda_version():
return ext_module.get_compiling_cuda_version()
def get_onnxruntime_op_path():
wildcard = os.path.join(
os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
'_ext_ort.*.so')
paths = glob.glob(wildcard)
if len(paths) > 0:
return paths[0]
else:
return ''
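# A minimal usage sketch (assumption, not part of this diff; 'model.onnx' is
# a placeholder): the path returned above was typically registered with an
# onnxruntime session so that exported custom ops could be executed.
import onnxruntime as ort

session_options = ort.SessionOptions()
ort_op_path = get_onnxruntime_op_path()
if ort_op_path:
    session_options.register_custom_ops_library(ort_op_path)
session = ort.InferenceSession(
    'model.onnx', session_options, providers=['CPUExecutionProvider'])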
@@ -17,7 +17,7 @@ ext_module = ext_loader.load_ext(
class MaskedConv2dFunction(Function):
@staticmethod
-    def symbolic(g, features, mask, weight, bias, padding, stride):
+    def symbolic(g, features, mask, weight, bias, padding, stride=1):
return g.op(
'mmcv::MMCVMaskedConv2d',
features,
......
import os
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
@@ -37,49 +36,34 @@ class NMSop(torch.autograd.Function):
@staticmethod
def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
max_num):
-        from ..onnx import is_custom_op_loaded
-        has_custom_op = is_custom_op_loaded()
-        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
-        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
-        if has_custom_op and (not is_trt_backend):
-            return g.op(
-                'mmcv::NonMaxSuppression',
-                bboxes,
-                scores,
-                iou_threshold_f=float(iou_threshold),
-                offset_i=int(offset))
-        else:
-            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
-            from ..onnx.onnx_utils.symbolic_helper import _size_helper
-            boxes = unsqueeze(g, bboxes, 0)
-            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
-            if max_num > 0:
-                max_num = g.op(
-                    'Constant',
-                    value_t=torch.tensor(max_num, dtype=torch.long))
-            else:
-                dim = g.op('Constant', value_t=torch.tensor(0))
-                max_num = _size_helper(g, bboxes, dim)
-            max_output_per_class = max_num
-            iou_threshold = g.op(
-                'Constant',
-                value_t=torch.tensor([iou_threshold], dtype=torch.float))
-            score_threshold = g.op(
-                'Constant',
-                value_t=torch.tensor([score_threshold], dtype=torch.float))
-            nms_out = g.op('NonMaxSuppression', boxes, scores,
-                           max_output_per_class, iou_threshold,
-                           score_threshold)
-            return squeeze(
-                g,
-                select(
-                    g, nms_out, 1,
-                    g.op(
-                        'Constant',
-                        value_t=torch.tensor([2], dtype=torch.long))), 1)
+        from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
+        from ..onnx.onnx_utils.symbolic_helper import _size_helper
+        boxes = unsqueeze(g, bboxes, 0)
+        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+        if max_num > 0:
+            max_num = g.op(
+                'Constant', value_t=torch.tensor(max_num, dtype=torch.long))
+        else:
+            dim = g.op('Constant', value_t=torch.tensor(0))
+            max_num = _size_helper(g, bboxes, dim)
+        max_output_per_class = max_num
+        iou_threshold = g.op(
+            'Constant',
+            value_t=torch.tensor([iou_threshold], dtype=torch.float))
+        score_threshold = g.op(
+            'Constant',
+            value_t=torch.tensor([score_threshold], dtype=torch.float))
+        nms_out = g.op('NonMaxSuppression', boxes, scores,
+                       max_output_per_class, iou_threshold, score_threshold)
+        return squeeze(
+            g,
+            select(
+                g, nms_out, 1,
+                g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))),
+            1)
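# A minimal eager-mode sketch (illustrative only; the helper name is
# hypothetical): ONNX NonMaxSuppression returns [num_selected, 3] rows of
# (batch_index, class_index, box_index); the select/squeeze tail above keeps
# only the box indices.
def _nms_out_to_box_inds(nms_out):
    # keep column 2 (box index) of the [num_selected, 3] output
    return nms_out[:, 2]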
class SoftNMSop(torch.autograd.Function):
......
# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend # noqa
from os import path as osp
from typing import Tuple, Union
import torch
@@ -89,13 +88,6 @@ def bilinear_grid_sample(im: Tensor,
return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)
def is_in_onnx_export_without_custom_ops() -> bool:
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
return torch.onnx.is_in_onnx_export(
) and not osp.exists(ort_custom_op_path)
def normalize(grid: Tensor) -> Tensor:
"""Normalize input grid from [-1, 1] to [0, 1]
@@ -280,7 +272,7 @@ def point_sample(input: Tensor,
if points.dim() == 3:
add_dim = True
points = points.unsqueeze(2)
-    if is_in_onnx_export_without_custom_ops():
+    if torch.onnx.is_in_onnx_export():
# If custom ops for onnx runtime not compiled use python
# implementation of grid_sample function to make onnx graph
# with supported nodes
......
@@ -19,50 +19,35 @@ class RoIAlignFunction(Function):
@staticmethod
def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
pool_mode, aligned):
-        from ..onnx import is_custom_op_loaded
-        has_custom_op = is_custom_op_loaded()
-        if has_custom_op:
-            return g.op(
-                'mmcv::MMCVRoiAlign',
-                input,
-                rois,
-                output_height_i=output_size[0],
-                output_width_i=output_size[1],
-                spatial_scale_f=spatial_scale,
-                sampling_ratio_i=sampling_ratio,
-                mode_s=pool_mode,
-                aligned_i=aligned)
-        else:
-            from torch.onnx import TensorProtoDataType
-            from torch.onnx.symbolic_helper import _slice_helper
-            from torch.onnx.symbolic_opset9 import squeeze, sub
-            # batch_indices = rois[:, 0].long()
-            batch_indices = _slice_helper(
-                g, rois, axes=[1], starts=[0], ends=[1])
-            batch_indices = squeeze(g, batch_indices, 1)
-            batch_indices = g.op(
-                'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
-            # rois = rois[:, 1:]
-            rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
-            if aligned:
-                # rois -= 0.5/spatial_scale
-                aligned_offset = g.op(
-                    'Constant',
-                    value_t=torch.tensor([0.5 / spatial_scale],
-                                         dtype=torch.float32))
-                rois = sub(g, rois, aligned_offset)
-            # roi align
-            return g.op(
-                'RoiAlign',
-                input,
-                rois,
-                batch_indices,
-                output_height_i=output_size[0],
-                output_width_i=output_size[1],
-                spatial_scale_f=spatial_scale,
-                sampling_ratio_i=max(0, sampling_ratio),
-                mode_s=pool_mode)
+        from torch.onnx import TensorProtoDataType
+        from torch.onnx.symbolic_helper import _slice_helper
+        from torch.onnx.symbolic_opset9 import squeeze, sub
+        # batch_indices = rois[:, 0].long()
+        batch_indices = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1])
+        batch_indices = squeeze(g, batch_indices, 1)
+        batch_indices = g.op(
+            'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
+        # rois = rois[:, 1:]
+        rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
+        if aligned:
+            # rois -= 0.5/spatial_scale
+            aligned_offset = g.op(
+                'Constant',
+                value_t=torch.tensor([0.5 / spatial_scale],
+                                     dtype=torch.float32))
+            rois = sub(g, rois, aligned_offset)
+        # roi align
+        return g.op(
+            'RoiAlign',
+            input,
+            rois,
+            batch_indices,
+            output_height_i=output_size[0],
+            output_width_i=output_size[1],
+            spatial_scale_f=spatial_scale,
+            sampling_ratio_i=max(0, sampling_ratio),
+            mode_s=pool_mode)
@staticmethod
def forward(ctx: Any,
......
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .init_plugins import is_tensorrt_plugin_loaded, load_tensorrt_plugin
from .preprocess import preprocess_onnx
def is_tensorrt_available():
try:
import tensorrt
del tensorrt
return True
except ModuleNotFoundError:
return False
__all__ = []
if is_tensorrt_available():
from .tensorrt_utils import (TRTWraper, TRTWrapper, load_trt_engine,
onnx2trt, save_trt_engine)
# load tensorrt plugin lib
load_tensorrt_plugin()
__all__.extend([
'onnx2trt', 'save_trt_engine', 'load_trt_engine', 'TRTWraper',
'TRTWrapper'
])
__all__.extend(['is_tensorrt_plugin_loaded', 'preprocess_onnx'])
# Copyright (c) OpenMMLab. All rights reserved.
import ctypes
import glob
import os
import warnings
def get_tensorrt_op_path() -> str:
"""Get TensorRT plugins library path."""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
wildcard = os.path.join(
os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
'_ext_trt.*.so')
paths = glob.glob(wildcard)
lib_path = paths[0] if len(paths) > 0 else ''
return lib_path
plugin_is_loaded = False
def is_tensorrt_plugin_loaded() -> bool:
"""Check if TensorRT plugins library is loaded or not.
Returns:
bool: plugin_is_loaded flag
"""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
global plugin_is_loaded
return plugin_is_loaded
def load_tensorrt_plugin() -> None:
"""load TensorRT plugins library."""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
global plugin_is_loaded
lib_path = get_tensorrt_op_path()
if (not plugin_is_loaded) and os.path.exists(lib_path):
ctypes.CDLL(lib_path)
plugin_is_loaded = True
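# A minimal usage sketch (assumption): call once before building or
# deserializing an engine that contains mmcv custom TensorRT ops.
load_tensorrt_plugin()
print('mmcv TensorRT plugins loaded:', is_tensorrt_plugin_loaded())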
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
import numpy as np
import onnx
def preprocess_onnx(onnx_model: onnx.ModelProto) -> onnx.ModelProto:
"""Modify onnx model to match with TensorRT plugins in mmcv.
    There are some conflicts between onnx node definitions and TensorRT
    limits. This function performs preprocessing on the onnx model to
    resolve the conflicts.
For example, onnx `attribute` is loaded in TensorRT on host and onnx
`input` is loaded on device. The shape inference is performed on host, so
any `input` related to shape (such as `max_output_boxes_per_class` in
NonMaxSuppression) should be transformed to `attribute` before conversion.
Arguments:
onnx_model (onnx.ModelProto): Input onnx model.
Returns:
onnx.ModelProto: Modified onnx model.
"""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
graph = onnx_model.graph
nodes = graph.node
initializers = graph.initializer
node_dict = {}
for node in nodes:
node_outputs = node.output
for output in node_outputs:
if len(output) > 0:
node_dict[output] = node
init_dict = {_.name: _ for _ in initializers}
nodes_name_to_remove = set()
def is_node_without_output(name):
for node_name, node in node_dict.items():
if node_name not in nodes_name_to_remove:
if name in node.input:
return False
return True
def mark_nodes_to_remove(name):
node = node_dict[name]
nodes_name_to_remove.add(name)
for input_node_name in node.input:
if is_node_without_output(input_node_name):
mark_nodes_to_remove(input_node_name)
def parse_data(name, typ, default_value=0):
if name in node_dict:
node = node_dict[name]
if node.op_type == 'Constant':
raw_data = node.attribute[0].t.raw_data
else:
mark_nodes_to_remove(name)
return default_value
elif name in init_dict:
raw_data = init_dict[name].raw_data
else:
            raise ValueError(f'{name} not found in node or initializer.')
return np.frombuffer(raw_data, typ).item()
nrof_node = len(nodes)
for idx in range(nrof_node):
node = nodes[idx]
node_attributes = node.attribute
node_inputs = node.input
node_outputs = node.output
node_name = node.name
# process NonMaxSuppression node
if node.op_type == 'NonMaxSuppression':
center_point_box = 0
max_output_boxes_per_class = 1000000
iou_threshold = 0.3
score_threshold = 0.0
offset = 0
for attribute in node_attributes:
if attribute.name == 'center_point_box':
center_point_box = attribute.i
elif attribute.name == 'offset':
offset = attribute.i
if len(node_inputs) >= 3:
max_output_boxes_per_class = parse_data(
node_inputs[2], np.int64, max_output_boxes_per_class)
mark_nodes_to_remove(node_inputs[2])
if len(node_inputs) >= 4:
iou_threshold = parse_data(node_inputs[3], np.float32,
iou_threshold)
mark_nodes_to_remove(node_inputs[3])
if len(node_inputs) >= 5:
score_threshold = parse_data(node_inputs[4], np.float32)
mark_nodes_to_remove(node_inputs[4])
new_node = onnx.helper.make_node(
'NonMaxSuppression',
node_inputs[:2],
node_outputs,
name=node_name,
center_point_box=center_point_box,
max_output_boxes_per_class=max_output_boxes_per_class,
iou_threshold=iou_threshold,
score_threshold=score_threshold,
offset=offset)
for output in node_outputs:
if output in node_dict:
node_dict[output] = new_node
nodes.insert(idx, new_node)
nodes.remove(node)
elif node.op_type == 'InstanceNormalization':
# directly change op name
node.op_type = 'MMCVInstanceNormalization'
for node_name in nodes_name_to_remove:
nodes.remove(node_dict[node_name])
return onnx_model
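# A minimal usage sketch (assumption; 'model.onnx' is a placeholder): fold
# the shape-related NonMaxSuppression inputs into attributes before handing
# the model to the TensorRT parser.
model = onnx.load('model.onnx')
model = preprocess_onnx(model)
onnx.save(model, 'model_preprocessed.onnx')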
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Union
import onnx
import tensorrt as trt
import torch
from .preprocess import preprocess_onnx
def onnx2trt(onnx_model: Union[str, onnx.ModelProto],
opt_shape_dict: dict,
log_level: trt.ILogger.Severity = trt.Logger.ERROR,
fp16_mode: bool = False,
max_workspace_size: int = 0,
device_id: int = 0) -> trt.ICudaEngine:
"""Convert onnx model to tensorrt engine.
Arguments:
onnx_model (str or onnx.ModelProto): the onnx model to convert from
opt_shape_dict (dict): the min/opt/max shape of each input
log_level (TensorRT log level): the log level of TensorRT
fp16_mode (bool): enable fp16 mode
max_workspace_size (int): set max workspace size of TensorRT engine.
            Some tactics and layers need a large workspace.
        device_id (int): choose the device on which to create the engine.
Returns:
tensorrt.ICudaEngine: the TensorRT engine created from onnx_model
Example:
>>> engine = onnx2trt(
>>> "onnx_model.onnx",
>>> {'input': [[1, 3, 160, 160],
>>> [1, 3, 320, 320],
>>> [1, 3, 640, 640]]},
>>> log_level=trt.Logger.WARNING,
>>> fp16_mode=True,
>>> max_workspace_size=1 << 30,
>>> device_id=0)
"""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
device = torch.device(f'cuda:{device_id}')
# create builder and network
logger = trt.Logger(log_level)
builder = trt.Builder(logger)
EXPLICIT_BATCH = 1 << (int)(
trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(EXPLICIT_BATCH)
# parse onnx
parser = trt.OnnxParser(network, logger)
if isinstance(onnx_model, str):
onnx_model = onnx.load(onnx_model)
onnx_model = preprocess_onnx(onnx_model)
if not parser.parse(onnx_model.SerializeToString()):
error_msgs = ''
for error in range(parser.num_errors):
error_msgs += f'{parser.get_error(error)}\n'
raise RuntimeError(f'parse onnx failed:\n{error_msgs}')
# config builder
builder.max_workspace_size = max_workspace_size
config = builder.create_builder_config()
config.max_workspace_size = max_workspace_size
profile = builder.create_optimization_profile()
for input_name, param in opt_shape_dict.items():
min_shape = tuple(param[0][:])
opt_shape = tuple(param[1][:])
max_shape = tuple(param[2][:])
profile.set_shape(input_name, min_shape, opt_shape, max_shape)
config.add_optimization_profile(profile)
if fp16_mode:
builder.fp16_mode = fp16_mode
config.set_flag(trt.BuilderFlag.FP16)
# create engine
with torch.cuda.device(device):
engine = builder.build_engine(network, config)
return engine
def save_trt_engine(engine: trt.ICudaEngine, path: str) -> None:
"""Serialize TensorRT engine to disk.
Arguments:
engine (tensorrt.ICudaEngine): TensorRT engine to serialize
path (str): disk path to write the engine
"""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
with open(path, mode='wb') as f:
f.write(bytearray(engine.serialize()))
def load_trt_engine(path: str) -> trt.ICudaEngine:
"""Deserialize TensorRT engine from disk.
Arguments:
path (str): disk path to read the engine
Returns:
tensorrt.ICudaEngine: the TensorRT engine loaded from disk
"""
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This function will be deprecated in future. '
msg += blue_text + 'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
with open(path, mode='rb') as f:
engine_bytes = f.read()
engine = runtime.deserialize_cuda_engine(engine_bytes)
return engine
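# A minimal round-trip sketch (assumption; file names are placeholders; the
# three shape lists are the min/opt/max profile of the input 'input'):
engine = onnx2trt(
    'model.onnx', {'input': [[1, 3, 224, 224]] * 3},
    max_workspace_size=1 << 30)
save_trt_engine(engine, 'model.engine')
engine = load_trt_engine('model.engine')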
def torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:
    """Convert TensorRT dtype to PyTorch dtype."""
if dtype == trt.bool:
return torch.bool
elif dtype == trt.int8:
return torch.int8
elif dtype == trt.int32:
return torch.int32
elif dtype == trt.float16:
return torch.float16
elif dtype == trt.float32:
return torch.float32
else:
raise TypeError('%s is not supported by torch' % dtype)
def torch_device_from_trt(device: trt.TensorLocation) -> torch.device:
    """Convert TensorRT device to PyTorch device."""
if device == trt.TensorLocation.DEVICE:
return torch.device('cuda')
elif device == trt.TensorLocation.HOST:
return torch.device('cpu')
else:
        raise TypeError('%s is not supported by torch' % device)
class TRTWrapper(torch.nn.Module):
"""TensorRT engine Wrapper.
Arguments:
engine (tensorrt.ICudaEngine): TensorRT engine to wrap
input_names (list[str]): names of each inputs
output_names (list[str]): names of each outputs
Note:
        If the engine is converted from an onnx model, the input_names and
        output_names should be the same as in the onnx model.
"""
def __init__(self, engine, input_names=None, output_names=None):
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: This tool will be deprecated in future. '
msg += blue_text + \
'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
super().__init__()
self.engine = engine
if isinstance(self.engine, str):
self.engine = load_trt_engine(engine)
if not isinstance(self.engine, trt.ICudaEngine):
raise TypeError('engine should be str or trt.ICudaEngine')
self._register_state_dict_hook(TRTWrapper._on_state_dict)
self.context = self.engine.create_execution_context()
# get input and output names from engine
if input_names is None or output_names is None:
names = [_ for _ in self.engine]
input_names = list(filter(self.engine.binding_is_input, names))
output_names = list(set(names) - set(input_names))
self.input_names = input_names
self.output_names = output_names
def _on_state_dict(self, state_dict, prefix, local_metadata):
state_dict[prefix + 'engine'] = bytearray(self.engine.serialize())
state_dict[prefix + 'input_names'] = self.input_names
state_dict[prefix + 'output_names'] = self.output_names
def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
missing_keys, unexpected_keys, error_msgs):
engine_bytes = state_dict[prefix + 'engine']
with trt.Logger() as logger, trt.Runtime(logger) as runtime:
self.engine = runtime.deserialize_cuda_engine(engine_bytes)
self.context = self.engine.create_execution_context()
self.input_names = state_dict[prefix + 'input_names']
self.output_names = state_dict[prefix + 'output_names']
def forward(self, inputs):
"""
Arguments:
inputs (dict): dict of input name-tensors pair
Return:
dict: dict of output name-tensors pair
"""
assert self.input_names is not None
assert self.output_names is not None
bindings = [None] * (len(self.input_names) + len(self.output_names))
for input_name, input_tensor in inputs.items():
idx = self.engine.get_binding_index(input_name)
if input_tensor.dtype == torch.long:
input_tensor = input_tensor.int()
self.context.set_binding_shape(idx, tuple(input_tensor.shape))
bindings[idx] = input_tensor.contiguous().data_ptr()
# create output tensors
outputs = {}
for i, output_name in enumerate(self.output_names):
idx = self.engine.get_binding_index(output_name)
dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
shape = tuple(self.context.get_binding_shape(idx))
device = torch_device_from_trt(self.engine.get_location(idx))
output = torch.empty(size=shape, dtype=dtype, device=device)
outputs[output_name] = output
bindings[idx] = output.data_ptr()
self.context.execute_async_v2(bindings,
torch.cuda.current_stream().cuda_stream)
return outputs
class TRTWraper(TRTWrapper):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
warnings.warn(
'TRTWraper will be deprecated in'
' future. Please use TRTWrapper instead', DeprecationWarning)
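# A minimal inference sketch (assumption; file and tensor names are
# placeholders that must match the engine bindings):
trt_model = TRTWrapper('model.engine', ['input'], ['output'])
dummy_input = torch.randn(1, 3, 224, 224).cuda()
with torch.no_grad():
    trt_outputs = trt_model({'input': dummy_input})
result = trt_outputs['output']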
@@ -14,7 +14,7 @@ line_length = 79
multi_line_output = 0
extra_standard_library = pkg_resources,setuptools,logging,os,warnings,abc
known_first_party = mmcv
-known_third_party = addict,cv2,matplotlib,numpy,onnx,onnxruntime,packaging,pytest,pytorch_sphinx_theme,scipy,sphinx,tensorrt,torch,torchvision,yaml,yapf
+known_third_party = addict,cv2,matplotlib,numpy,onnx,packaging,pytest,pytorch_sphinx_theme,scipy,sphinx,torch,torchvision,yaml,yapf
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
......
@@ -2,7 +2,6 @@ import glob
import os
import platform
import re
import warnings
from pkg_resources import DistributionNotFound, get_distribution
from setuptools import find_packages, setup
@@ -138,65 +137,6 @@ except ImportError:
def get_extensions():
extensions = []
if os.getenv('MMCV_WITH_TRT', '0') != '0':
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: ' + \
'Custom TensorRT Ops will be deprecated in future. '
msg += blue_text + \
'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
ext_name = 'mmcv._ext_trt'
from torch.utils.cpp_extension import include_paths, library_paths
library_dirs = []
libraries = []
include_dirs = []
tensorrt_path = os.getenv('TENSORRT_DIR', '0')
tensorrt_lib_path = glob.glob(
os.path.join(tensorrt_path, 'targets', '*', 'lib'))[0]
library_dirs += [tensorrt_lib_path]
libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
libraries += ['cudart']
define_macros = []
extra_compile_args = {'cxx': []}
include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
include_dirs.append(include_path)
include_dirs.append(include_trt_path)
include_dirs.append(os.path.join(tensorrt_path, 'include'))
include_dirs += include_paths(cuda=True)
op_files = glob.glob('./mmcv/ops/csrc/tensorrt/plugins/*')
define_macros += [('MMCV_WITH_CUDA', None)]
define_macros += [('MMCV_WITH_TRT', None)]
cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
# prevent cub/thrust conflict with other python library
# More context See issues #1454
extra_compile_args['nvcc'] += ['-Xcompiler=-fno-gnu-unique']
library_dirs += library_paths(cuda=True)
from setuptools import Extension
ext_ops = Extension(
name=ext_name,
sources=op_files,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
language='c++',
library_dirs=library_dirs,
libraries=libraries)
extensions.append(ext_ops)
if os.getenv('MMCV_WITH_OPS', '0') == '0':
return extensions
@@ -353,63 +293,6 @@ def get_extensions():
define_macros=define_macros,
extra_compile_args=extra_compile_args)
extensions.append(ext_ops)
if EXT_TYPE == 'pytorch' and os.getenv('MMCV_WITH_ORT', '0') != '0':
# Following strings of text style are from colorama package
bright_style, reset_style = '\x1b[1m', '\x1b[0m'
red_text, blue_text = '\x1b[31m', '\x1b[34m'
white_background = '\x1b[107m'
msg = white_background + bright_style + red_text
msg += 'DeprecationWarning: ' + \
'Custom ONNXRuntime Ops will be deprecated in future. '
msg += blue_text + \
'Welcome to use the unified model deployment toolbox '
msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
msg += reset_style
warnings.warn(msg)
ext_name = 'mmcv._ext_ort'
import onnxruntime
from torch.utils.cpp_extension import include_paths, library_paths
library_dirs = []
libraries = []
include_dirs = []
ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
library_dirs += [os.path.join(ort_path, 'lib')]
libraries.append('onnxruntime')
define_macros = []
extra_compile_args = {'cxx': []}
include_path = os.path.abspath('./mmcv/ops/csrc/onnxruntime')
include_dirs.append(include_path)
include_dirs.append(os.path.join(ort_path, 'include'))
op_files = glob.glob('./mmcv/ops/csrc/onnxruntime/cpu/*')
if onnxruntime.get_device() == 'GPU' or os.getenv('FORCE_CUDA',
'0') == '1':
define_macros += [('MMCV_WITH_CUDA', None)]
cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
op_files += glob.glob('./mmcv/ops/csrc/onnxruntime/gpu/*')
include_dirs += include_paths(cuda=True)
library_dirs += library_paths(cuda=True)
else:
include_dirs += include_paths(cuda=False)
library_dirs += library_paths(cuda=False)
from setuptools import Extension
ext_ops = Extension(
name=ext_name,
sources=op_files,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
language='c++',
library_dirs=library_dirs,
libraries=libraries)
extensions.append(ext_ops)
return extensions
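# Build-time sketch (assumption): the optional extensions above were selected
# via environment variables when invoking setup.py, e.g.
#   MMCV_WITH_TRT=1 TENSORRT_DIR=/path/to/TensorRT pip install -e .
#   MMCV_WITH_ORT=1 ONNXRUNTIME_DIR=/path/to/onnxruntime pip install -e .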
......
# Copyright (c) OpenMMLab. All rights reserved.
import os
import warnings
from functools import partial
import numpy as np
@@ -10,7 +9,6 @@ import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F
from packaging import version
onnx_file = 'tmp.onnx'
if torch.__version__ == 'parrots':
@@ -40,93 +38,8 @@ class WrapFunction(nn.Module):
return self.wrapped_function(*args, **kwargs)
def process_grid_sample(func, input, grid, ort_custom_op_path=''):
wrapped_model = WrapFunction(func).eval()
input_names = ['input', 'grid']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, grid),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
session_options = rt.SessionOptions()
if ort_custom_op_path:
session_options.register_custom_ops_library(ort_custom_op_path)
# get onnx output
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [node.name for node in onnx_model.graph.initializer]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 2)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
ort_result = sess.run(None, {
'input': input.detach().numpy(),
'grid': grid.detach().numpy()
})
pytorch_results = wrapped_model(input.clone(), grid.clone())
assert np.allclose(pytorch_results, ort_result, atol=1e-3)
@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
from mmcv.onnx.symbolic import register_extra_symbolics
opset_version = 11
register_extra_symbolics(opset_version)
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
input = torch.rand(1, 1, 10, 10)
grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
grid = F.affine_grid(
grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)
def func(input, grid):
return F.grid_sample(
input,
grid,
mode=mode,
padding_mode=padding_mode,
align_corners=align_corners)
return process_grid_sample(func, input, grid, ort_custom_op_path)
@pytest.mark.parametrize('align_corners', [True, False])
def test_bilinear_grid_sample(align_corners):
from mmcv.ops.point_sample import bilinear_grid_sample
# only support pytorch >= 1.5.0
if version.parse(torch.__version__) < version.parse('1.5.0'):
pytest.skip('Only support PyTorch >= 1.5.0')
input = torch.rand(1, 1, 10, 10)
grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
grid = F.affine_grid(
grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)
def func(input, grid):
return bilinear_grid_sample(input, grid, align_corners=align_corners)
return process_grid_sample(func, input, grid)
def test_nms():
-    from mmcv.ops import get_onnxruntime_op_path, nms
+    from mmcv.ops import nms
np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
[3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
dtype=np.float32)
@@ -151,10 +64,7 @@ def test_nms():
opset_version=11)
onnx_model = onnx.load(onnx_file)
ort_custom_op_path = get_onnxruntime_op_path()
session_options = rt.SessionOptions()
if os.path.exists(ort_custom_op_path):
session_options.register_custom_ops_library(ort_custom_op_path)
# get onnx output
input_all = [node.name for node in onnx_model.graph.input]
@@ -171,89 +81,12 @@ def test_nms():
assert np.allclose(pytorch_score, onnx_score, atol=1e-3)
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_softnms():
from mmcv.ops import get_onnxruntime_op_path, soft_nms
# only support pytorch >= 1.7.0
if version.parse(torch.__version__) < version.parse('1.7.0'):
        warnings.warn('test_softnms should be run with pytorch >= 1.7.0')
return
# only support onnxruntime >= 1.5.1
assert version.parse(rt.__version__) >= version.parse(
        '1.5.1'), 'test_softnms should be run with onnxruntime >= 1.5.1'
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('softnms for onnxruntime is not compiled.')
np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
[3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
dtype=np.float32)
np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)
boxes = torch.from_numpy(np_boxes)
scores = torch.from_numpy(np_scores)
configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
[0.3, 0.5, 0.01, 'naive']]
session_options = rt.SessionOptions()
session_options.register_custom_ops_library(ort_custom_op_path)
for _iou_threshold, _sigma, _min_score, _method in configs:
pytorch_dets, pytorch_inds = soft_nms(
boxes,
scores,
iou_threshold=_iou_threshold,
sigma=_sigma,
min_score=_min_score,
method=_method)
nms = partial(
soft_nms,
iou_threshold=_iou_threshold,
sigma=_sigma,
min_score=_min_score,
method=_method)
wrapped_model = WrapFunction(nms)
wrapped_model.cpu().eval()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (boxes, scores),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['boxes', 'scores'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
# get onnx output
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [
node.name for node in onnx_model.graph.initializer
]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 2)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
onnx_dets, onnx_inds = sess.run(None, {
'scores': scores.detach().numpy(),
'boxes': boxes.detach().numpy()
})
assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3)
        assert np.allclose(pytorch_inds, onnx_inds, atol=1e-3)
def test_roialign():
try:
-        from mmcv.ops import get_onnxruntime_op_path, roi_align
+        from mmcv.ops import roi_align
except (ImportError, ModuleNotFoundError):
pytest.skip('roi_align op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
# roi align config
pool_h = 2
pool_w = 2
@@ -295,8 +128,6 @@ def test_roialign():
onnx_model = onnx.load(onnx_file)
session_options = rt.SessionOptions()
if os.path.exists(ort_custom_op_path):
session_options.register_custom_ops_library(ort_custom_op_path)
# compute onnx_output
input_all = [node.name for node in onnx_model.graph.input]
@@ -318,83 +149,6 @@ def test_roialign():
assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
def test_roialign_rotated():
try:
from mmcv.ops import get_onnxruntime_op_path, roi_align_rotated
except (ImportError, ModuleNotFoundError):
        pytest.skip('roi_align_rotated op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
# roi align config
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., np.pi / 2]]),
([[[[1., 2.], [3., 4.]],
[[4., 3.], [2., 1.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
[11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., 0]]),
([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
[11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3.,
np.pi / 2]])]
    def wrapped_function(torch_input, torch_rois):
return roi_align_rotated(torch_input, torch_rois, (pool_w, pool_h),
spatial_scale, sampling_ratio, True, False)
for case in inputs:
np_input = np.array(case[0], dtype=np.float32)
np_rois = np.array(case[1], dtype=np.float32)
input = torch.from_numpy(np_input)
rois = torch.from_numpy(np_rois)
# compute pytorch_output
with torch.no_grad():
pytorch_output = roi_align_rotated(input, rois, (pool_w, pool_h),
spatial_scale, sampling_ratio,
True, False)
# export and load onnx model
        wrapped_model = WrapFunction(wrapped_function)
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, rois),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['features', 'rois'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
session_options = rt.SessionOptions()
if os.path.exists(ort_custom_op_path):
session_options.register_custom_ops_library(ort_custom_op_path)
# compute onnx_output
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [
node.name for node in onnx_model.graph.initializer
]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 2)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
onnx_output = sess.run(None, {
'features': input.detach().numpy(),
'rois': rois.detach().numpy()
})
onnx_output = onnx_output[0]
# allclose
assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roipool():
from mmcv.ops import roi_pool
@@ -483,240 +237,6 @@ def test_interpolate():
assert np.allclose(pytorch_result, onnx_result, atol=1e-3)
def test_rotated_feature_align():
if torch.__version__ == 'parrots':
pytest.skip('onnx is not supported in parrots directly')
try:
from mmcv.ops import get_onnxruntime_op_path, rotated_feature_align
except (ImportError, ModuleNotFoundError):
pytest.skip('rotated_feature_align op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
spatial_scale = 1.0 / 8
points = 1
    def wrapped_function(feature, bbox):
return rotated_feature_align(
feature, bbox, spatial_scale=spatial_scale, points=points)
feature = torch.tensor([[[[1.2924, -0.2172, -0.5222, 0.1172],
[0.9144, 1.2248, 1.3115, -0.9690],
[-0.8949, -1.1797, -0.9093, -0.3961],
[-0.4586, 0.5062, -0.7947, -0.7397]],
[[-1.0943, -0.7495, 1.3461, -1.1652],
[0.2034, 0.6763, -1.2357, 0.5231],
[-1.0062, 1.2592, 1.4225, -0.3951],
[-0.1242, -1.6240, 0.1932, 2.7181]],
[[-1.6271, -1.0276, 0.0578, -0.2997],
[-0.9684, -1.6946, -1.3188, -1.1938],
[-1.6744, -0.8917, -0.6556, 1.0073],
[-0.1205, 0.3671, -0.3731, -0.5347]]],
[[[0.7035, 0.2089, -0.1774, 3.4670],
[-0.8505, -0.9278, 1.4714, 0.1644],
[0.0898, 0.3531, -0.4007, 0.1927],
[1.2569, -0.2636, -0.5223, 0.0616]],
[[0.1760, -0.7639, -0.4600, -1.3260],
[-0.9921, -0.2970, -0.8955, 1.0508],
[1.3515, -0.1641, 1.9679, 1.1986],
[-0.3616, 0.6287, 0.4933, 0.3360]],
[[-0.5860, 0.2124, -0.8700, 2.4200],
[-0.0551, -1.5103, -1.6779, 0.8399],
[0.8431, 1.2414, -1.1243, -0.3887],
[-2.1254, 0.6047, -0.3515, 0.7254]]]])
bbox = torch.tensor(
[[[[1.3080e+01, 1.2688e+01, 1.1214e+01, 9.3944e+01, -9.1905e-01],
[3.8104e+01, 1.0134e+01, 1.4659e+02, 9.0306e+01, -9.8211e-01],
[-5.3213e+01, 4.9508e+01, 5.1513e+01, 3.2055e+01, -3.1954e-01],
[2.6974e+01, 2.5248e+01, 5.4495e+01, 3.1083e+00, -6.2127e-01]],
[[-1.5604e+01, -5.1908e+01, 2.3998e+02, 1.5008e+01, -1.2546e+00],
[3.1354e+01, -7.3635e+00, 6.7879e+01, 3.5081e+01, -3.3851e-01],
[-5.3292e+00, 9.1946e+00, 1.2834e+01, 1.0485e+01, -1.3039e+00],
[-2.3925e+01, 3.6623e+01, 3.9875e+01, 7.2009e+01, -6.5934e-01]],
[[7.2114e+01, -2.3781e+01, 2.9106e+01, 8.4501e+01, -1.1340e+00],
[2.6258e+01, -7.7034e+00, 1.7629e+02, 1.0615e+02, -1.2156e+00],
[3.8057e+01, 4.6016e+01, 1.2965e+01, 6.9384e+00, -1.0855e+00],
[2.4428e+01, -1.6189e+01, 2.0572e+02, 3.1622e+01, -1.5719e-01]],
[[3.8226e+00, 2.9608e+01, 1.4457e+01, 6.8179e+01, -9.1997e-01],
[2.5003e+01, -4.2490e+01, 9.6007e+01, 4.9086e+01, -1.4786e+00],
[8.5983e+01, 5.4980e+01, 7.8080e+01, 1.0003e+02, -1.0926e+00],
[9.9065e+00, 4.1457e+01, 5.9799e+00, 1.7973e+01, -5.6313e-01]]],
[[[-1.8244e+01, 4.6309e+00, 5.3010e+01, 2.4310e+01, -7.0345e-01],
[1.9419e+01, 3.6704e+01, 5.2390e+01, 5.4133e+01, -3.7730e-01],
[5.6387e+01, 2.3752e+01, 9.0441e+00, 1.7792e+01, -1.5583e+00],
[3.6303e+01, 1.6396e+01, 2.0283e+01, 1.9148e+01, -8.3419e-01]],
[[3.2169e+01, 3.0521e+01, 2.6283e+01, 1.9680e+02, -3.0454e-01],
[2.5788e+01, -3.2189e+01, 8.8882e+01, 1.0207e+02, -1.5328e+00],
[8.4676e+00, -1.6668e+01, 2.4657e+01, 1.1275e+02, -4.0388e-01],
[-1.0799e+01, 6.0422e+00, 9.5807e+00, 3.3677e+01, -3.5438e-01]],
[[6.9363e+01, 1.0850e+01, 2.5968e+01, 2.2311e+01, -1.6408e-01],
[2.8140e+00, 4.6843e+00, 3.1289e+00, 2.1480e+01, -6.7583e-01],
[2.6661e+01, 4.5290e+01, 6.1679e+00, 3.0005e+01, -8.9806e-01],
[5.0871e+00, 1.3234e+01, 9.2087e+01, 4.9622e+01, -2.8020e-01]],
[[-1.2643e+01, 2.5176e+01, 5.0488e+01, 5.4246e+01, -4.4840e-01],
[-3.4521e+01, 9.8435e-01, 5.2413e+01, 9.7996e+00, -8.4218e-01],
[4.9829e+01, -1.0808e+01, 2.9848e+01, 7.3579e+01, -6.2672e-01],
[8.0446e+01, 2.8064e+01, 4.5273e+01, 5.3809e+01, -1.2359e+00]]]])
# compute pytorch_output
with torch.no_grad():
pytorch_output = rotated_feature_align(
feature, bbox, spatial_scale=spatial_scale, points=points)
# export and load onnx model
    wrapped_model = WrapFunction(wrapped_function)
with torch.no_grad():
torch.onnx.export(
wrapped_model, (feature, bbox),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['feature', 'bbox'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
session_options = rt.SessionOptions()
if os.path.exists(ort_custom_op_path):
session_options.register_custom_ops_library(ort_custom_op_path)
# compute onnx_output
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [node.name for node in onnx_model.graph.initializer]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 2)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
onnx_output = sess.run(None, {
'feature': feature.detach().numpy(),
'bbox': bbox.detach().numpy()
})
onnx_output = onnx_output[0]
# allclose
assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
def test_corner_pool(mode, opset=11):
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
from mmcv.ops.corner_pool import CornerPool
def corner_pool_func(input):
corner_pool_module = CornerPool(mode)
return corner_pool_module.corner_pool.apply(input)
wrapped_model = WrapFunction(corner_pool_func).eval()
input = torch.rand((2, 3, 9, 12)) # (n,c,h,w)
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input,
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input'],
output_names=['output'],
opset_version=opset)
onnx_model = onnx.load(onnx_file)
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [node.name for node in onnx_model.graph.initializer]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 1)
session_options = rt.SessionOptions()
session_options.register_custom_ops_library(ort_custom_op_path)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
ort_result = sess.run(None, {'input': input.detach().numpy()})
pytorch_results = wrapped_model(input.clone())
assert np.allclose(pytorch_results, ort_result, atol=1e-5)
@pytest.mark.parametrize('key', ['cummax', 'cummin'])
def test_cummax_cummin(key, opset=11):
    # Note: generally `cummax` or `cummin` is exportable to ONNX
    # as long as the pytorch version >= 1.5.0, since `torch.cummax`
    # is only supported with torch >= 1.5.0.
    # But when `cummax` or `cummin` serves as an intermediate component
    # whose outputs are used as inputs for other modules, the pytorch
    # version must be >= 1.7.0. Otherwise an error appears like:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported kind: prim::PythonOp`.
if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummax_cummin should be run with pytorch >= 1.7.0')
# register custom op `mmcv::cummax` and `mmcv::cummin`
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(opset)
from mmcv.ops import get_onnxruntime_op_path
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
input_list = [
# arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
torch.rand((2, 3, 4, 1, 5)),
torch.rand(1),
torch.rand((2, 0, 1)), # tensor.numel() is 0
torch.FloatTensor(), # empty tensor
]
cummax_cummin_funcs = {'cummax': torch.cummax, 'cummin': torch.cummin}
for input in input_list:
ndims = input.dim()
# valid dim range is [-ndims, ndims-1]
# test for all `dim` value which is valid
for dim in range(-ndims, ndims):
cummax_func = partial(cummax_cummin_funcs[key], dim=dim)
wrapped_model = WrapFunction(cummax_func).eval()
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input,
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input'],
output_names=['output', 'indices'],
opset_version=opset)
onnx_model = onnx.load(onnx_file)
input_all = [node.name for node in onnx_model.graph.input]
input_initializer = [
node.name for node in onnx_model.graph.initializer
]
net_feed_input = list(set(input_all) - set(input_initializer))
assert (len(net_feed_input) == 1)
session_options = rt.SessionOptions()
session_options.register_custom_ops_library(ort_custom_op_path)
sess = rt.InferenceSession(
onnx_file, session_options, providers=['CPUExecutionProvider'])
ort_output, ort_inds = sess.run(None,
{'input': input.detach().numpy()})
pytorch_output, pytorch_inds = wrapped_model(input.clone())
pytorch_output = pytorch_output.detach().numpy()
pytorch_inds = pytorch_inds.detach().numpy()
assert np.allclose(pytorch_output, ort_output, atol=1e-5)
assert np.all(pytorch_inds == ort_inds)
@pytest.mark.parametrize('shifts_dims_pair', [([-3, 5], [2, 0]), (5, None)])
def test_roll(shifts_dims_pair):
opset = 11
@@ -755,173 +275,120 @@ def test_roll(shifts_dims_pair):
torch.testing.assert_allclose(ort_output, pytorch_output)
@pytest.mark.skipif(
not torch.cuda.is_available(),
reason='modulated_deform_conv2d only supports in GPU')
def test_modulated_deform_conv2d():
try:
from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path
except (ImportError, ModuleNotFoundError):
pytest.skip('modulated_deform_conv op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
# modulated deform conv config
in_channels = 3
out_channels = 64
stride = 1
padding = 0
dilation = 1
groups = 1
deform_groups = 1
kernel_size = 3
input = torch.rand(1, in_channels, 28, 28).cuda() # (n, c, h, w)
conv_offset = nn.Conv2d(
in_channels=3,
out_channels=deform_groups * 3 * kernel_size * kernel_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=True).cuda()
conv_offset.cuda()
out = conv_offset(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
model_with_bias = ModulatedDeformConv2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
deform_groups,
bias=True)
model_without_bias = ModulatedDeformConv2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
deform_groups,
bias=False)
models = [model_with_bias.cuda(), model_without_bias.cuda()]
for model in models:
# export and load onnx model
with torch.no_grad():
torch.onnx.export(
model, (input, offset, mask),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input', 'offset', 'mask'],
opset_version=11)
-        session_options = rt.SessionOptions()
-        if os.path.exists(ort_custom_op_path):
-            session_options.register_custom_ops_library(ort_custom_op_path)
-        # compute onnx_output
-        sess = rt.InferenceSession(
-            onnx_file, session_options, providers=['CPUExecutionProvider'])
-        onnx_output = sess.run(
-            None, {
-                'input': input.cpu().detach().numpy(),
-                'offset': offset.cpu().detach().numpy(),
-                'mask': mask.cpu().detach().numpy()
-            })[0]
-        # compute pytorch_output
-        with torch.no_grad():
-            pytorch_output = model(input, offset, mask).cpu()
-        # allclose
-        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
+def _test_symbolic(model, inputs, symbol_name):
+    with torch.no_grad():
+        torch.onnx.export(model, inputs, onnx_file, opset_version=11)
+    import onnx
+    model = onnx.load(onnx_file)
+    nodes = model.graph.node
+    symbol_exist = False
+    for n in nodes:
+        if n.op_type == symbol_name:
+            symbol_exist = True
+    assert symbol_exist
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_border_align():
from mmcv.ops import BorderAlign
model = BorderAlign(2)
input = torch.rand(1, 8, 2, 2).cuda()
boxes = torch.rand(1, 4, 4).cuda()
_test_symbolic(model, (input, boxes), 'MMCVBorderAlign')
def test_deform_conv2d(threshold=1e-3):
try:
from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
except (ImportError, ModuleNotFoundError):
pytest.skip('deform_conv op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
    # deform conv config
in_channels = 1
out_channels = 64
stride = 1
padding = 0
dilation = 1
groups = 1
deform_groups = 1
kernel_size = 2
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
[[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
[[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
[[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]
x = torch.tensor(input)
conv_offset = nn.Conv2d(
in_channels=in_channels,
out_channels=deform_groups * 2 * kernel_size * kernel_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=True)
conv_offset.weight.data = torch.nn.Parameter(
torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
conv_offset.bias.data = torch.nn.Parameter(
torch.Tensor(offset_bias).reshape(8))
offset = conv_offset(x)
model = DeformConv2d(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, deform_groups)
model.weight.data = torch.nn.Parameter(
torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
with torch.no_grad():
torch.onnx.export(
model, (x, offset),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input', 'offset'],
opset_version=11)
-    session_options = rt.SessionOptions()
-    if os.path.exists(ort_custom_op_path):
-        session_options.register_custom_ops_library(ort_custom_op_path)
-    # compute onnx_output
-    sess = rt.InferenceSession(
-        onnx_file, session_options, providers=['CPUExecutionProvider'])
-    onnx_output = sess.run(
-        None, {
-            'input': x.cpu().detach().numpy(),
-            'offset': offset.cpu().detach().numpy(),
-        })[0]
-    # compute pytorch_output
-    with torch.no_grad():
-        pytorch_output = model(x, offset).cpu()
-    # allclose
-    assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
+@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
+def test_carafe():
+    from mmcv.ops import CARAFENaive
+    feat = torch.randn(2, 64, 3, 3, device='cuda').double()
+    mask = torch.randn(2, 100, 6, 6, device='cuda').sigmoid().double()
+    _test_symbolic(CARAFENaive(5, 4, 2), (feat, mask), 'MMCVCARAFENaive')
+@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
+def test_deform_conv():
+    from mmcv.ops import DeformConv2dPack
+    x = torch.randn(1, 2, 4, 4, device='cuda')
+    _test_symbolic(
+        DeformConv2dPack(2, 4, 3, 1, 1).cuda(), x, 'MMCVDeformConv2d')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_modulated_deform_conv():
from mmcv.ops import ModulatedDeformConv2dPack
x = torch.randn(1, 2, 4, 4, device='cuda')
_test_symbolic(
ModulatedDeformConv2dPack(2, 4, 3, 1, 1).cuda(), x,
'MMCVModulatedDeformConv2d')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_deform_roi_pool():
from mmcv.ops import DeformRoIPoolPack
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0., 0., 1., 1.]], device='cuda')
output_c = x.size(1)
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
model = DeformRoIPoolPack((pool_h, pool_w),
output_c,
spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio).cuda()
_test_symbolic(model, (x, rois), 'MMCVDeformRoIPool')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_masked_conv():
from mmcv.ops import MaskedConv2d
x = torch.rand(1, 2, 4, 4, device='cuda')
mask = torch.rand(1, 4, 4, device='cuda')
_test_symbolic(
MaskedConv2d(2, 4, 3, 1, 1).cuda(), (x, mask), 'MMCVMaskedConv2d')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_pr_roi_pool():
from mmcv.ops import PrRoIPool
pool_h = 2
pool_w = 2
spatial_scale = 1.0
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0., 0., 1., 1.]], device='cuda')
model = PrRoIPool((pool_h, pool_w), spatial_scale).cuda()
_test_symbolic(model, (x, rois), 'PrRoIPool')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_psa_mask():
from mmcv.ops import PSAMask
input = torch.rand(4, 16, 8, 8).cuda()
model = PSAMask('collect', (4, 4)).cuda()
_test_symbolic(model, input, 'MMCVPSAMask')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roi_align_rotated():
from mmcv.ops import RoIAlignRotated
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0.5, 0.5, 1., 1., 0]], device='cuda')
model = RoIAlignRotated((pool_h, pool_w), spatial_scale,
sampling_ratio).cuda()
_test_symbolic(model, (x, rois), 'MMCVRoIAlignRotated')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roi_feature_align():
from mmcv.ops import rotated_feature_align
wrapped_model = WrapFunction(rotated_feature_align)
feature = torch.rand(1, 1, 2, 2, device='cuda')
bbox = torch.rand(1, 2, 2, 5, device='cuda')
_test_symbolic(wrapped_model, (feature, bbox), 'MMCVRotatedFeatureAlign')
# Copyright (c) OpenMMLab. All rights reserved.
import os
from functools import partial
from typing import Callable
import mmengine
import numpy as np
import onnx
import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F
try:
from mmcv.tensorrt import (TRTWrapper, is_tensorrt_plugin_loaded, onnx2trt,
save_trt_engine)
except ImportError:
pytest.skip(
'TensorRT should be installed from source.', allow_module_level=True)
if not torch.cuda.is_available():
pytest.skip(
'CUDA is required for this test module', allow_module_level=True)
if not is_tensorrt_plugin_loaded():
pytest.skip(
        'Test requires TensorRT plugins to be compiled in mmcv',
allow_module_level=True)
class WrapFunction(nn.Module):
def __init__(self, wrapped_function):
super().__init__()
self.wrapped_function = wrapped_function
def forward(self, *args, **kwargs):
return self.wrapped_function(*args, **kwargs)
onnx_file = 'tmp.onnx'
trt_file = 'tmp.engine'
def test_roialign():
try:
from mmcv.ops import RoIAlign
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
# roi align config
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
([[[[1., 2.], [3., 4.]], [[4., 3.],
[2., 1.]]]], [[0., 0., 0., 1., 1.]]),
([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
[11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]
wrapped_model = RoIAlign((pool_w, pool_h), spatial_scale, sampling_ratio,
'avg', True).cuda()
for case in inputs:
np_input = np.array(case[0], dtype=np.float32)
np_rois = np.array(case[1], dtype=np.float32)
input = torch.from_numpy(np_input).cuda()
rois = torch.from_numpy(np_rois).cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, rois),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input', 'rois'],
output_names=['roi_feat'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
'rois': [list(rois.shape),
list(rois.shape),
list(rois.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['input', 'rois'], ['roi_feat'])
with torch.no_grad():
trt_outputs = trt_model({'input': input, 'rois': rois})
trt_roi_feat = trt_outputs['roi_feat']
# compute pytorch_output
with torch.no_grad():
pytorch_roi_feat = wrapped_model(input, rois)
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_roi_feat, trt_roi_feat)
def test_nms():
try:
from mmcv.ops import nms
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
data = mmengine.load('./tests/data/batched_nms_data.pkl')
boxes = torch.from_numpy(data['boxes']).cuda()
scores = torch.from_numpy(data['scores']).cuda()
nms = partial(
nms, iou_threshold=0.7, offset=0, score_threshold=0.1, max_num=100)
wrapped_model = WrapFunction(nms)
wrapped_model.cpu().eval()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (boxes.detach().cpu(), scores.detach().cpu()),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['boxes', 'scores'],
output_names=['dets', 'inds'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'boxes': [list(boxes.shape),
list(boxes.shape),
list(boxes.shape)],
'scores': [list(scores.shape),
list(scores.shape),
list(scores.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['boxes', 'scores'], ['dets', 'inds'])
with torch.no_grad():
trt_outputs = trt_model({'boxes': boxes, 'scores': scores})
trt_dets = trt_outputs['dets']
trt_inds = trt_outputs['inds']
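        # TensorRT emits int32 indices; cast to int64 so they can be
        # compared with the PyTorch indices below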
trt_inds = trt_inds.long()
# compute pytorch_output
with torch.no_grad():
pytorch_outputs = wrapped_model(boxes, scores)
pytorch_dets, pytorch_inds = pytorch_outputs
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
num_boxes = pytorch_dets.shape[0]
trt_dets = trt_dets[:num_boxes, ...]
trt_inds = trt_inds[:num_boxes]
trt_scores = trt_dets[:, 4]
pytorch_scores = pytorch_dets[:, 4]
os.environ.pop('ONNX_BACKEND')
assert torch.allclose(pytorch_scores, trt_scores, atol=1e-3)
assert torch.equal(pytorch_inds, trt_inds)
def test_batched_nms():
try:
from mmcv.ops import batched_nms
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
# trt config
os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
fp16_mode = False
max_workspace_size = 1 << 30
data = mmengine.load('./tests/data/batched_nms_data.pkl')
nms_cfg = dict(type='nms', iou_threshold=0.7, score_threshold=0.1)
boxes = torch.from_numpy(data['boxes']).cuda()
scores = torch.from_numpy(data['scores']).cuda()
idxs = torch.from_numpy(data['idxs']).cuda()
class_agnostic = False
nms = partial(batched_nms, nms_cfg=nms_cfg, class_agnostic=class_agnostic)
wrapped_model = WrapFunction(nms)
wrapped_model.cpu().eval()
input_data = (boxes.detach().cpu(), scores.detach().cpu(),
idxs.detach().cpu())
input_names = ['boxes', 'scores', 'idxs']
output_names = ['dets', 'inds']
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input_data,
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'boxes': [list(boxes.shape),
list(boxes.shape),
list(boxes.shape)],
'scores': [list(scores.shape),
list(scores.shape),
list(scores.shape)],
'idxs': [list(idxs.shape),
list(idxs.shape),
list(idxs.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({
'boxes': boxes,
'scores': scores,
'idxs': idxs
})
trt_dets = trt_outputs['dets']
trt_inds = trt_outputs['inds']
trt_inds = trt_inds.long()
# compute pytorch_output
with torch.no_grad():
pytorch_outputs = wrapped_model(boxes, scores, idxs)
pytorch_dets, pytorch_inds = pytorch_outputs
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
num_boxes = pytorch_dets.shape[0]
trt_dets = trt_dets[:num_boxes, ...]
trt_inds = trt_inds[:num_boxes]
trt_scores = trt_dets[:, 4]
pytorch_scores = pytorch_dets[:, 4]
os.environ.pop('ONNX_BACKEND')
assert torch.allclose(pytorch_scores, trt_scores)
assert torch.equal(pytorch_inds, trt_inds)
def test_scatternd():
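    # the in-place slice assignments below lower to ONNX ScatterND
    # nodes when exported with opset 11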
def func(data):
data[:, :-2] += 1
data[:2, :] -= 1
return data
data = torch.zeros(4, 4).cuda()
wrapped_model = WrapFunction(func).eval().cuda()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
wrapped_model, (data.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(data.shape),
list(data.shape),
list(data.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': data.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(data.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
def test_deform_conv():
try:
from mmcv.ops import DeformConv2dPack
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
[[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
[[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
[[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]
c_in = 1
c_out = 1
x = torch.Tensor(input).cuda()
x.requires_grad = True
model = DeformConv2dPack(c_in, c_out, 2, stride=1, padding=0)
model.conv_offset.weight.data = torch.nn.Parameter(
torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
model.conv_offset.bias.data = torch.nn.Parameter(
torch.Tensor(offset_bias).reshape(8))
model.weight.data = torch.nn.Parameter(
torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
model.cuda().eval()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
model, (x.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(x.shape), list(x.shape),
list(x.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': x.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = model(x.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('with_bias', [True, False])
def test_modulated_deform_conv(with_bias):
try:
from mmcv.ops import ModulatedDeformConv2dPack
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
x = torch.Tensor(input).cuda()
model = ModulatedDeformConv2dPack(
1,
1,
kernel_size=(2, 2),
stride=1,
padding=1,
deform_groups=1,
bias=with_bias)
model.weight.data.fill_(1.)
model.type(torch.float32)
model = model.cuda().eval()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
model, (x.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(x.shape), list(x.shape),
list(x.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': x.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = model(x.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
torch.testing.assert_allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
from mmcv.onnx.symbolic import register_extra_symbolics
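    # registers mmcv's extra symbolic functions (e.g. `grid_sampler`)
    # so that F.grid_sample can be exported with opset 11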
register_extra_symbolics(11)
input = torch.rand(1, 1, 10, 10).cuda()
grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
grid = F.affine_grid(grid, (1, 1, 15, 15)).type_as(input).cuda()
def func(input, grid):
return F.grid_sample(
input,
grid,
mode=mode,
padding_mode=padding_mode,
align_corners=align_corners)
wrapped_model = WrapFunction(func).eval().cuda()
input_names = ['input', 'grid']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input.clone(), grid.clone()),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
'grid': [list(grid.shape),
list(grid.shape),
list(grid.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': input.clone(), 'grid': grid.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(input.clone(), grid.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('func', [torch.cummax, torch.cummin])
def test_cummin_cummax(func: Callable):
    # Note: generally `cummax` or `cummin` is exportable to ONNX as
    # long as the PyTorch version >= 1.5.0, since `torch.cummax` is
    # only supported from torch 1.5.0 onwards.
    # But when `cummax` or `cummin` serves as an intermediate component
    # whose outputs are used as inputs to other modules, the PyTorch
    # version must be >= 1.7.0. Otherwise an error appears like:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported kind: prim::PythonOp`.
from packaging import version
if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummin_cummax should be run with pytorch >= 1.7.0')
opset = 11
# register custom op `mmcv::cummax` and `mmcv::cummin`
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(opset)
input_list = [
# arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
torch.rand((2, 3, 4, 1, 5)).cuda(),
torch.rand(1).cuda()
]
input_names = ['input']
output_names = ['output', 'indices']
for input in input_list:
ndims = input.dim()
        # the valid range of `dim` is [-ndims, ndims - 1];
        # test every valid value
for dim in range(-ndims, ndims):
cummax_func = partial(func, dim=dim)
wrapped_model = WrapFunction(cummax_func).eval().cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input,
onnx_file,
export_params=True,
keep_initializers_as_inputs=False,
input_names=input_names,
output_names=output_names,
opset_version=opset)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input':
[list(input.shape),
list(input.shape),
list(input.shape)]
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
# remove ONNX model after conversion
if os.path.exists(onnx_file):
os.remove(onnx_file)
# save TensorRT model
save_trt_engine(trt_engine, trt_file)
# load and wrap TensorRT model
trt_model = TRTWrapper(trt_file)
# remove trt model after loading
if os.path.exists(trt_file):
os.remove(trt_file)
# compute trt output
with torch.no_grad():
trt_results = trt_model({'input': input.contiguous().clone()})
trt_output = trt_results['output']
trt_indices = trt_results['indices']
# compute pytorch output
with torch.no_grad():
pytorch_results = wrapped_model(input.clone())
pytorch_output = pytorch_results[0]
pytorch_indices = pytorch_results[1]
torch.testing.assert_allclose(trt_output, pytorch_output)
torch.testing.assert_allclose(trt_indices, pytorch_indices)
@pytest.mark.parametrize('dynamic_export', [True, False])
@pytest.mark.parametrize('fp16_mode', [True, False])
def test_instance_norm(dynamic_export, fp16_mode):
n, c, h, w = 2, 3, 10, 10
data = torch.randn(n, c, h, w).cuda()
norm = nn.InstanceNorm2d(c, affine=True)
wrapped_model = WrapFunction(norm).eval().cuda()
input_names = ['input']
output_names = ['output']
dynamic_axes = None
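    # when exporting dynamically, mark batch, height and width as
    # symbolic axes so the ONNX model accepts variable input sizes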
if dynamic_export:
dynamic_axes = {
'input': {
0: 'n',
2: 'h',
3: 'w',
},
'output': {
0: 'n',
2: 'h',
3: 'w',
},
}
with torch.no_grad():
torch.onnx.export(
wrapped_model, (data.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
if dynamic_export:
opt_shape_dict = {
'input':
[list(data.shape),
list(data.shape), [2 * n, c, 2 * h, 2 * w]],
}
else:
opt_shape_dict = {
'input': [list(data.shape),
list(data.shape),
list(data.shape)],
}
# trt config
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': data.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(data.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
def test_corner_pool(mode):
try:
from mmcv.ops import CornerPool
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
opset = 11
# register custom op `mmcv::MMCVCornerPool`
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(opset)
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
inputs = [
# (n, c, h, w)
torch.rand((2, 3, 5, 5)),
torch.rand((1, 2, 4, 6)),
torch.rand((2, 1, 3, 2)),
]
class CornerPoolWrapper(CornerPool):
def __init__(self, mode):
super().__init__(mode)
def forward(self, x):
            # do not use `torch.cummax`; use `corner_pool` instead so
            # the test works across torch versions
return self.corner_pool.apply(x)
wrapped_model = CornerPoolWrapper(mode).cuda()
for input in inputs:
input = input.cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input'],
output_names=['output'],
opset_version=opset)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['input'], ['output'])
with torch.no_grad():
trt_outputs = trt_model({'input': input})
trt_pool_feat = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_pool_feat = wrapped_model(input)
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_pool_feat, trt_pool_feat, atol=1e-5)
# Copyright (c) OpenMMLab. All rights reserved.
import os
from functools import wraps
import onnx
import pytest
import torch
from mmcv.ops import nms
from mmcv.tensorrt.preprocess import preprocess_onnx
if torch.__version__ == 'parrots':
pytest.skip('not supported in parrots now', allow_module_level=True)
def remove_tmp_file(func):
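    """Pass a temporary ONNX file path to the wrapped function and
    delete the file once the function returns."""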
@wraps(func)
def wrapper(*args, **kwargs):
onnx_file = 'tmp.onnx'
kwargs['onnx_file'] = onnx_file
try:
result = func(*args, **kwargs)
finally:
if os.path.exists(onnx_file):
os.remove(onnx_file)
return result
return wrapper
@remove_tmp_file
def export_nms_module_to_onnx(module, onnx_file):
torch_model = module()
torch_model.eval()
input = (torch.rand([100, 4], dtype=torch.float32),
torch.rand([100], dtype=torch.float32))
torch.onnx.export(
torch_model,
input,
onnx_file,
opset_version=11,
input_names=['boxes', 'scores'],
output_names=['output'])
onnx_model = onnx.load(onnx_file)
return onnx_model
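
# `preprocess_onnx` rewrites `NonMaxSuppression` nodes so that the
# values the TensorRT plugin needs (e.g. max_output_boxes_per_class)
# are stored as node attributes; the tests below verify that layout.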
def test_can_handle_nms_with_constant_maxnum():
class ModuleNMS(torch.nn.Module):
def forward(self, boxes, scores):
return nms(boxes, scores, iou_threshold=0.4, max_num=10)
onnx_model = export_nms_module_to_onnx(ModuleNMS)
preprocess_onnx_model = preprocess_onnx(onnx_model)
for node in preprocess_onnx_model.graph.node:
if 'NonMaxSuppression' in node.name:
assert len(node.attribute) == 5, 'The NMS must have 5 attributes.'
def test_can_handle_nms_with_undefined_maxnum():
class ModuleNMS(torch.nn.Module):
def forward(self, boxes, scores):
return nms(boxes, scores, iou_threshold=0.4)
onnx_model = export_nms_module_to_onnx(ModuleNMS)
preprocess_onnx_model = preprocess_onnx(onnx_model)
for node in preprocess_onnx_model.graph.node:
if 'NonMaxSuppression' in node.name:
assert len(node.attribute) == 5, \
'The NMS must have 5 attributes.'
assert node.attribute[2].i > 0, \
'The max_output_boxes_per_class is not defined correctly.'