Unverified Commit 2e5628b4 authored by q.yao, committed by GitHub

[Refactor]: Remove deployment for dev-2.x (#2225)

* remove deploy for 2.0

* update onnx ut
parent 961373ad
 # Copyright (c) OpenMMLab. All rights reserved.
-import glob
-import os
 import torch

 if torch.__version__ == 'parrots':
@@ -22,15 +19,3 @@ else:
     def get_compiling_cuda_version():
         return ext_module.get_compiling_cuda_version()
-
-
-def get_onnxruntime_op_path():
-    wildcard = os.path.join(
-        os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
-        '_ext_ort.*.so')
-    paths = glob.glob(wildcard)
-    if len(paths) > 0:
-        return paths[0]
-    else:
-        return ''
@@ -17,7 +17,7 @@ ext_module = ext_loader.load_ext(
 class MaskedConv2dFunction(Function):

     @staticmethod
-    def symbolic(g, features, mask, weight, bias, padding, stride):
+    def symbolic(g, features, mask, weight, bias, padding, stride=1):
         return g.op(
             'mmcv::MMCVMaskedConv2d',
             features,
...
-import os
 from typing import Any, Dict, List, Optional, Tuple, Union

 import numpy as np
@@ -37,49 +36,34 @@ class NMSop(torch.autograd.Function):

     @staticmethod
     def symbolic(g, bboxes, scores, iou_threshold, offset, score_threshold,
                  max_num):
-        from ..onnx import is_custom_op_loaded
-        has_custom_op = is_custom_op_loaded()
-        # TensorRT nms plugin is aligned with original nms in ONNXRuntime
-        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'
-        if has_custom_op and (not is_trt_backend):
-            return g.op(
-                'mmcv::NonMaxSuppression',
-                bboxes,
-                scores,
-                iou_threshold_f=float(iou_threshold),
-                offset_i=int(offset))
-        else:
-            from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
-
-            from ..onnx.onnx_utils.symbolic_helper import _size_helper
-
-            boxes = unsqueeze(g, bboxes, 0)
-            scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
-
-            if max_num > 0:
-                max_num = g.op(
-                    'Constant',
-                    value_t=torch.tensor(max_num, dtype=torch.long))
-            else:
-                dim = g.op('Constant', value_t=torch.tensor(0))
-                max_num = _size_helper(g, bboxes, dim)
-            max_output_per_class = max_num
-            iou_threshold = g.op(
-                'Constant',
-                value_t=torch.tensor([iou_threshold], dtype=torch.float))
-            score_threshold = g.op(
-                'Constant',
-                value_t=torch.tensor([score_threshold], dtype=torch.float))
-            nms_out = g.op('NonMaxSuppression', boxes, scores,
-                           max_output_per_class, iou_threshold,
-                           score_threshold)
-            return squeeze(
-                g,
-                select(
-                    g, nms_out, 1,
-                    g.op(
-                        'Constant',
-                        value_t=torch.tensor([2], dtype=torch.long))), 1)
+        from torch.onnx.symbolic_opset9 import select, squeeze, unsqueeze
+
+        from ..onnx.onnx_utils.symbolic_helper import _size_helper
+
+        boxes = unsqueeze(g, bboxes, 0)
+        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
+
+        if max_num > 0:
+            max_num = g.op(
+                'Constant', value_t=torch.tensor(max_num, dtype=torch.long))
+        else:
+            dim = g.op('Constant', value_t=torch.tensor(0))
+            max_num = _size_helper(g, bboxes, dim)
+        max_output_per_class = max_num
+        iou_threshold = g.op(
+            'Constant',
+            value_t=torch.tensor([iou_threshold], dtype=torch.float))
+        score_threshold = g.op(
+            'Constant',
+            value_t=torch.tensor([score_threshold], dtype=torch.float))
+        nms_out = g.op('NonMaxSuppression', boxes, scores,
+                       max_output_per_class, iou_threshold, score_threshold)
+        return squeeze(
+            g,
+            select(
+                g, nms_out, 1,
+                g.op('Constant', value_t=torch.tensor([2],
+                                                      dtype=torch.long))), 1)


 class SoftNMSop(torch.autograd.Function):
...
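With the custom-op branch removed, exporting mmcv.ops.nms always lowers to the standard ONNX NonMaxSuppression op. A minimal sketch of checking that (the wrapper class, box values, and file name are illustrative; mmcv and onnx are assumed to be installed):

import onnx
import torch

from mmcv.ops import nms


class NMSWrapper(torch.nn.Module):  # hypothetical export wrapper

    def forward(self, boxes, scores):
        return nms(boxes, scores, iou_threshold=0.5, offset=0)


boxes = torch.rand(8, 4) * 8
boxes[:, 2:] += boxes[:, :2]  # ensure x2 >= x1 and y2 >= y1
scores = torch.rand(8)
torch.onnx.export(NMSWrapper(), (boxes, scores), 'nms.onnx', opset_version=11)
# the graph should contain a plain NonMaxSuppression node, no mmcv:: op
assert any(n.op_type == 'NonMaxSuppression'
           for n in onnx.load('nms.onnx').graph.node)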
 # Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend  # noqa
-from os import path as osp
 from typing import Tuple, Union

 import torch
@@ -89,13 +88,6 @@ def bilinear_grid_sample(im: Tensor,
     return (Ia * wa + Ib * wb + Ic * wc + Id * wd).reshape(n, c, gh, gw)


-def is_in_onnx_export_without_custom_ops() -> bool:
-    from mmcv.ops import get_onnxruntime_op_path
-    ort_custom_op_path = get_onnxruntime_op_path()
-    return torch.onnx.is_in_onnx_export(
-    ) and not osp.exists(ort_custom_op_path)


 def normalize(grid: Tensor) -> Tensor:
     """Normalize input grid from [-1, 1] to [0, 1]
@@ -280,7 +272,7 @@ def point_sample(input: Tensor,
     if points.dim() == 3:
         add_dim = True
         points = points.unsqueeze(2)
-    if is_in_onnx_export_without_custom_ops():
+    if torch.onnx.is_in_onnx_export():
         # If custom ops for onnx runtime not compiled use python
         # implementation of grid_sample function to make onnx graph
         # with supported nodes
...
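Since point_sample now falls back to the pure-python bilinear_grid_sample whenever it is traced by torch.onnx.export, a quick sanity check against F.grid_sample is useful. A hedged sketch, with illustrative shapes:

import torch
import torch.nn.functional as F

from mmcv.ops.point_sample import bilinear_grid_sample

im = torch.rand(1, 1, 10, 10)
# identity affine transform, resampled onto a 15x15 grid
grid = F.affine_grid(
    torch.tensor([[[1., 0., 0.], [0., 1., 0.]]]), (1, 1, 15, 15),
    align_corners=False)
ref = F.grid_sample(im, grid, align_corners=False)  # bilinear, zeros padding
out = bilinear_grid_sample(im, grid, align_corners=False)
assert torch.allclose(ref, out, atol=1e-3)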
@@ -19,50 +19,35 @@ class RoIAlignFunction(Function):

     @staticmethod
     def symbolic(g, input, rois, output_size, spatial_scale, sampling_ratio,
                  pool_mode, aligned):
-        from ..onnx import is_custom_op_loaded
-        has_custom_op = is_custom_op_loaded()
-        if has_custom_op:
-            return g.op(
-                'mmcv::MMCVRoiAlign',
-                input,
-                rois,
-                output_height_i=output_size[0],
-                output_width_i=output_size[1],
-                spatial_scale_f=spatial_scale,
-                sampling_ratio_i=sampling_ratio,
-                mode_s=pool_mode,
-                aligned_i=aligned)
-        else:
-            from torch.onnx import TensorProtoDataType
-            from torch.onnx.symbolic_helper import _slice_helper
-            from torch.onnx.symbolic_opset9 import squeeze, sub
-
-            # batch_indices = rois[:, 0].long()
-            batch_indices = _slice_helper(
-                g, rois, axes=[1], starts=[0], ends=[1])
-            batch_indices = squeeze(g, batch_indices, 1)
-            batch_indices = g.op(
-                'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
-            # rois = rois[:, 1:]
-            rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
-            if aligned:
-                # rois -= 0.5/spatial_scale
-                aligned_offset = g.op(
-                    'Constant',
-                    value_t=torch.tensor([0.5 / spatial_scale],
-                                         dtype=torch.float32))
-                rois = sub(g, rois, aligned_offset)
-            # roi align
-            return g.op(
-                'RoiAlign',
-                input,
-                rois,
-                batch_indices,
-                output_height_i=output_size[0],
-                output_width_i=output_size[1],
-                spatial_scale_f=spatial_scale,
-                sampling_ratio_i=max(0, sampling_ratio),
-                mode_s=pool_mode)
+        from torch.onnx import TensorProtoDataType
+        from torch.onnx.symbolic_helper import _slice_helper
+        from torch.onnx.symbolic_opset9 import squeeze, sub
+
+        # batch_indices = rois[:, 0].long()
+        batch_indices = _slice_helper(g, rois, axes=[1], starts=[0], ends=[1])
+        batch_indices = squeeze(g, batch_indices, 1)
+        batch_indices = g.op(
+            'Cast', batch_indices, to_i=TensorProtoDataType.INT64)
+        # rois = rois[:, 1:]
+        rois = _slice_helper(g, rois, axes=[1], starts=[1], ends=[5])
+        if aligned:
+            # rois -= 0.5/spatial_scale
+            aligned_offset = g.op(
+                'Constant',
+                value_t=torch.tensor([0.5 / spatial_scale],
+                                     dtype=torch.float32))
+            rois = sub(g, rois, aligned_offset)
+        # roi align
+        return g.op(
+            'RoiAlign',
+            input,
+            rois,
+            batch_indices,
+            output_height_i=output_size[0],
+            output_width_i=output_size[1],
+            spatial_scale_f=spatial_scale,
+            sampling_ratio_i=max(0, sampling_ratio),
+            mode_s=pool_mode)

     @staticmethod
     def forward(ctx: Any,
...
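With the custom-op branch gone, mmcv's RoIAlign always maps onto the standard ONNX RoiAlign node during export. A hedged sketch of verifying that (shapes, values, and file name are illustrative):

import onnx
import torch

from mmcv.ops import RoIAlign

model = RoIAlign((2, 2), spatial_scale=1.0, sampling_ratio=2)
x = torch.rand(1, 1, 4, 4)
rois = torch.tensor([[0., 0., 0., 3., 3.]])  # (batch_idx, x1, y1, x2, y2)
torch.onnx.export(model, (x, rois), 'roi_align.onnx', opset_version=11)
assert any(n.op_type == 'RoiAlign'
           for n in onnx.load('roi_align.onnx').graph.node)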
# Copyright (c) OpenMMLab. All rights reserved.
# flake8: noqa
from .init_plugins import is_tensorrt_plugin_loaded, load_tensorrt_plugin
from .preprocess import preprocess_onnx


def is_tensorrt_available():
    try:
        import tensorrt
        del tensorrt
        return True
    except ModuleNotFoundError:
        return False


__all__ = []

if is_tensorrt_available():
    from .tensorrt_utils import (TRTWraper, TRTWrapper, load_trt_engine,
                                 onnx2trt, save_trt_engine)

    # load tensorrt plugin lib
    load_tensorrt_plugin()

    __all__.extend([
        'onnx2trt', 'save_trt_engine', 'load_trt_engine', 'TRTWraper',
        'TRTWrapper'
    ])

__all__.extend(['is_tensorrt_plugin_loaded', 'preprocess_onnx'])
# Copyright (c) OpenMMLab. All rights reserved.
import ctypes
import glob
import os
import warnings


def get_tensorrt_op_path() -> str:
    """Get TensorRT plugins library path."""
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    wildcard = os.path.join(
        os.path.abspath(os.path.dirname(os.path.dirname(__file__))),
        '_ext_trt.*.so')

    paths = glob.glob(wildcard)
    lib_path = paths[0] if len(paths) > 0 else ''
    return lib_path


plugin_is_loaded = False


def is_tensorrt_plugin_loaded() -> bool:
    """Check if TensorRT plugins library is loaded or not.

    Returns:
        bool: plugin_is_loaded flag
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    global plugin_is_loaded
    return plugin_is_loaded


def load_tensorrt_plugin() -> None:
    """load TensorRT plugins library."""
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    global plugin_is_loaded
    lib_path = get_tensorrt_op_path()
    if (not plugin_is_loaded) and os.path.exists(lib_path):
        ctypes.CDLL(lib_path)
        plugin_is_loaded = True
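load_tensorrt_plugin works purely through the side effect of ctypes.CDLL: loading the shared library registers each plugin creator with TensorRT's global registry. A hedged sketch of inspecting that registry (it assumes the now-deprecated plugin library was actually built):

import ctypes

import tensorrt as trt

lib_path = get_tensorrt_op_path()  # '' when the plugin lib was never built
if lib_path:
    ctypes.CDLL(lib_path)  # plugin creators self-register on load
registry = trt.get_plugin_registry()
print([creator.name for creator in registry.plugin_creator_list])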
# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import numpy as np
import onnx


def preprocess_onnx(onnx_model: onnx.ModelProto) -> onnx.ModelProto:
    """Modify onnx model to match with TensorRT plugins in mmcv.

    There are some conflicts between onnx node definitions and TensorRT
    limits. This function performs preprocessing on the onnx model to solve
    these conflicts. For example, an onnx `attribute` is loaded in TensorRT
    on the host while an onnx `input` is loaded on the device. Shape
    inference is performed on the host, so any `input` related to shape
    (such as `max_output_boxes_per_class` in NonMaxSuppression) should be
    transformed into an `attribute` before conversion.

    Arguments:
        onnx_model (onnx.ModelProto): Input onnx model.

    Returns:
        onnx.ModelProto: Modified onnx model.
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    graph = onnx_model.graph
    nodes = graph.node
    initializers = graph.initializer
    node_dict = {}
    for node in nodes:
        node_outputs = node.output
        for output in node_outputs:
            if len(output) > 0:
                node_dict[output] = node

    init_dict = {_.name: _ for _ in initializers}

    nodes_name_to_remove = set()

    def is_node_without_output(name):
        for node_name, node in node_dict.items():
            if node_name not in nodes_name_to_remove:
                if name in node.input:
                    return False
        return True

    def mark_nodes_to_remove(name):
        node = node_dict[name]
        nodes_name_to_remove.add(name)
        for input_node_name in node.input:
            if is_node_without_output(input_node_name):
                mark_nodes_to_remove(input_node_name)

    def parse_data(name, typ, default_value=0):
        if name in node_dict:
            node = node_dict[name]
            if node.op_type == 'Constant':
                raw_data = node.attribute[0].t.raw_data
            else:
                mark_nodes_to_remove(name)
                return default_value
        elif name in init_dict:
            raw_data = init_dict[name].raw_data
        else:
            raise ValueError(f'{name} not found in node or initializer.')
        return np.frombuffer(raw_data, typ).item()

    nrof_node = len(nodes)
    for idx in range(nrof_node):
        node = nodes[idx]
        node_attributes = node.attribute
        node_inputs = node.input
        node_outputs = node.output
        node_name = node.name
        # process NonMaxSuppression node
        if node.op_type == 'NonMaxSuppression':
            center_point_box = 0
            max_output_boxes_per_class = 1000000
            iou_threshold = 0.3
            score_threshold = 0.0
            offset = 0
            for attribute in node_attributes:
                if attribute.name == 'center_point_box':
                    center_point_box = attribute.i
                elif attribute.name == 'offset':
                    offset = attribute.i

            if len(node_inputs) >= 3:
                max_output_boxes_per_class = parse_data(
                    node_inputs[2], np.int64, max_output_boxes_per_class)
                mark_nodes_to_remove(node_inputs[2])

            if len(node_inputs) >= 4:
                iou_threshold = parse_data(node_inputs[3], np.float32,
                                           iou_threshold)
                mark_nodes_to_remove(node_inputs[3])

            if len(node_inputs) >= 5:
                score_threshold = parse_data(node_inputs[4], np.float32)
                mark_nodes_to_remove(node_inputs[4])

            new_node = onnx.helper.make_node(
                'NonMaxSuppression',
                node_inputs[:2],
                node_outputs,
                name=node_name,
                center_point_box=center_point_box,
                max_output_boxes_per_class=max_output_boxes_per_class,
                iou_threshold=iou_threshold,
                score_threshold=score_threshold,
                offset=offset)

            for output in node_outputs:
                if output in node_dict:
                    node_dict[output] = new_node
            nodes.insert(idx, new_node)
            nodes.remove(node)
        elif node.op_type == 'InstanceNormalization':
            # directly change op name
            node.op_type = 'MMCVInstanceNormalization'

    for node_name in nodes_name_to_remove:
        nodes.remove(node_dict[node_name])

    return onnx_model
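The effect of preprocess_onnx is easiest to see on a NonMaxSuppression node: shape-related inputs are folded into attributes so TensorRT can read them on the host at parse time. A hedged inspection sketch ('nms.onnx' is a placeholder path):

import onnx

model = preprocess_onnx(onnx.load('nms.onnx'))  # placeholder model file
for node in model.graph.node:
    if node.op_type == 'NonMaxSuppression':
        # inputs shrink to (boxes, scores); thresholds become attributes
        print(list(node.input))
        print({a.name: onnx.helper.get_attribute_value(a)
               for a in node.attribute})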
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Union

import onnx
import tensorrt as trt
import torch

from .preprocess import preprocess_onnx


def onnx2trt(onnx_model: Union[str, onnx.ModelProto],
             opt_shape_dict: dict,
             log_level: trt.ILogger.Severity = trt.Logger.ERROR,
             fp16_mode: bool = False,
             max_workspace_size: int = 0,
             device_id: int = 0) -> trt.ICudaEngine:
    """Convert onnx model to tensorrt engine.

    Arguments:
        onnx_model (str or onnx.ModelProto): the onnx model to convert from
        opt_shape_dict (dict): the min/opt/max shape of each input
        log_level (TensorRT log level): the log level of TensorRT
        fp16_mode (bool): enable fp16 mode
        max_workspace_size (int): set max workspace size of TensorRT engine.
            some tactics and layers need a large workspace.
        device_id (int): choose the device on which to create the engine.

    Returns:
        tensorrt.ICudaEngine: the TensorRT engine created from onnx_model

    Example:
        >>> engine = onnx2trt(
        >>>     "onnx_model.onnx",
        >>>     {'input': [[1, 3, 160, 160],
        >>>                [1, 3, 320, 320],
        >>>                [1, 3, 640, 640]]},
        >>>     log_level=trt.Logger.WARNING,
        >>>     fp16_mode=True,
        >>>     max_workspace_size=1 << 30,
        >>>     device_id=0)
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    device = torch.device(f'cuda:{device_id}')
    # create builder and network
    logger = trt.Logger(log_level)
    builder = trt.Builder(logger)
    EXPLICIT_BATCH = 1 << (int)(
        trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)

    # parse onnx
    parser = trt.OnnxParser(network, logger)

    if isinstance(onnx_model, str):
        onnx_model = onnx.load(onnx_model)
    onnx_model = preprocess_onnx(onnx_model)

    if not parser.parse(onnx_model.SerializeToString()):
        error_msgs = ''
        for error in range(parser.num_errors):
            error_msgs += f'{parser.get_error(error)}\n'
        raise RuntimeError(f'parse onnx failed:\n{error_msgs}')

    # config builder
    builder.max_workspace_size = max_workspace_size

    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size
    profile = builder.create_optimization_profile()

    for input_name, param in opt_shape_dict.items():
        min_shape = tuple(param[0][:])
        opt_shape = tuple(param[1][:])
        max_shape = tuple(param[2][:])
        profile.set_shape(input_name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)

    if fp16_mode:
        builder.fp16_mode = fp16_mode
        config.set_flag(trt.BuilderFlag.FP16)

    # create engine
    with torch.cuda.device(device):
        engine = builder.build_engine(network, config)

    return engine
def save_trt_engine(engine: trt.ICudaEngine, path: str) -> None:
    """Serialize TensorRT engine to disk.

    Arguments:
        engine (tensorrt.ICudaEngine): TensorRT engine to serialize
        path (str): disk path to write the engine
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    with open(path, mode='wb') as f:
        f.write(bytearray(engine.serialize()))


def load_trt_engine(path: str) -> trt.ICudaEngine:
    """Deserialize TensorRT engine from disk.

    Arguments:
        path (str): disk path to read the engine

    Returns:
        tensorrt.ICudaEngine: the TensorRT engine loaded from disk
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    with trt.Logger() as logger, trt.Runtime(logger) as runtime:
        with open(path, mode='rb') as f:
            engine_bytes = f.read()
        engine = runtime.deserialize_cuda_engine(engine_bytes)
        return engine
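onnx2trt, save_trt_engine, and load_trt_engine compose into a build-once, load-later flow. A hedged round-trip sketch (paths and shapes are illustrative; a CUDA device and a valid ONNX file are assumed):

import tensorrt as trt

shape = [1, 3, 224, 224]
engine = onnx2trt(
    'model.onnx',  # placeholder ONNX file
    {'input': [shape, shape, shape]},  # same min/opt/max -> static shape
    max_workspace_size=1 << 30)
save_trt_engine(engine, 'model.engine')
engine = load_trt_engine('model.engine')
assert isinstance(engine, trt.ICudaEngine)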
def torch_dtype_from_trt(dtype: trt.DataType) -> torch.dtype:
    """Convert TensorRT dtype to PyTorch dtype."""
    if dtype == trt.bool:
        return torch.bool
    elif dtype == trt.int8:
        return torch.int8
    elif dtype == trt.int32:
        return torch.int32
    elif dtype == trt.float16:
        return torch.float16
    elif dtype == trt.float32:
        return torch.float32
    else:
        raise TypeError('%s is not supported by torch' % dtype)


def torch_device_from_trt(device: trt.TensorLocation) -> torch.device:
    """Convert TensorRT device to PyTorch device."""
    if device == trt.TensorLocation.DEVICE:
        return torch.device('cuda')
    elif device == trt.TensorLocation.HOST:
        return torch.device('cpu')
    else:
        # the original code returned the exception instead of raising it
        raise TypeError('%s is not supported by torch' % device)
class TRTWrapper(torch.nn.Module):
    """TensorRT engine wrapper.

    Arguments:
        engine (tensorrt.ICudaEngine): TensorRT engine to wrap
        input_names (list[str]): names of each input
        output_names (list[str]): names of each output

    Note:
        If the engine is converted from an onnx model, the input_names and
        output_names should be the same as in the onnx model.
    """

    def __init__(self, engine, input_names=None, output_names=None):
        # Following strings of text style are from colorama package
        bright_style, reset_style = '\x1b[1m', '\x1b[0m'
        red_text, blue_text = '\x1b[31m', '\x1b[34m'
        white_background = '\x1b[107m'

        msg = white_background + bright_style + red_text
        msg += 'DeprecationWarning: This tool will be deprecated in future. '
        msg += blue_text + \
            'Welcome to use the unified model deployment toolbox '
        msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
        msg += reset_style
        warnings.warn(msg)
        super().__init__()
        self.engine = engine
        if isinstance(self.engine, str):
            self.engine = load_trt_engine(engine)

        if not isinstance(self.engine, trt.ICudaEngine):
            raise TypeError('engine should be str or trt.ICudaEngine')

        self._register_state_dict_hook(TRTWrapper._on_state_dict)
        self.context = self.engine.create_execution_context()

        # get input and output names from engine
        if input_names is None or output_names is None:
            names = [_ for _ in self.engine]
            input_names = list(filter(self.engine.binding_is_input, names))
            output_names = list(set(names) - set(input_names))
        self.input_names = input_names
        self.output_names = output_names

    def _on_state_dict(self, state_dict, prefix, local_metadata):
        state_dict[prefix + 'engine'] = bytearray(self.engine.serialize())
        state_dict[prefix + 'input_names'] = self.input_names
        state_dict[prefix + 'output_names'] = self.output_names

    def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                              strict, missing_keys, unexpected_keys,
                              error_msgs):
        engine_bytes = state_dict[prefix + 'engine']

        with trt.Logger() as logger, trt.Runtime(logger) as runtime:
            self.engine = runtime.deserialize_cuda_engine(engine_bytes)
            self.context = self.engine.create_execution_context()

        self.input_names = state_dict[prefix + 'input_names']
        self.output_names = state_dict[prefix + 'output_names']

    def forward(self, inputs):
        """Run inference with the wrapped engine.

        Arguments:
            inputs (dict): dict of input name-tensor pairs

        Return:
            dict: dict of output name-tensor pairs
        """
        assert self.input_names is not None
        assert self.output_names is not None
        bindings = [None] * (len(self.input_names) + len(self.output_names))

        for input_name, input_tensor in inputs.items():
            idx = self.engine.get_binding_index(input_name)
            if input_tensor.dtype == torch.long:
                input_tensor = input_tensor.int()
            self.context.set_binding_shape(idx, tuple(input_tensor.shape))
            bindings[idx] = input_tensor.contiguous().data_ptr()

        # create output tensors
        outputs = {}
        for i, output_name in enumerate(self.output_names):
            idx = self.engine.get_binding_index(output_name)
            dtype = torch_dtype_from_trt(self.engine.get_binding_dtype(idx))
            shape = tuple(self.context.get_binding_shape(idx))

            device = torch_device_from_trt(self.engine.get_location(idx))
            output = torch.empty(size=shape, dtype=dtype, device=device)
            outputs[output_name] = output
            bindings[idx] = output.data_ptr()
        self.context.execute_async_v2(bindings,
                                      torch.cuda.current_stream().cuda_stream)

        return outputs


class TRTWraper(TRTWrapper):
    """Deprecated, misspelled alias kept for backward compatibility."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'TRTWraper will be deprecated in'
            ' future. Please use TRTWrapper instead', DeprecationWarning)
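For reference, a hedged usage sketch of the wrapper above (the engine path, binding names, and input shape are illustrative; a CUDA device is assumed):

import torch

trt_model = TRTWrapper('model.engine', ['input'], ['output'])
with torch.no_grad():
    outputs = trt_model({'input': torch.randn(1, 3, 224, 224).cuda()})
print(outputs['output'].shape)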
@@ -14,7 +14,7 @@ line_length = 79
 multi_line_output = 0
 extra_standard_library = pkg_resources,setuptools,logging,os,warnings,abc
 known_first_party = mmcv
-known_third_party = addict,cv2,matplotlib,numpy,onnx,onnxruntime,packaging,pytest,pytorch_sphinx_theme,scipy,sphinx,tensorrt,torch,torchvision,yaml,yapf
+known_third_party = addict,cv2,matplotlib,numpy,onnx,packaging,pytest,pytorch_sphinx_theme,scipy,sphinx,torch,torchvision,yaml,yapf
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
...
@@ -2,7 +2,6 @@ import glob
 import os
 import platform
 import re
-import warnings

 from pkg_resources import DistributionNotFound, get_distribution
 from setuptools import find_packages, setup
@@ -138,65 +137,6 @@ except ImportError:

 def get_extensions():
     extensions = []

-    if os.getenv('MMCV_WITH_TRT', '0') != '0':
-        # Following strings of text style are from colorama package
-        bright_style, reset_style = '\x1b[1m', '\x1b[0m'
-        red_text, blue_text = '\x1b[31m', '\x1b[34m'
-        white_background = '\x1b[107m'
-
-        msg = white_background + bright_style + red_text
-        msg += 'DeprecationWarning: ' + \
-            'Custom TensorRT Ops will be deprecated in future. '
-        msg += blue_text + \
-            'Welcome to use the unified model deployment toolbox '
-        msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
-        msg += reset_style
-        warnings.warn(msg)
-
-        ext_name = 'mmcv._ext_trt'
-        from torch.utils.cpp_extension import include_paths, library_paths
-        library_dirs = []
-        libraries = []
-        include_dirs = []
-        tensorrt_path = os.getenv('TENSORRT_DIR', '0')
-        tensorrt_lib_path = glob.glob(
-            os.path.join(tensorrt_path, 'targets', '*', 'lib'))[0]
-        library_dirs += [tensorrt_lib_path]
-        libraries += ['nvinfer', 'nvparsers', 'nvinfer_plugin']
-        libraries += ['cudart']
-        define_macros = []
-        extra_compile_args = {'cxx': []}
-
-        include_path = os.path.abspath('./mmcv/ops/csrc/common/cuda')
-        include_trt_path = os.path.abspath('./mmcv/ops/csrc/tensorrt')
-        include_dirs.append(include_path)
-        include_dirs.append(include_trt_path)
-        include_dirs.append(os.path.join(tensorrt_path, 'include'))
-        include_dirs += include_paths(cuda=True)
-
-        op_files = glob.glob('./mmcv/ops/csrc/tensorrt/plugins/*')
-        define_macros += [('MMCV_WITH_CUDA', None)]
-        define_macros += [('MMCV_WITH_TRT', None)]
-        cuda_args = os.getenv('MMCV_CUDA_ARGS')
-        extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-        # prevent cub/thrust conflict with other python library
-        # More context See issues #1454
-        extra_compile_args['nvcc'] += ['-Xcompiler=-fno-gnu-unique']
-        library_dirs += library_paths(cuda=True)
-
-        from setuptools import Extension
-        ext_ops = Extension(
-            name=ext_name,
-            sources=op_files,
-            include_dirs=include_dirs,
-            define_macros=define_macros,
-            extra_compile_args=extra_compile_args,
-            language='c++',
-            library_dirs=library_dirs,
-            libraries=libraries)
-        extensions.append(ext_ops)
-
     if os.getenv('MMCV_WITH_OPS', '0') == '0':
         return extensions
@@ -353,63 +293,6 @@ def get_extensions():
             define_macros=define_macros,
             extra_compile_args=extra_compile_args)
         extensions.append(ext_ops)

-    if EXT_TYPE == 'pytorch' and os.getenv('MMCV_WITH_ORT', '0') != '0':
-        # Following strings of text style are from colorama package
-        bright_style, reset_style = '\x1b[1m', '\x1b[0m'
-        red_text, blue_text = '\x1b[31m', '\x1b[34m'
-        white_background = '\x1b[107m'
-
-        msg = white_background + bright_style + red_text
-        msg += 'DeprecationWarning: ' + \
-            'Custom ONNXRuntime Ops will be deprecated in future. '
-        msg += blue_text + \
-            'Welcome to use the unified model deployment toolbox '
-        msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
-        msg += reset_style
-        warnings.warn(msg)
-
-        ext_name = 'mmcv._ext_ort'
-        import onnxruntime
-        from torch.utils.cpp_extension import include_paths, library_paths
-        library_dirs = []
-        libraries = []
-        include_dirs = []
-        ort_path = os.getenv('ONNXRUNTIME_DIR', '0')
-        library_dirs += [os.path.join(ort_path, 'lib')]
-        libraries.append('onnxruntime')
-        define_macros = []
-        extra_compile_args = {'cxx': []}
-
-        include_path = os.path.abspath('./mmcv/ops/csrc/onnxruntime')
-        include_dirs.append(include_path)
-        include_dirs.append(os.path.join(ort_path, 'include'))
-
-        op_files = glob.glob('./mmcv/ops/csrc/onnxruntime/cpu/*')
-        if onnxruntime.get_device() == 'GPU' or os.getenv('FORCE_CUDA',
-                                                          '0') == '1':
-            define_macros += [('MMCV_WITH_CUDA', None)]
-            cuda_args = os.getenv('MMCV_CUDA_ARGS')
-            extra_compile_args['nvcc'] = [cuda_args] if cuda_args else []
-            op_files += glob.glob('./mmcv/ops/csrc/onnxruntime/gpu/*')
-            include_dirs += include_paths(cuda=True)
-            library_dirs += library_paths(cuda=True)
-        else:
-            include_dirs += include_paths(cuda=False)
-            library_dirs += library_paths(cuda=False)
-
-        from setuptools import Extension
-        ext_ops = Extension(
-            name=ext_name,
-            sources=op_files,
-            include_dirs=include_dirs,
-            define_macros=define_macros,
-            extra_compile_args=extra_compile_args,
-            language='c++',
-            library_dirs=library_dirs,
-            libraries=libraries)
-        extensions.append(ext_ops)
-
     return extensions
...
 # Copyright (c) OpenMMLab. All rights reserved.
 import os
-import warnings
 from functools import partial

 import numpy as np
@@ -10,7 +9,6 @@ import pytest
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from packaging import version

 onnx_file = 'tmp.onnx'
 if torch.__version__ == 'parrots':
@@ -40,93 +38,8 @@ class WrapFunction(nn.Module):
         return self.wrapped_function(*args, **kwargs)
-def process_grid_sample(func, input, grid, ort_custom_op_path=''):
-    wrapped_model = WrapFunction(func).eval()
-
-    input_names = ['input', 'grid']
-    output_names = ['output']
-
-    with torch.no_grad():
-        torch.onnx.export(
-            wrapped_model, (input, grid),
-            onnx_file,
-            export_params=True,
-            keep_initializers_as_inputs=True,
-            input_names=input_names,
-            output_names=output_names,
-            opset_version=11)
-
-    onnx_model = onnx.load(onnx_file)
-
-    session_options = rt.SessionOptions()
-    if ort_custom_op_path:
-        session_options.register_custom_ops_library(ort_custom_op_path)
-
-    # get onnx output
-    input_all = [node.name for node in onnx_model.graph.input]
-    input_initializer = [node.name for node in onnx_model.graph.initializer]
-    net_feed_input = list(set(input_all) - set(input_initializer))
-    assert (len(net_feed_input) == 2)
-    sess = rt.InferenceSession(
-        onnx_file, session_options, providers=['CPUExecutionProvider'])
-    ort_result = sess.run(None, {
-        'input': input.detach().numpy(),
-        'grid': grid.detach().numpy()
-    })
-    pytorch_results = wrapped_model(input.clone(), grid.clone())
-    assert np.allclose(pytorch_results, ort_result, atol=1e-3)
-
-
-@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
-@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
-@pytest.mark.parametrize('align_corners', [True, False])
-def test_grid_sample(mode, padding_mode, align_corners):
-    from mmcv.onnx.symbolic import register_extra_symbolics
-    opset_version = 11
-    register_extra_symbolics(opset_version)
-
-    from mmcv.ops import get_onnxruntime_op_path
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('custom ops for onnxruntime are not compiled.')
-
-    input = torch.rand(1, 1, 10, 10)
-    grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
-    grid = F.affine_grid(
-        grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)
-
-    def func(input, grid):
-        return F.grid_sample(
-            input,
-            grid,
-            mode=mode,
-            padding_mode=padding_mode,
-            align_corners=align_corners)
-
-    return process_grid_sample(func, input, grid, ort_custom_op_path)
-
-
-@pytest.mark.parametrize('align_corners', [True, False])
-def test_bilinear_grid_sample(align_corners):
-    from mmcv.ops.point_sample import bilinear_grid_sample
-    # only support pytorch >= 1.5.0
-    if version.parse(torch.__version__) < version.parse('1.5.0'):
-        pytest.skip('Only support PyTorch >= 1.5.0')
-
-    input = torch.rand(1, 1, 10, 10)
-    grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
-    grid = F.affine_grid(
-        grid, (1, 1, 15, 15), align_corners=align_corners).type_as(input)
-
-    def func(input, grid):
-        return bilinear_grid_sample(input, grid, align_corners=align_corners)
-
-    return process_grid_sample(func, input, grid)
 def test_nms():
-    from mmcv.ops import get_onnxruntime_op_path, nms
+    from mmcv.ops import nms
     np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
                          [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
                         dtype=np.float32)
@@ -151,10 +64,7 @@ def test_nms():
         opset_version=11)
     onnx_model = onnx.load(onnx_file)

-    ort_custom_op_path = get_onnxruntime_op_path()
     session_options = rt.SessionOptions()
-    if os.path.exists(ort_custom_op_path):
-        session_options.register_custom_ops_library(ort_custom_op_path)

     # get onnx output
     input_all = [node.name for node in onnx_model.graph.input]
@@ -171,89 +81,12 @@ def test_nms():
     assert np.allclose(pytorch_score, onnx_score, atol=1e-3)
-@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
-def test_softnms():
-    from mmcv.ops import get_onnxruntime_op_path, soft_nms
-
-    # only support pytorch >= 1.7.0
-    if version.parse(torch.__version__) < version.parse('1.7.0'):
-        warnings.warn('test_softnms should be run with pytorch >= 1.7.0')
-        return
-
-    # only support onnxruntime >= 1.5.1
-    assert version.parse(rt.__version__) >= version.parse(
-        '1.5.1'), 'test_softnms should be run with onnxruntime >= 1.5.1'
-
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('softnms for onnxruntime is not compiled.')
-
-    np_boxes = np.array([[6.0, 3.0, 8.0, 7.0], [3.0, 6.0, 9.0, 11.0],
-                         [3.0, 7.0, 10.0, 12.0], [1.0, 4.0, 13.0, 7.0]],
-                        dtype=np.float32)
-    np_scores = np.array([0.6, 0.9, 0.7, 0.2], dtype=np.float32)
-
-    boxes = torch.from_numpy(np_boxes)
-    scores = torch.from_numpy(np_scores)
-
-    configs = [[0.3, 0.5, 0.01, 'linear'], [0.3, 0.5, 0.01, 'gaussian'],
-               [0.3, 0.5, 0.01, 'naive']]
-
-    session_options = rt.SessionOptions()
-    session_options.register_custom_ops_library(ort_custom_op_path)
-
-    for _iou_threshold, _sigma, _min_score, _method in configs:
-        pytorch_dets, pytorch_inds = soft_nms(
-            boxes,
-            scores,
-            iou_threshold=_iou_threshold,
-            sigma=_sigma,
-            min_score=_min_score,
-            method=_method)
-        nms = partial(
-            soft_nms,
-            iou_threshold=_iou_threshold,
-            sigma=_sigma,
-            min_score=_min_score,
-            method=_method)
-
-        wrapped_model = WrapFunction(nms)
-        wrapped_model.cpu().eval()
-
-        with torch.no_grad():
-            torch.onnx.export(
-                wrapped_model, (boxes, scores),
-                onnx_file,
-                export_params=True,
-                keep_initializers_as_inputs=True,
-                input_names=['boxes', 'scores'],
-                opset_version=11)
-        onnx_model = onnx.load(onnx_file)
-
-        # get onnx output
-        input_all = [node.name for node in onnx_model.graph.input]
-        input_initializer = [
-            node.name for node in onnx_model.graph.initializer
-        ]
-        net_feed_input = list(set(input_all) - set(input_initializer))
-        assert (len(net_feed_input) == 2)
-        sess = rt.InferenceSession(
-            onnx_file, session_options, providers=['CPUExecutionProvider'])
-        onnx_dets, onnx_inds = sess.run(None, {
-            'scores': scores.detach().numpy(),
-            'boxes': boxes.detach().numpy()
-        })
-
-        assert np.allclose(pytorch_dets, onnx_dets, atol=1e-3)
-        # the original compared onnx_inds against itself, a vacuous check
-        assert np.allclose(pytorch_inds, onnx_inds, atol=1e-3)
 def test_roialign():
     try:
-        from mmcv.ops import get_onnxruntime_op_path, roi_align
+        from mmcv.ops import roi_align
     except (ImportError, ModuleNotFoundError):
         pytest.skip('roi_align op is not successfully compiled')

-    ort_custom_op_path = get_onnxruntime_op_path()
-
     # roi align config
     pool_h = 2
     pool_w = 2
@@ -295,8 +128,6 @@ def test_roialign():
     onnx_model = onnx.load(onnx_file)

     session_options = rt.SessionOptions()
-    if os.path.exists(ort_custom_op_path):
-        session_options.register_custom_ops_library(ort_custom_op_path)

     # compute onnx_output
     input_all = [node.name for node in onnx_model.graph.input]
@@ -318,83 +149,6 @@ def test_roialign():
     assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
-def test_roialign_rotated():
-    try:
-        from mmcv.ops import get_onnxruntime_op_path, roi_align_rotated
-    except (ImportError, ModuleNotFoundError):
-        pytest.skip('roi_align_rotated op is not successfully compiled')
-
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('custom ops for onnxruntime are not compiled.')
-
-    # roi align config
-    pool_h = 2
-    pool_w = 2
-    spatial_scale = 1.0
-    sampling_ratio = 2
-
-    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
-              ([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., np.pi / 2]]),
-              ([[[[1., 2.], [3., 4.]],
-                 [[4., 3.], [2., 1.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
-              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
-                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3., 0]]),
-              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
-                  [11., 12., 15., 16.]]]], [[0., 1.5, 1.5, 3., 3.,
-                                             np.pi / 2]])]
-
-    def warpped_function(torch_input, torch_rois):
-        return roi_align_rotated(torch_input, torch_rois, (pool_w, pool_h),
-                                 spatial_scale, sampling_ratio, True, False)
-
-    for case in inputs:
-        np_input = np.array(case[0], dtype=np.float32)
-        np_rois = np.array(case[1], dtype=np.float32)
-        input = torch.from_numpy(np_input)
-        rois = torch.from_numpy(np_rois)
-
-        # compute pytorch_output
-        with torch.no_grad():
-            pytorch_output = roi_align_rotated(input, rois, (pool_w, pool_h),
-                                               spatial_scale, sampling_ratio,
-                                               True, False)
-
-        # export and load onnx model
-        wrapped_model = WrapFunction(warpped_function)
-        with torch.no_grad():
-            torch.onnx.export(
-                wrapped_model, (input, rois),
-                onnx_file,
-                export_params=True,
-                keep_initializers_as_inputs=True,
-                input_names=['features', 'rois'],
-                opset_version=11)
-        onnx_model = onnx.load(onnx_file)
-
-        session_options = rt.SessionOptions()
-        if os.path.exists(ort_custom_op_path):
-            session_options.register_custom_ops_library(ort_custom_op_path)
-
-        # compute onnx_output
-        input_all = [node.name for node in onnx_model.graph.input]
-        input_initializer = [
-            node.name for node in onnx_model.graph.initializer
-        ]
-        net_feed_input = list(set(input_all) - set(input_initializer))
-        assert (len(net_feed_input) == 2)
-        sess = rt.InferenceSession(
-            onnx_file, session_options, providers=['CPUExecutionProvider'])
-        onnx_output = sess.run(None, {
-            'features': input.detach().numpy(),
-            'rois': rois.detach().numpy()
-        })
-        onnx_output = onnx_output[0]
-
-        # allclose
-        assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
 @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
 def test_roipool():
     from mmcv.ops import roi_pool
@@ -483,240 +237,6 @@ def test_interpolate():
     assert np.allclose(pytorch_result, onnx_result, atol=1e-3)
-def test_rotated_feature_align():
-    if torch.__version__ == 'parrots':
-        pytest.skip('onnx is not supported in parrots directly')
-    try:
-        from mmcv.ops import get_onnxruntime_op_path, rotated_feature_align
-    except (ImportError, ModuleNotFoundError):
-        pytest.skip('rotated_feature_align op is not successfully compiled')
-
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('custom ops for onnxruntime are not compiled.')
-
-    spatial_scale = 1.0 / 8
-    points = 1
-
-    def warpped_function(feature, bbox):
-        return rotated_feature_align(
-            feature, bbox, spatial_scale=spatial_scale, points=points)
-
-    feature = torch.tensor([[[[1.2924, -0.2172, -0.5222, 0.1172],
-                              [0.9144, 1.2248, 1.3115, -0.9690],
-                              [-0.8949, -1.1797, -0.9093, -0.3961],
-                              [-0.4586, 0.5062, -0.7947, -0.7397]],
-                             [[-1.0943, -0.7495, 1.3461, -1.1652],
-                              [0.2034, 0.6763, -1.2357, 0.5231],
-                              [-1.0062, 1.2592, 1.4225, -0.3951],
-                              [-0.1242, -1.6240, 0.1932, 2.7181]],
-                             [[-1.6271, -1.0276, 0.0578, -0.2997],
-                              [-0.9684, -1.6946, -1.3188, -1.1938],
-                              [-1.6744, -0.8917, -0.6556, 1.0073],
-                              [-0.1205, 0.3671, -0.3731, -0.5347]]],
-                            [[[0.7035, 0.2089, -0.1774, 3.4670],
-                              [-0.8505, -0.9278, 1.4714, 0.1644],
-                              [0.0898, 0.3531, -0.4007, 0.1927],
-                              [1.2569, -0.2636, -0.5223, 0.0616]],
-                             [[0.1760, -0.7639, -0.4600, -1.3260],
-                              [-0.9921, -0.2970, -0.8955, 1.0508],
-                              [1.3515, -0.1641, 1.9679, 1.1986],
-                              [-0.3616, 0.6287, 0.4933, 0.3360]],
-                             [[-0.5860, 0.2124, -0.8700, 2.4200],
-                              [-0.0551, -1.5103, -1.6779, 0.8399],
-                              [0.8431, 1.2414, -1.1243, -0.3887],
-                              [-2.1254, 0.6047, -0.3515, 0.7254]]]])
-
-    bbox = torch.tensor(
-        [[[[1.3080e+01, 1.2688e+01, 1.1214e+01, 9.3944e+01, -9.1905e-01],
-           [3.8104e+01, 1.0134e+01, 1.4659e+02, 9.0306e+01, -9.8211e-01],
-           [-5.3213e+01, 4.9508e+01, 5.1513e+01, 3.2055e+01, -3.1954e-01],
-           [2.6974e+01, 2.5248e+01, 5.4495e+01, 3.1083e+00, -6.2127e-01]],
-          [[-1.5604e+01, -5.1908e+01, 2.3998e+02, 1.5008e+01, -1.2546e+00],
-           [3.1354e+01, -7.3635e+00, 6.7879e+01, 3.5081e+01, -3.3851e-01],
-           [-5.3292e+00, 9.1946e+00, 1.2834e+01, 1.0485e+01, -1.3039e+00],
-           [-2.3925e+01, 3.6623e+01, 3.9875e+01, 7.2009e+01, -6.5934e-01]],
-          [[7.2114e+01, -2.3781e+01, 2.9106e+01, 8.4501e+01, -1.1340e+00],
-           [2.6258e+01, -7.7034e+00, 1.7629e+02, 1.0615e+02, -1.2156e+00],
-           [3.8057e+01, 4.6016e+01, 1.2965e+01, 6.9384e+00, -1.0855e+00],
-           [2.4428e+01, -1.6189e+01, 2.0572e+02, 3.1622e+01, -1.5719e-01]],
-          [[3.8226e+00, 2.9608e+01, 1.4457e+01, 6.8179e+01, -9.1997e-01],
-           [2.5003e+01, -4.2490e+01, 9.6007e+01, 4.9086e+01, -1.4786e+00],
-           [8.5983e+01, 5.4980e+01, 7.8080e+01, 1.0003e+02, -1.0926e+00],
-           [9.9065e+00, 4.1457e+01, 5.9799e+00, 1.7973e+01, -5.6313e-01]]],
-         [[[-1.8244e+01, 4.6309e+00, 5.3010e+01, 2.4310e+01, -7.0345e-01],
-           [1.9419e+01, 3.6704e+01, 5.2390e+01, 5.4133e+01, -3.7730e-01],
-           [5.6387e+01, 2.3752e+01, 9.0441e+00, 1.7792e+01, -1.5583e+00],
-           [3.6303e+01, 1.6396e+01, 2.0283e+01, 1.9148e+01, -8.3419e-01]],
-          [[3.2169e+01, 3.0521e+01, 2.6283e+01, 1.9680e+02, -3.0454e-01],
-           [2.5788e+01, -3.2189e+01, 8.8882e+01, 1.0207e+02, -1.5328e+00],
-           [8.4676e+00, -1.6668e+01, 2.4657e+01, 1.1275e+02, -4.0388e-01],
-           [-1.0799e+01, 6.0422e+00, 9.5807e+00, 3.3677e+01, -3.5438e-01]],
-          [[6.9363e+01, 1.0850e+01, 2.5968e+01, 2.2311e+01, -1.6408e-01],
-           [2.8140e+00, 4.6843e+00, 3.1289e+00, 2.1480e+01, -6.7583e-01],
-           [2.6661e+01, 4.5290e+01, 6.1679e+00, 3.0005e+01, -8.9806e-01],
-           [5.0871e+00, 1.3234e+01, 9.2087e+01, 4.9622e+01, -2.8020e-01]],
-          [[-1.2643e+01, 2.5176e+01, 5.0488e+01, 5.4246e+01, -4.4840e-01],
-           [-3.4521e+01, 9.8435e-01, 5.2413e+01, 9.7996e+00, -8.4218e-01],
-           [4.9829e+01, -1.0808e+01, 2.9848e+01, 7.3579e+01, -6.2672e-01],
-           [8.0446e+01, 2.8064e+01, 4.5273e+01, 5.3809e+01, -1.2359e+00]]]])
-
-    # compute pytorch_output
-    with torch.no_grad():
-        pytorch_output = rotated_feature_align(
-            feature, bbox, spatial_scale=spatial_scale, points=points)
-
-    # export and load onnx model
-    wrapped_model = WrapFunction(warpped_function)
-    with torch.no_grad():
-        torch.onnx.export(
-            wrapped_model, (feature, bbox),
-            onnx_file,
-            export_params=True,
-            keep_initializers_as_inputs=True,
-            input_names=['feature', 'bbox'],
-            opset_version=11)
-    onnx_model = onnx.load(onnx_file)
-
-    session_options = rt.SessionOptions()
-    if os.path.exists(ort_custom_op_path):
-        session_options.register_custom_ops_library(ort_custom_op_path)
-
-    # compute onnx_output
-    input_all = [node.name for node in onnx_model.graph.input]
-    input_initializer = [node.name for node in onnx_model.graph.initializer]
-    net_feed_input = list(set(input_all) - set(input_initializer))
-    assert (len(net_feed_input) == 2)
-    sess = rt.InferenceSession(
-        onnx_file, session_options, providers=['CPUExecutionProvider'])
-    onnx_output = sess.run(None, {
-        'feature': feature.detach().numpy(),
-        'bbox': bbox.detach().numpy()
-    })
-    onnx_output = onnx_output[0]
-
-    # allclose
-    assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
-
-
-@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
-def test_corner_pool(mode, opset=11):
-    from mmcv.ops import get_onnxruntime_op_path
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('custom ops for onnxruntime are not compiled.')
-
-    from mmcv.ops.corner_pool import CornerPool
-
-    def corner_pool_func(input):
-        corner_pool_module = CornerPool(mode)
-        return corner_pool_module.corner_pool.apply(input)
-
-    wrapped_model = WrapFunction(corner_pool_func).eval()
-    input = torch.rand((2, 3, 9, 12))  # (n,c,h,w)
-    with torch.no_grad():
-        torch.onnx.export(
-            wrapped_model,
-            input,
-            onnx_file,
-            export_params=True,
-            keep_initializers_as_inputs=True,
-            input_names=['input'],
-            output_names=['output'],
-            opset_version=opset)
-    onnx_model = onnx.load(onnx_file)
-    input_all = [node.name for node in onnx_model.graph.input]
-    input_initializer = [node.name for node in onnx_model.graph.initializer]
-    net_feed_input = list(set(input_all) - set(input_initializer))
-    assert (len(net_feed_input) == 1)
-    session_options = rt.SessionOptions()
-    session_options.register_custom_ops_library(ort_custom_op_path)
-    sess = rt.InferenceSession(
-        onnx_file, session_options, providers=['CPUExecutionProvider'])
-    ort_result = sess.run(None, {'input': input.detach().numpy()})
-    pytorch_results = wrapped_model(input.clone())
-    assert np.allclose(pytorch_results, ort_result, atol=1e-5)
-
-
-@pytest.mark.parametrize('key', ['cummax', 'cummin'])
-def test_cummax_cummin(key, opset=11):
-    # Note generally `cummax` or `cummin` is exportable to ONNX
-    # as long as the pytorch version >= 1.5.0, since `torch.cummax`
-    # is only supported with torch >= 1.5.0.
-    # But when `cummax` or `cummin` serves as an intermediate component
-    # whose outputs are used as inputs for another module, it's expected
-    # that the pytorch version must be >= 1.7.0. Otherwise an error
-    # appears like:
-    # `RuntimeError: tuple appears in op that does not forward tuples,
-    # unsupported 'kind: prim::PythonOp`.
-    if version.parse(torch.__version__) < version.parse('1.7.0'):
-        pytest.skip('test_cummax_cummin should be run with pytorch >= 1.7.0')
-
-    # register custom op `mmcv::cummax` and `mmcv::cummin`
-    from mmcv.onnx.symbolic import register_extra_symbolics
-    register_extra_symbolics(opset)
-
-    from mmcv.ops import get_onnxruntime_op_path
-    ort_custom_op_path = get_onnxruntime_op_path()
-    if not os.path.exists(ort_custom_op_path):
-        pytest.skip('custom ops for onnxruntime are not compiled.')
-
-    input_list = [
-        # arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
-        torch.rand((2, 3, 4, 1, 5)),
-        torch.rand(1),
-        torch.rand((2, 0, 1)),  # tensor.numel() is 0
-        torch.FloatTensor(),  # empty tensor
-    ]
-
-    cummax_cummin_funcs = {'cummax': torch.cummax, 'cummin': torch.cummin}
-
-    for input in input_list:
-        ndims = input.dim()
-        # valid dim range is [-ndims, ndims-1]
-        # test for all `dim` value which is valid
-        for dim in range(-ndims, ndims):
-            cummax_func = partial(cummax_cummin_funcs[key], dim=dim)
-            wrapped_model = WrapFunction(cummax_func).eval()
-
-            with torch.no_grad():
-                torch.onnx.export(
-                    wrapped_model,
-                    input,
-                    onnx_file,
-                    export_params=True,
-                    keep_initializers_as_inputs=True,
-                    input_names=['input'],
-                    output_names=['output', 'indices'],
-                    opset_version=opset)
-
-            onnx_model = onnx.load(onnx_file)
-            input_all = [node.name for node in onnx_model.graph.input]
-            input_initializer = [
-                node.name for node in onnx_model.graph.initializer
-            ]
-            net_feed_input = list(set(input_all) - set(input_initializer))
-            assert (len(net_feed_input) == 1)
-
-            session_options = rt.SessionOptions()
-            session_options.register_custom_ops_library(ort_custom_op_path)
-            sess = rt.InferenceSession(
-                onnx_file,
-                session_options,
-                providers=['CPUExecutionProvider'])
-            ort_output, ort_inds = sess.run(
-                None, {'input': input.detach().numpy()})
-            pytorch_output, pytorch_inds = wrapped_model(input.clone())
-            pytorch_output = pytorch_output.detach().numpy()
-            pytorch_inds = pytorch_inds.detach().numpy()
-            assert np.allclose(pytorch_output, ort_output, atol=1e-5)
-            assert np.all(pytorch_inds == ort_inds)
 @pytest.mark.parametrize('shifts_dims_pair', [([-3, 5], [2, 0]), (5, None)])
 def test_roll(shifts_dims_pair):
     opset = 11
@@ -755,173 +275,120 @@ def test_roll(shifts_dims_pair):
     torch.testing.assert_allclose(ort_output, pytorch_output)
@pytest.mark.skipif( def _test_symbolic(model, inputs, symbol_name):
not torch.cuda.is_available(), with torch.no_grad():
reason='modulated_deform_conv2d only supports in GPU') torch.onnx.export(model, inputs, onnx_file, opset_version=11)
def test_modulated_deform_conv2d():
try:
from mmcv.ops import ModulatedDeformConv2d, get_onnxruntime_op_path
except (ImportError, ModuleNotFoundError):
pytest.skip('modulated_deform_conv op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
# modulated deform conv config
in_channels = 3
out_channels = 64
stride = 1
padding = 0
dilation = 1
groups = 1
deform_groups = 1
kernel_size = 3
input = torch.rand(1, in_channels, 28, 28).cuda() # (n, c, h, w)
conv_offset = nn.Conv2d(
in_channels=3,
out_channels=deform_groups * 3 * kernel_size * kernel_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=True).cuda()
conv_offset.cuda()
out = conv_offset(input)
o1, o2, mask = torch.chunk(out, 3, dim=1)
offset = torch.cat((o1, o2), dim=1)
mask = torch.sigmoid(mask)
model_with_bias = ModulatedDeformConv2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
deform_groups,
bias=True)
model_without_bias = ModulatedDeformConv2d(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
deform_groups,
bias=False)
models = [model_with_bias.cuda(), model_without_bias.cuda()]
for model in models:
# export and load onnx model
with torch.no_grad():
torch.onnx.export(
model, (input, offset, mask),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input', 'offset', 'mask'],
opset_version=11)
session_options = rt.SessionOptions() import onnx
if os.path.exists(ort_custom_op_path): model = onnx.load(onnx_file)
session_options.register_custom_ops_library(ort_custom_op_path) nodes = model.graph.node
# compute onnx_output symbol_exist = False
sess = rt.InferenceSession( for n in nodes:
onnx_file, session_options, providers=['CPUExecutionProvider']) if n.op_type == symbol_name:
onnx_output = sess.run( symbol_exist = True
None, { assert symbol_exist
'input': input.cpu().detach().numpy(),
'offset': offset.cpu().detach().numpy(),
'mask': mask.cpu().detach().numpy()
})[0]
# compute pytorch_output
with torch.no_grad():
pytorch_output = model(input, offset, mask).cpu()
# allclose
assert np.allclose(pytorch_output, onnx_output, atol=1e-3)
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_border_align():
from mmcv.ops import BorderAlign
model = BorderAlign(2)
input = torch.rand(1, 8, 2, 2).cuda()
boxes = torch.rand(1, 4, 4).cuda()
_test_symbolic(model, (input, boxes), 'MMCVBorderAlign')
def test_deform_conv2d(threshold=1e-3):
try:
from mmcv.ops import DeformConv2d, get_onnxruntime_op_path
except (ImportError, ModuleNotFoundError):
pytest.skip('deform_conv op is not successfully compiled')
ort_custom_op_path = get_onnxruntime_op_path()
if not os.path.exists(ort_custom_op_path):
pytest.skip('custom ops for onnxruntime are not compiled.')
# deform conv config
# modulated deform conv config
in_channels = 1
out_channels = 64
stride = 1
padding = 0
dilation = 1
groups = 1
deform_groups = 1
kernel_size = 2
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
[[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
[[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
[[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]
x = torch.tensor(input)
conv_offset = nn.Conv2d(
in_channels=in_channels,
out_channels=deform_groups * 2 * kernel_size * kernel_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
bias=True)
conv_offset.weight.data = torch.nn.Parameter(
torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
conv_offset.bias.data = torch.nn.Parameter(
torch.Tensor(offset_bias).reshape(8))
offset = conv_offset(x)
model = DeformConv2d(in_channels, out_channels, kernel_size, stride,
padding, dilation, groups, deform_groups)
model.weight.data = torch.nn.Parameter(
torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
with torch.no_grad(): @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
torch.onnx.export( def test_carafe():
model, (x, offset), from mmcv.ops import CARAFENaive
onnx_file, feat = torch.randn(2, 64, 3, 3, device='cuda').double()
export_params=True, mask = torch.randn(2, 100, 6, 6, device='cuda').sigmoid().double()
keep_initializers_as_inputs=True, _test_symbolic(CARAFENaive(5, 4, 2), (feat, mask), 'MMCVCARAFENaive')
input_names=['input', 'offset'],
opset_version=11)
session_options = rt.SessionOptions()
if os.path.exists(ort_custom_op_path):
session_options.register_custom_ops_library(ort_custom_op_path)
# compute onnx_output @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
sess = rt.InferenceSession( def test_deform_conv():
onnx_file, session_options, providers=['CPUExecutionProvider']) from mmcv.ops import DeformConv2dPack
onnx_output = sess.run( x = torch.randn(1, 2, 4, 4, device='cuda')
None, { _test_symbolic(
'input': x.cpu().detach().numpy(), DeformConv2dPack(2, 4, 3, 1, 1).cuda(), x, 'MMCVDeformConv2d')
'offset': offset.cpu().detach().numpy(),
})[0]
# compute pytorch_output
with torch.no_grad(): @pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
pytorch_output = model(x, offset).cpu() def test_modulated_deform_conv():
# allclose from mmcv.ops import ModulatedDeformConv2dPack
assert np.allclose(pytorch_output, onnx_output, atol=1e-3) x = torch.randn(1, 2, 4, 4, device='cuda')
_test_symbolic(
ModulatedDeformConv2dPack(2, 4, 3, 1, 1).cuda(), x,
'MMCVModulatedDeformConv2d')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_deform_roi_pool():
from mmcv.ops import DeformRoIPoolPack
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0., 0., 1., 1.]], device='cuda')
output_c = x.size(1)
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
model = DeformRoIPoolPack((pool_h, pool_w),
output_c,
spatial_scale=spatial_scale,
sampling_ratio=sampling_ratio).cuda()
_test_symbolic(model, (x, rois), 'MMCVDeformRoIPool')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_masked_conv():
from mmcv.ops import MaskedConv2d
x = torch.rand(1, 2, 4, 4, device='cuda')
mask = torch.rand(1, 4, 4, device='cuda')
_test_symbolic(
MaskedConv2d(2, 4, 3, 1, 1).cuda(), (x, mask), 'MMCVMaskedConv2d')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_pr_roi_pool():
from mmcv.ops import PrRoIPool
pool_h = 2
pool_w = 2
spatial_scale = 1.0
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0., 0., 1., 1.]], device='cuda')
model = PrRoIPool((pool_h, pool_w), spatial_scale).cuda()
_test_symbolic(model, (x, rois), 'PrRoIPool')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_psa_mask():
from mmcv.ops import PSAMask
input = torch.rand(4, 16, 8, 8).cuda()
model = PSAMask('collect', (4, 4)).cuda()
_test_symbolic(model, input, 'MMCVPSAMask')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roi_align_rotated():
from mmcv.ops import RoIAlignRotated
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
x = torch.tensor([[[[1., 2.], [3., 4.]]]], device='cuda')
rois = torch.tensor([[0., 0.5, 0.5, 1., 1., 0]], device='cuda')
model = RoIAlignRotated((pool_h, pool_w), spatial_scale,
sampling_ratio).cuda()
_test_symbolic(model, (x, rois), 'MMCVRoIAlignRotated')
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU')
def test_roi_feaeture_align():
from mmcv.ops import rotated_feature_align
wrapped_model = WrapFunction(rotated_feature_align)
feature = torch.rand(1, 1, 2, 2, device='cuda')
bbox = torch.rand(1, 2, 2, 5, device='cuda')
_test_symbolic(wrapped_model, (feature, bbox), 'MMCVRotatedFeatureAlign')
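# NOTE: the `_test_symbolic` helper used by the tests above is defined earlier
# in this file (outside this hunk). A minimal sketch, assuming it only exports
# the module and checks that the expected custom op appears in the graph,
# might look like:
#
#     def _test_symbolic(model, inputs, symbol_name, onnx_file='tmp.onnx'):
#         inputs = inputs if isinstance(inputs, tuple) else (inputs, )
#         with torch.no_grad():
#             torch.onnx.export(model, inputs, onnx_file, opset_version=11)
#         graph = onnx.load(onnx_file).graph
#         assert any(symbol_name == node.op_type for node in graph.node)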
# Copyright (c) OpenMMLab. All rights reserved.
import os
from functools import partial
from typing import Callable
import mmengine
import numpy as np
import onnx
import pytest
import torch
import torch.nn as nn
import torch.nn.functional as F
try:
from mmcv.tensorrt import (TRTWrapper, is_tensorrt_plugin_loaded, onnx2trt,
save_trt_engine)
except ImportError:
pytest.skip(
'TensorRT should be installed from source.', allow_module_level=True)
if not torch.cuda.is_available():
pytest.skip(
'CUDA is required for this test module', allow_module_level=True)
if not is_tensorrt_plugin_loaded():
pytest.skip(
        'Test requires compiled TensorRT plugins in mmcv',
allow_module_level=True)
class WrapFunction(nn.Module):
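    """Wrap a plain function as an ``nn.Module`` so it can be traced by
    ``torch.onnx.export`` like a regular model."""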
def __init__(self, wrapped_function):
super().__init__()
self.wrapped_function = wrapped_function
def forward(self, *args, **kwargs):
return self.wrapped_function(*args, **kwargs)
onnx_file = 'tmp.onnx'
trt_file = 'tmp.engine'
def test_roialign():
try:
from mmcv.ops import RoIAlign
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
# trt config
fp16_mode = False
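    # 1 << 30 bytes = 1 GiB of TensorRT builder workspace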
max_workspace_size = 1 << 30
# roi align config
pool_h = 2
pool_w = 2
spatial_scale = 1.0
sampling_ratio = 2
inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
([[[[1., 2.], [3., 4.]], [[4., 3.],
[2., 1.]]]], [[0., 0., 0., 1., 1.]]),
([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
[11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]
wrapped_model = RoIAlign((pool_w, pool_h), spatial_scale, sampling_ratio,
'avg', True).cuda()
for case in inputs:
np_input = np.array(case[0], dtype=np.float32)
np_rois = np.array(case[1], dtype=np.float32)
input = torch.from_numpy(np_input).cuda()
rois = torch.from_numpy(np_rois).cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, rois),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input', 'rois'],
output_names=['roi_feat'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
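        # opt_shape_dict maps each input name to [min, opt, max] shapes;
        # repeating one shape three times builds a static-shape engine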
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
'rois': [list(rois.shape),
list(rois.shape),
list(rois.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['input', 'rois'], ['roi_feat'])
with torch.no_grad():
trt_outputs = trt_model({'input': input, 'rois': rois})
trt_roi_feat = trt_outputs['roi_feat']
# compute pytorch_output
with torch.no_grad():
pytorch_roi_feat = wrapped_model(input, rois)
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_roi_feat, trt_roi_feat)
def test_nms():
try:
from mmcv.ops import nms
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
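    # selecting the TensorRT backend makes the NMS symbolic function emit the
    # standard ONNX NonMaxSuppression op that the mmcv TensorRT plugin expects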
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
data = mmengine.load('./tests/data/batched_nms_data.pkl')
boxes = torch.from_numpy(data['boxes']).cuda()
scores = torch.from_numpy(data['scores']).cuda()
nms = partial(
nms, iou_threshold=0.7, offset=0, score_threshold=0.1, max_num=100)
wrapped_model = WrapFunction(nms)
wrapped_model.cpu().eval()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (boxes.detach().cpu(), scores.detach().cpu()),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['boxes', 'scores'],
output_names=['dets', 'inds'],
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'boxes': [list(boxes.shape),
list(boxes.shape),
list(boxes.shape)],
'scores': [list(scores.shape),
list(scores.shape),
list(scores.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['boxes', 'scores'], ['dets', 'inds'])
with torch.no_grad():
trt_outputs = trt_model({'boxes': boxes, 'scores': scores})
trt_dets = trt_outputs['dets']
trt_inds = trt_outputs['inds']
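    # TensorRT returns int32 indices; cast to int64 to compare with PyTorch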
trt_inds = trt_inds.long()
# compute pytorch_output
with torch.no_grad():
pytorch_outputs = wrapped_model(boxes, scores)
pytorch_dets, pytorch_inds = pytorch_outputs
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
num_boxes = pytorch_dets.shape[0]
trt_dets = trt_dets[:num_boxes, ...]
trt_inds = trt_inds[:num_boxes]
trt_scores = trt_dets[:, 4]
pytorch_scores = pytorch_dets[:, 4]
os.environ.pop('ONNX_BACKEND')
assert torch.allclose(pytorch_scores, trt_scores, atol=1e-3)
assert torch.equal(pytorch_inds, trt_inds)
def test_batched_nms():
try:
from mmcv.ops import batched_nms
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
# trt config
os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
fp16_mode = False
max_workspace_size = 1 << 30
data = mmengine.load('./tests/data/batched_nms_data.pkl')
nms_cfg = dict(type='nms', iou_threshold=0.7, score_threshold=0.1)
boxes = torch.from_numpy(data['boxes']).cuda()
scores = torch.from_numpy(data['scores']).cuda()
idxs = torch.from_numpy(data['idxs']).cuda()
class_agnostic = False
nms = partial(batched_nms, nms_cfg=nms_cfg, class_agnostic=class_agnostic)
wrapped_model = WrapFunction(nms)
wrapped_model.cpu().eval()
input_data = (boxes.detach().cpu(), scores.detach().cpu(),
idxs.detach().cpu())
input_names = ['boxes', 'scores', 'idxs']
output_names = ['dets', 'inds']
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input_data,
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'boxes': [list(boxes.shape),
list(boxes.shape),
list(boxes.shape)],
'scores': [list(scores.shape),
list(scores.shape),
list(scores.shape)],
'idxs': [list(idxs.shape),
list(idxs.shape),
list(idxs.shape)]
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({
'boxes': boxes,
'scores': scores,
'idxs': idxs
})
trt_dets = trt_outputs['dets']
trt_inds = trt_outputs['inds']
trt_inds = trt_inds.long()
# compute pytorch_output
with torch.no_grad():
pytorch_outputs = wrapped_model(boxes, scores, idxs)
pytorch_dets, pytorch_inds = pytorch_outputs
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
num_boxes = pytorch_dets.shape[0]
trt_dets = trt_dets[:num_boxes, ...]
trt_inds = trt_inds[:num_boxes]
trt_scores = trt_dets[:, 4]
pytorch_scores = pytorch_dets[:, 4]
os.environ.pop('ONNX_BACKEND')
assert torch.allclose(pytorch_scores, trt_scores)
assert torch.equal(pytorch_inds, trt_inds)
def test_scatternd():
def func(data):
data[:, :-2] += 1
data[:2, :] -= 1
return data
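    # the in-place slice assignments in `func` above are exported to ONNX as
    # ScatterND nodes, which is the op this test exercises through TensorRT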
data = torch.zeros(4, 4).cuda()
wrapped_model = WrapFunction(func).eval().cuda()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
wrapped_model, (data.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(data.shape),
list(data.shape),
list(data.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': data.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(data.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
def test_deform_conv():
try:
from mmcv.ops import DeformConv2dPack
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
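    # fixed inputs, offset weights/bias and conv weights keep the test
    # deterministic, so the PyTorch and TensorRT outputs are directly
    # comparable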
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
offset_weight = [[[0.1, 0.4, 0.6, 0.1]], [[0.3, 0.2, 0.1, 0.3]],
[[0.5, 0.5, 0.2, 0.8]], [[0.8, 0.3, 0.9, 0.1]],
[[0.3, 0.1, 0.2, 0.5]], [[0.3, 0.7, 0.5, 0.3]],
[[0.6, 0.2, 0.5, 0.3]], [[0.4, 0.1, 0.8, 0.4]]]
offset_bias = [0.7, 0.1, 0.8, 0.5, 0.6, 0.5, 0.4, 0.7]
deform_weight = [[[0.4, 0.2, 0.1, 0.9]]]
c_in = 1
c_out = 1
x = torch.Tensor(input).cuda()
x.requires_grad = True
model = DeformConv2dPack(c_in, c_out, 2, stride=1, padding=0)
model.conv_offset.weight.data = torch.nn.Parameter(
torch.Tensor(offset_weight).reshape(8, 1, 2, 2))
model.conv_offset.bias.data = torch.nn.Parameter(
torch.Tensor(offset_bias).reshape(8))
model.weight.data = torch.nn.Parameter(
torch.Tensor(deform_weight).reshape(1, 1, 2, 2))
model.cuda().eval()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
model, (x.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(x.shape), list(x.shape),
list(x.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': x.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = model(x.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('with_bias', [True, False])
def test_modulated_deform_conv(with_bias):
try:
from mmcv.ops import ModulatedDeformConv2dPack
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
input = [[[[1., 2., 3.], [0., 1., 2.], [3., 5., 2.]]]]
x = torch.Tensor(input).cuda()
model = ModulatedDeformConv2dPack(
1,
1,
kernel_size=(2, 2),
stride=1,
padding=1,
deform_groups=1,
bias=with_bias)
model.weight.data.fill_(1.)
model.type(torch.float32)
model = model.cuda().eval()
input_names = ['input']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
model, (x.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(x.shape), list(x.shape),
list(x.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': x.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = model(x.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
torch.testing.assert_allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('mode', ['bilinear', 'nearest'])
@pytest.mark.parametrize('padding_mode', ['zeros', 'border', 'reflection'])
@pytest.mark.parametrize('align_corners', [True, False])
def test_grid_sample(mode, padding_mode, align_corners):
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(11)
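    # registers mmcv's extra symbolic functions (including grid_sampler) for
    # opset 11 so that F.grid_sample below can be exported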
input = torch.rand(1, 1, 10, 10).cuda()
grid = torch.Tensor([[[1, 0, 0], [0, 1, 0]]])
grid = F.affine_grid(grid, (1, 1, 15, 15)).type_as(input).cuda()
def func(input, grid):
return F.grid_sample(
input,
grid,
mode=mode,
padding_mode=padding_mode,
align_corners=align_corners)
wrapped_model = WrapFunction(func).eval().cuda()
input_names = ['input', 'grid']
output_names = ['output']
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input.clone(), grid.clone()),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
'grid': [list(grid.shape),
list(grid.shape),
list(grid.shape)],
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': input.clone(), 'grid': grid.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(input.clone(), grid.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('func', [torch.cummax, torch.cummin])
def test_cummin_cummax(func: Callable):
    # Note: `cummax`/`cummin` is generally exportable to ONNX as long as the
    # PyTorch version is >= 1.5.0, since `torch.cummax` is only supported
    # from torch 1.5.0 onwards.
    # However, when `cummax` or `cummin` is an intermediate component whose
    # outputs are used as inputs to other modules, PyTorch >= 1.7.0 is
    # required. Otherwise an error like the following appears:
    # `RuntimeError: tuple appears in op that does not forward tuples,
    # unsupported kind: prim::PythonOp`.
from packaging import version
if version.parse(torch.__version__) < version.parse('1.7.0'):
        pytest.skip('test_cummin_cummax requires pytorch >= 1.7.0')
opset = 11
# register custom op `mmcv::cummax` and `mmcv::cummin`
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(opset)
input_list = [
# arbitrary shape, e.g. 1-D, 2-D, 3-D, ...
torch.rand((2, 3, 4, 1, 5)).cuda(),
torch.rand(1).cuda()
]
input_names = ['input']
output_names = ['output', 'indices']
for input in input_list:
ndims = input.dim()
        # the valid dim range is [-ndims, ndims - 1];
        # test every valid `dim` value
for dim in range(-ndims, ndims):
cummax_func = partial(func, dim=dim)
wrapped_model = WrapFunction(cummax_func).eval().cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model,
input,
onnx_file,
export_params=True,
keep_initializers_as_inputs=False,
input_names=input_names,
output_names=output_names,
opset_version=opset)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input':
[list(input.shape),
list(input.shape),
list(input.shape)]
}
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
# remove ONNX model after conversion
if os.path.exists(onnx_file):
os.remove(onnx_file)
# save TensorRT model
save_trt_engine(trt_engine, trt_file)
# load and wrap TensorRT model
trt_model = TRTWrapper(trt_file)
# remove trt model after loading
if os.path.exists(trt_file):
os.remove(trt_file)
# compute trt output
with torch.no_grad():
trt_results = trt_model({'input': input.contiguous().clone()})
trt_output = trt_results['output']
trt_indices = trt_results['indices']
# compute pytorch output
with torch.no_grad():
pytorch_results = wrapped_model(input.clone())
pytorch_output = pytorch_results[0]
pytorch_indices = pytorch_results[1]
torch.testing.assert_allclose(trt_output, pytorch_output)
torch.testing.assert_allclose(trt_indices, pytorch_indices)
@pytest.mark.parametrize('dynamic_export', [True, False])
@pytest.mark.parametrize('fp16_mode', [True, False])
def test_instance_norm(dynamic_export, fp16_mode):
n, c, h, w = 2, 3, 10, 10
data = torch.randn(n, c, h, w).cuda()
norm = nn.InstanceNorm2d(c, affine=True)
wrapped_model = WrapFunction(norm).eval().cuda()
input_names = ['input']
output_names = ['output']
dynamic_axes = None
if dynamic_export:
dynamic_axes = {
'input': {
0: 'n',
2: 'h',
3: 'w',
},
'output': {
0: 'n',
2: 'h',
3: 'w',
},
}
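    # with dynamic export the ONNX graph has symbolic batch/spatial dims, and
    # the TensorRT engine below is built with distinct min/opt/max shapes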
with torch.no_grad():
torch.onnx.export(
wrapped_model, (data.clone(), ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=input_names,
output_names=output_names,
dynamic_axes=dynamic_axes,
opset_version=11)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
if dynamic_export:
opt_shape_dict = {
'input':
[list(data.shape),
list(data.shape), [2 * n, c, 2 * h, 2 * w]],
}
else:
opt_shape_dict = {
'input': [list(data.shape),
list(data.shape),
list(data.shape)],
}
# trt config
max_workspace_size = 1 << 30
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, input_names, output_names)
with torch.no_grad():
trt_outputs = trt_model({'input': data.clone()})
trt_results = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_results = wrapped_model(data.clone())
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_results, trt_results)
@pytest.mark.parametrize('mode', ['top', 'bottom', 'left', 'right'])
def test_corner_pool(mode):
try:
from mmcv.ops import CornerPool
except (ImportError, ModuleNotFoundError):
pytest.skip('test requires compilation')
opset = 11
# register custom op `mmcv::MMCVCornerPool`
from mmcv.onnx.symbolic import register_extra_symbolics
register_extra_symbolics(opset)
# trt config
fp16_mode = False
max_workspace_size = 1 << 30
inputs = [
# (n, c, h, w)
torch.rand((2, 3, 5, 5)),
torch.rand((1, 2, 4, 6)),
torch.rand((2, 1, 3, 2)),
]
class CornerPoolWrapper(CornerPool):
def __init__(self, mode):
super().__init__(mode)
def forward(self, x):
            # use `corner_pool` instead of `torch.cummax` so the export
            # works across torch versions
return self.corner_pool.apply(x)
wrapped_model = CornerPoolWrapper(mode).cuda()
for input in inputs:
input = input.cuda()
with torch.no_grad():
torch.onnx.export(
wrapped_model, (input, ),
onnx_file,
export_params=True,
keep_initializers_as_inputs=True,
input_names=['input'],
output_names=['output'],
opset_version=opset)
onnx_model = onnx.load(onnx_file)
# create trt engine and wrapper
opt_shape_dict = {
'input': [list(input.shape),
list(input.shape),
list(input.shape)],
}
trt_engine = onnx2trt(
onnx_model,
opt_shape_dict,
fp16_mode=fp16_mode,
max_workspace_size=max_workspace_size)
save_trt_engine(trt_engine, trt_file)
trt_model = TRTWrapper(trt_file, ['input'], ['output'])
with torch.no_grad():
trt_outputs = trt_model({'input': input})
trt_pool_feat = trt_outputs['output']
# compute pytorch_output
with torch.no_grad():
pytorch_pool_feat = wrapped_model(input)
# allclose
if os.path.exists(onnx_file):
os.remove(onnx_file)
if os.path.exists(trt_file):
os.remove(trt_file)
assert torch.allclose(pytorch_pool_feat, trt_pool_feat, atol=1e-5)
# Copyright (c) OpenMMLab. All rights reserved.
import os
from functools import wraps
import onnx
import pytest
import torch
from mmcv.ops import nms
from mmcv.tensorrt.preprocess import preprocess_onnx
if torch.__version__ == 'parrots':
pytest.skip('not supported in parrots now', allow_module_level=True)
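# Decorator that injects a temporary ONNX filename into the wrapped function
# via `onnx_file` and removes the file afterwards, even if the call raises.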
def remove_tmp_file(func):
@wraps(func)
def wrapper(*args, **kwargs):
onnx_file = 'tmp.onnx'
kwargs['onnx_file'] = onnx_file
try:
result = func(*args, **kwargs)
finally:
if os.path.exists(onnx_file):
os.remove(onnx_file)
return result
return wrapper
@remove_tmp_file
def export_nms_module_to_onnx(module, onnx_file):
torch_model = module()
torch_model.eval()
input = (torch.rand([100, 4], dtype=torch.float32),
torch.rand([100], dtype=torch.float32))
torch.onnx.export(
torch_model,
input,
onnx_file,
opset_version=11,
input_names=['boxes', 'scores'],
output_names=['output'])
onnx_model = onnx.load(onnx_file)
return onnx_model
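# `preprocess_onnx` rewrites each NonMaxSuppression node for the mmcv TensorRT
# plugin, folding its constant inputs (max_output_boxes_per_class,
# iou_threshold, score_threshold, ...) into node attributes, which the two
# tests below check.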
def test_can_handle_nms_with_constant_maxnum():
class ModuleNMS(torch.nn.Module):
def forward(self, boxes, scores):
return nms(boxes, scores, iou_threshold=0.4, max_num=10)
onnx_model = export_nms_module_to_onnx(ModuleNMS)
preprocess_onnx_model = preprocess_onnx(onnx_model)
for node in preprocess_onnx_model.graph.node:
if 'NonMaxSuppression' in node.name:
assert len(node.attribute) == 5, 'The NMS must have 5 attributes.'
def test_can_handle_nms_with_undefined_maxnum():
class ModuleNMS(torch.nn.Module):
def forward(self, boxes, scores):
return nms(boxes, scores, iou_threshold=0.4)
onnx_model = export_nms_module_to_onnx(ModuleNMS)
preprocess_onnx_model = preprocess_onnx(onnx_model)
for node in preprocess_onnx_model.graph.node:
if 'NonMaxSuppression' in node.name:
assert len(node.attribute) == 5, \
'The NMS must have 5 attributes.'
assert node.attribute[2].i > 0, \
'The max_output_boxes_per_class is not defined correctly.'