Commit 7f7e7663 authored by Lara Haidar's avatar Lara Haidar Committed by Francisco Massa
Browse files

Fix Windows build in Torchvision Custom op Registration (#1320)

* Revert "Revert "Register Torchvision Ops as Cutom Ops (#1267)" (#1316)"

This reverts commit fe234fc8.

* Make import of C++ extensions lazy

* define python initialization functions for extension

* Fix lint
parent 6ddda3ae
......@@ -47,6 +47,10 @@ before_install:
- pip install future
- pip install pytest pytest-cov codecov
- pip install mock
- |
if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
pip install onnxruntime
fi
- conda install av -c conda-forge
......
......@@ -96,12 +96,21 @@ def get_extensions():
source_models = [os.path.join(models_dir, s) for s in source_models]
tests = test_file + source_models
custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
define_macros = []
extra_compile_args = {}
if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
extension = CUDAExtension
sources += source_cuda
custom_ops_sources += custom_ops_sources_cuda
define_macros += [('WITH_CUDA', None)]
nvcc_flags = os.getenv('NVCC_FLAGS', '')
if nvcc_flags == '':
......@@ -138,7 +147,14 @@ def get_extensions():
include_dirs=tests_include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
)
),
extension(
"torchvision._custom_ops",
sources=custom_ops_sources,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
),
]
return ext_modules
......@@ -179,5 +195,6 @@ setup(
"scipy": ["scipy"],
},
ext_modules=get_extensions(),
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean}
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
'clean': clean}
)
import io
import torch
from torchvision import ops
# onnxruntime requires python 3.5 or above
try:
import onnxruntime
except ImportError:
onnxruntime = None
import unittest
@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
class ONNXExporterTester(unittest.TestCase):
    """Export torchvision custom ops (nms, roi_align, roi_pool) to ONNX and
    validate the exported graphs against ONNX Runtime."""

    @classmethod
    def setUpClass(cls):
        # Fixed seed so the randomly generated test inputs are reproducible.
        torch.manual_seed(123)

    def run_model(self, model, inputs):
        """Run `model` eagerly on `inputs`, export it to ONNX (opset 10),
        and check the ONNX Runtime outputs match the eager outputs."""
        model.eval()

        # Run the PyTorch model to obtain reference outputs.
        with torch.no_grad():
            if isinstance(inputs, torch.Tensor):
                inputs = (inputs,)
            outputs = model(*inputs)
            if isinstance(outputs, torch.Tensor):
                outputs = (outputs,)

        onnx_io = io.BytesIO()
        # Export to ONNX; opset 10 is the first opset providing
        # NonMaxSuppression, which the nms symbolic relies on.
        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)

        # Validate the exported model with ONNX Runtime.
        self.ort_validate(onnx_io, inputs, outputs)

    def ort_validate(self, onnx_io, inputs, outputs):
        """Feed `inputs` to the serialized model through ONNX Runtime and
        assert the results are close to the eager-mode `outputs`."""
        inputs, _ = torch.jit._flatten(inputs)
        outputs, _ = torch.jit._flatten(outputs)

        def to_numpy(tensor):
            if tensor.requires_grad:
                return tensor.detach().cpu().numpy()
            else:
                return tensor.cpu().numpy()

        inputs = list(map(to_numpy, inputs))
        outputs = list(map(to_numpy, outputs))

        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
        # Compute the ONNX Runtime output prediction.
        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
        ort_outs = ort_session.run(None, ort_inputs)

        for i in range(0, len(outputs)):
            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)

    def test_nms(self):
        boxes = torch.rand(5, 4)
        boxes[:, 2:] += torch.rand(5, 2)  # ensure x2 > x1 and y2 > y1
        scores = torch.randn(5)

        class Module(torch.nn.Module):
            def forward(self, boxes, scores):
                return ops.nms(boxes, scores, 0.5)

        self.run_model(Module(), (boxes, scores))

    def test_roi_pool(self):
        # Fix: this test previously constructed ops.RoIAlign; it now
        # exercises RoIPool as its name says.
        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
        pool_h = 5
        pool_w = 5
        model = ops.RoIPool((pool_h, pool_w), 2)
        self.run_model(model, (x, single_roi))

    def test_roi_align(self):
        # Fix: this test previously constructed ops.RoIPool; it now
        # exercises RoIAlign as its name says.
        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
        model = ops.RoIAlign((5, 5), 1, 2)
        self.run_model(model, (x, rois))
# Allow running this test module directly (outside of a test runner).
if __name__ == '__main__':
    unittest.main()
......@@ -10,11 +10,11 @@
at::Tensor ROIAlign_forward(
const at::Tensor& input, // Input feature map.
const at::Tensor& rois, // List of ROIs to pool over.
const float spatial_scale, // The scale of the image features. ROIs will be
const double spatial_scale, // The scale of the image features. ROIs will be
// scaled to this.
const int pooled_height, // The height of the pooled feature map.
const int pooled_width, // The width of the pooled feature
const int sampling_ratio) // The number of points to sample in each bin
const int64_t pooled_height, // The height of the pooled feature map.
const int64_t pooled_width, // The width of the pooled feature
const int64_t sampling_ratio) // The number of points to sample in each bin
// along each axis.
{
if (input.type().is_cuda()) {
......
......@@ -9,9 +9,9 @@
std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
const at::Tensor& input,
const at::Tensor& rois,
const float spatial_scale,
const int pooled_height,
const int pooled_width) {
const double spatial_scale,
const int64_t pooled_height,
const int64_t pooled_width) {
if (input.type().is_cuda()) {
#ifdef WITH_CUDA
return ROIPool_forward_cuda(
......
#include <Python.h>
#include <torch/script.h>
#include "ROIAlign.h"
#include "ROIPool.h"
#include "nms.h"
using namespace at;

// If we are in a Windows environment, we need to define
// initialization functions for the _custom_ops extension.
// On Windows the linker requires the module-init symbol to exist even
// though this library is loaded with torch.ops.load_library (dlopen-style),
// not via a regular Python import, so the bodies can be no-ops.
#ifdef _WIN32
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC init_custom_ops(void) {
  // No need to do anything.
  // _custom_ops.py will run on load
  return NULL;
}
#else
// NOTE(review): a real Python-3 module init would return a module object;
// returning NULL signals import failure. Presumably safe here only because
// the .pyd is opened through load_library and this function is never
// invoked by the import machinery — confirm on Windows.
PyMODINIT_FUNC PyInit__custom_ops(void) {
  // No need to do anything.
  // _custom_ops.py will run on load
  return NULL;
}
#endif
#endif

// Register the torchvision operators with the TorchScript dispatcher so
// they are callable as torch.ops.torchvision.* from Python and traceable
// by the JIT. roi_align declares an explicit schema; nms and roi_pool
// infer theirs from the C++ function signatures.
static auto registry =
    torch::RegisterOperators()
        .op("torchvision::nms", &nms)
        .op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
            &ROIAlign_forward)
        .op("torchvision::roi_pool", &ROIPool_forward);
......@@ -8,7 +8,7 @@
at::Tensor nms(
const at::Tensor& dets,
const at::Tensor& scores,
const float iou_threshold) {
const double iou_threshold) {
if (dets.device().is_cuda()) {
#ifdef WITH_CUDA
if (dets.numel() == 0) {
......
......@@ -7,6 +7,8 @@
#endif
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// TODO: remove nms from here since it is now registered
// and used as a PyTorch custom op
m.def("nms", &nms, "non-maximum suppression");
m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
......
......@@ -10,6 +10,7 @@ def _lazy_import():
return _C
import torch
from torchvision import _C as C
import torchvision.ops._custom_ops
_C = C
if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None:
tv_version = str(_C.CUDA_VERSION)
......
import os
import sys
import imp
import torch
# Load the compiled _custom_ops extension so that its TorchScript operator
# registrations (torchvision::nms / roi_align / roi_pool) become available
# under torch.ops.torchvision.*.
lib_dir = os.path.join(os.path.dirname(__file__), '..')
file, path, description = imp.find_module("_custom_ops", [lib_dir])
# Fix: imp.find_module may hand back an open file object that was never
# closed (resource leak). We only need the resolved path, so close it.
if file is not None:
    file.close()
torch.ops.load_library(path)
def register_custom_op():
    """Register ONNX symbolic functions (opset 10) for the torchvision
    custom ops so they can be exported with torch.onnx.export.

    Maps:
      torchvision::nms       -> ONNX NonMaxSuppression
      torchvision::roi_align -> ONNX RoiAlign
      torchvision::roi_pool  -> ONNX MaxRoiPool
    """
    # NOTE(review): scalar_type_to_onnx and reshape are imported but unused
    # below — candidates for removal.
    from torch.onnx.symbolic_helper import parse_args, scalar_type_to_onnx
    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long, reshape

    # parse_args type string: 'v' = graph value, 'f' = float, 'i' = int.
    @parse_args('v', 'v', 'f')
    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
        # NonMaxSuppression expects batched inputs: boxes as
        # (num_batches, num_boxes, 4) and scores as
        # (num_batches, num_classes, num_boxes); add the missing dims.
        boxes = unsqueeze(g, boxes, 0)
        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
        # No cap on kept boxes: use sys.maxsize as max_output_boxes_per_class.
        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
        # NonMaxSuppression returns (batch_index, class_index, box_index)
        # triples; keep only the box indices (column 2).
        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)

    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
        # rois come in as (N, 5) rows of [batch_idx, x1, y1, x2, y2];
        # ONNX RoiAlign wants batch indices and box coordinates separately.
        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)

    @parse_args('v', 'v', 'f', 'i', 'i')
    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
        roi_pool = g.op('MaxRoiPool', input, rois,
                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
        # The custom op returns (output, argmax); MaxRoiPool has no argmax,
        # so None stands in for the second output.
        return roi_pool, None

    from torch.onnx import register_custom_op_symbolic
    # Register for opset version 10 (first opset with NonMaxSuppression).
    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)

# Register the symbolics at import time so any subsequent export sees them.
register_custom_op()
......@@ -29,8 +29,8 @@ def nms(boxes, scores, iou_threshold):
of the elements that have been kept
by NMS, sorted in decreasing order of scores
"""
_C = _lazy_import()
return _C.nms(boxes, scores, iou_threshold)
_lazy_import()
return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
def batched_nms(boxes, scores, idxs, iou_threshold):
......
......@@ -66,6 +66,13 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
rois = boxes
if not isinstance(rois, torch.Tensor):
rois = convert_boxes_to_roi_format(rois)
# TODO: Change this to support backwards, which we
# do not currently support when JIT tracing.
if torch._C._get_tracing_state():
_lazy_import()
return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
output_size[0], output_size[1],
sampling_ratio)
return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
......
......@@ -59,6 +59,13 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
rois = boxes
if not isinstance(rois, torch.Tensor):
rois = convert_boxes_to_roi_format(rois)
# TODO: Change this to support backwards, which we
# do not currently support when JIT tracing.
if torch._C._get_tracing_state():
_lazy_import()
output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
output_size[0], output_size[1])
return output
return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment