Commit 7f7e7663 authored by Lara Haidar's avatar Lara Haidar Committed by Francisco Massa
Browse files

Fix Windows build in Torchvision Custom op Registration (#1320)

* Revert "Revert "Register Torchvision Ops as Custom Ops (#1267)" (#1316)"

This reverts commit fe234fc8.

* Make import of C++ extensions lazy

* define python initialization functions for extension

* Fix lint
parent 6ddda3ae
...@@ -47,6 +47,10 @@ before_install: ...@@ -47,6 +47,10 @@ before_install:
- pip install future - pip install future
- pip install pytest pytest-cov codecov - pip install pytest pytest-cov codecov
- pip install mock - pip install mock
- |
if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then
pip install onnxruntime
fi
- conda install av -c conda-forge - conda install av -c conda-forge
......
...@@ -96,12 +96,21 @@ def get_extensions(): ...@@ -96,12 +96,21 @@ def get_extensions():
source_models = [os.path.join(models_dir, s) for s in source_models] source_models = [os.path.join(models_dir, s) for s in source_models]
tests = test_file + source_models tests = test_file + source_models
custom_ops_sources = [os.path.join(extensions_dir, "custom_ops", "custom_ops.cpp"),
os.path.join(extensions_dir, "cpu", "nms_cpu.cpp"),
os.path.join(extensions_dir, "cpu", "ROIAlign_cpu.cpp"),
os.path.join(extensions_dir, "cpu", "ROIPool_cpu.cpp")]
custom_ops_sources_cuda = [os.path.join(extensions_dir, "cuda", "nms_cuda.cu"),
os.path.join(extensions_dir, "cuda", "ROIAlign_cuda.cu"),
os.path.join(extensions_dir, "cuda", "ROIPool_cuda.cu")]
define_macros = [] define_macros = []
extra_compile_args = {} extra_compile_args = {}
if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1': if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv('FORCE_CUDA', '0') == '1':
extension = CUDAExtension extension = CUDAExtension
sources += source_cuda sources += source_cuda
custom_ops_sources += custom_ops_sources_cuda
define_macros += [('WITH_CUDA', None)] define_macros += [('WITH_CUDA', None)]
nvcc_flags = os.getenv('NVCC_FLAGS', '') nvcc_flags = os.getenv('NVCC_FLAGS', '')
if nvcc_flags == '': if nvcc_flags == '':
...@@ -138,7 +147,14 @@ def get_extensions(): ...@@ -138,7 +147,14 @@ def get_extensions():
include_dirs=tests_include_dirs, include_dirs=tests_include_dirs,
define_macros=define_macros, define_macros=define_macros,
extra_compile_args=extra_compile_args, extra_compile_args=extra_compile_args,
) ),
extension(
"torchvision._custom_ops",
sources=custom_ops_sources,
include_dirs=include_dirs,
define_macros=define_macros,
extra_compile_args=extra_compile_args,
),
] ]
return ext_modules return ext_modules
...@@ -179,5 +195,6 @@ setup( ...@@ -179,5 +195,6 @@ setup(
"scipy": ["scipy"], "scipy": ["scipy"],
}, },
ext_modules=get_extensions(), ext_modules=get_extensions(),
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension, 'clean': clean} cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
'clean': clean}
) )
import io
import torch
from torchvision import ops
# onnxruntime requires python 3.5 or above
try:
import onnxruntime
except ImportError:
onnxruntime = None
import unittest
@unittest.skipIf(onnxruntime is None, 'ONNX Runtime unavailable')
class ONNXExporterTester(unittest.TestCase):
    """Exports torchvision ops to ONNX and validates the results with ONNX Runtime.

    Each test runs a small model eagerly in PyTorch, exports it with
    torch.onnx.export, executes the exported graph under onnxruntime, and
    asserts both produce (numerically close) identical outputs.
    """

    @classmethod
    def setUpClass(cls):
        # Fixed seed so the random inputs below are reproducible across runs.
        torch.manual_seed(123)

    def run_model(self, model, inputs):
        """Run ``model`` on ``inputs`` eagerly, export to ONNX, and cross-check.

        ``inputs`` may be a single Tensor or a tuple of Tensors.
        """
        model.eval()

        # Run the PyTorch model to obtain the reference outputs.
        with torch.no_grad():
            if isinstance(inputs, torch.Tensor):
                inputs = (inputs,)
            outputs = model(*inputs)
            if isinstance(outputs, torch.Tensor):
                outputs = (outputs,)

        onnx_io = io.BytesIO()
        # Export to ONNX. Opset 10 is the first opset providing
        # NonMaxSuppression / RoiAlign, which the custom-op symbolics target.
        torch.onnx.export(model, inputs, onnx_io, do_constant_folding=True, opset_version=10)

        # Validate the exported model with ONNX Runtime.
        self.ort_validate(onnx_io, inputs, outputs)

    def ort_validate(self, onnx_io, inputs, outputs):
        """Execute the serialized ONNX model and compare against ``outputs``."""
        inputs, _ = torch.jit._flatten(inputs)
        outputs, _ = torch.jit._flatten(outputs)

        def to_numpy(tensor):
            if tensor.requires_grad:
                return tensor.detach().cpu().numpy()
            else:
                return tensor.cpu().numpy()

        inputs = list(map(to_numpy, inputs))
        outputs = list(map(to_numpy, outputs))

        ort_session = onnxruntime.InferenceSession(onnx_io.getvalue())
        # Bind inputs positionally to the session's declared input names.
        ort_inputs = dict((ort_session.get_inputs()[i].name, inpt) for i, inpt in enumerate(inputs))
        ort_outs = ort_session.run(None, ort_inputs)

        # Compare ONNX Runtime and PyTorch results.
        for i in range(0, len(outputs)):
            torch.testing.assert_allclose(outputs[i], ort_outs[i], rtol=1e-03, atol=1e-05)

    def test_nms(self):
        boxes = torch.rand(5, 4)
        boxes[:, 2:] += torch.rand(5, 2)
        scores = torch.randn(5)

        class Module(torch.nn.Module):
            def forward(self, boxes, scores):
                return ops.nms(boxes, scores, 0.5)

        self.run_model(Module(), (boxes, scores))

    def test_roi_pool(self):
        # BUG FIX: this test previously constructed ops.RoIAlign (and
        # test_roi_align constructed ops.RoIPool) -- the two bodies were
        # swapped. It now exercises RoIPool as its name promises.
        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
        rois = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
        pool_h = 5
        pool_w = 5
        model = ops.RoIPool((pool_h, pool_w), 2)
        model.eval()
        self.run_model(model, (x, rois))

    def test_roi_align(self):
        x = torch.rand(1, 1, 10, 10, dtype=torch.float32)
        single_roi = torch.tensor([[0, 0, 0, 4, 4]], dtype=torch.float32)
        model = ops.RoIAlign((5, 5), 1, 2)
        self.run_model(model, (x, single_roi))
if __name__ == '__main__':
unittest.main()
...@@ -10,11 +10,11 @@ ...@@ -10,11 +10,11 @@
at::Tensor ROIAlign_forward( at::Tensor ROIAlign_forward(
const at::Tensor& input, // Input feature map. const at::Tensor& input, // Input feature map.
const at::Tensor& rois, // List of ROIs to pool over. const at::Tensor& rois, // List of ROIs to pool over.
const float spatial_scale, // The scale of the image features. ROIs will be const double spatial_scale, // The scale of the image features. ROIs will be
// scaled to this. // scaled to this.
const int pooled_height, // The height of the pooled feature map. const int64_t pooled_height, // The height of the pooled feature map.
const int pooled_width, // The width of the pooled feature const int64_t pooled_width, // The width of the pooled feature
const int sampling_ratio) // The number of points to sample in each bin const int64_t sampling_ratio) // The number of points to sample in each bin
// along each axis. // along each axis.
{ {
if (input.type().is_cuda()) { if (input.type().is_cuda()) {
......
...@@ -9,9 +9,9 @@ ...@@ -9,9 +9,9 @@
std::tuple<at::Tensor, at::Tensor> ROIPool_forward( std::tuple<at::Tensor, at::Tensor> ROIPool_forward(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
const float spatial_scale, const double spatial_scale,
const int pooled_height, const int64_t pooled_height,
const int pooled_width) { const int64_t pooled_width) {
if (input.type().is_cuda()) { if (input.type().is_cuda()) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
return ROIPool_forward_cuda( return ROIPool_forward_cuda(
......
#include <Python.h>
#include <torch/script.h>
#include "ROIAlign.h"
#include "ROIPool.h"
#include "nms.h"
using namespace at;
// If we are in a Windows environment, we need to define
// initialization functions for the _custom_ops extension.
// NOTE(review): on Windows the build/link step requires the module init
// symbol (init_custom_ops / PyInit__custom_ops) to exist for a module named
// _custom_ops. These stubs only satisfy the linker; CPython treats a NULL
// return from a Python 3 PyInit_* hook as an import failure, so this relies
// on the library being loaded via torch.ops.load_library() rather than
// `import torchvision._custom_ops` -- confirm against _custom_ops.py.
#ifdef _WIN32
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC init_custom_ops(void) {
// No need to do anything.
// _custom_ops.py will run on load
return NULL;
}
#else
PyMODINIT_FUNC PyInit__custom_ops(void) {
// No need to do anything.
// _custom_ops.py will run on load
return NULL;
}
#endif
#endif
// Register the torchvision operators with the TorchScript dispatcher so they
// are callable as torch.ops.torchvision.* (and traceable for ONNX export).
// roi_align supplies an explicit schema string; nms and roi_pool let the
// schema be inferred from the C++ signatures (which use double/int64_t, the
// types TorchScript maps to `float`/`int`).
static auto registry =
torch::RegisterOperators()
.op("torchvision::nms", &nms)
.op("torchvision::roi_align(Tensor input, Tensor rois, float spatial_scale, int pooled_height, int pooled_width, int sampling_ratio) -> Tensor",
&ROIAlign_forward)
.op("torchvision::roi_pool", &ROIPool_forward);
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
at::Tensor nms( at::Tensor nms(
const at::Tensor& dets, const at::Tensor& dets,
const at::Tensor& scores, const at::Tensor& scores,
const float iou_threshold) { const double iou_threshold) {
if (dets.device().is_cuda()) { if (dets.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef WITH_CUDA
if (dets.numel() == 0) { if (dets.numel() == 0) {
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
#endif #endif
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// TODO: remove nms from here since it is now registered
// and used as a PyTorch custom op
m.def("nms", &nms, "non-maximum suppression"); m.def("nms", &nms, "non-maximum suppression");
m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
......
...@@ -10,6 +10,7 @@ def _lazy_import(): ...@@ -10,6 +10,7 @@ def _lazy_import():
return _C return _C
import torch import torch
from torchvision import _C as C from torchvision import _C as C
import torchvision.ops._custom_ops
_C = C _C = C
if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None: if hasattr(_C, "CUDA_VERSION") and torch.version.cuda is not None:
tv_version = str(_C.CUDA_VERSION) tv_version = str(_C.CUDA_VERSION)
......
import os
import sys
import imp
import torch
# load the custom_op_library and register the custom ops
# The compiled torchvision._custom_ops extension lives one directory above
# this package; imp.find_module resolves its platform-specific filename
# (.so/.pyd) without importing it as a Python module.
lib_dir = os.path.join(os.path.dirname(__file__), '..')
file, path, description = imp.find_module("_custom_ops", [lib_dir])
# Loading the shared library runs its static initializers, which register
# the torchvision::* operators with torch (see custom_ops.cpp).
torch.ops.load_library(path)
def register_custom_op():
    """Register ONNX symbolic functions for the torchvision custom ops.

    Maps ``torchvision::nms``, ``torchvision::roi_align`` and
    ``torchvision::roi_pool`` onto ONNX operators (opset 10, the first opset
    with NonMaxSuppression/RoiAlign) so traced models using these ops can be
    exported via ``torch.onnx.export``.
    """
    # Fix: dropped `scalar_type_to_onnx` and `reshape` from these imports --
    # neither was used anywhere in this function.
    from torch.onnx.symbolic_helper import parse_args
    from torch.onnx.symbolic_opset9 import select, unsqueeze, squeeze, _cast_Long

    @parse_args('v', 'v', 'f')
    def symbolic_multi_label_nms(g, boxes, scores, iou_threshold):
        # ONNX NonMaxSuppression expects batched, per-class input:
        # boxes -> (1, num_boxes, 4), scores -> (1, 1, num_boxes).
        boxes = unsqueeze(g, boxes, 0)
        scores = unsqueeze(g, unsqueeze(g, scores, 0), 0)
        # No cap on kept boxes: torchvision nms returns every survivor.
        max_output_per_class = g.op('Constant', value_t=torch.tensor([sys.maxsize], dtype=torch.long))
        iou_threshold = g.op('Constant', value_t=torch.tensor([iou_threshold], dtype=torch.float))
        nms_out = g.op('NonMaxSuppression', boxes, scores, max_output_per_class, iou_threshold)
        # NonMaxSuppression yields (num_selected, 3) rows of
        # [batch_index, class_index, box_index]; column 2 is the kept indices.
        return squeeze(g, select(g, nms_out, 1, g.op('Constant', value_t=torch.tensor([2], dtype=torch.long))), 1)

    @parse_args('v', 'v', 'f', 'i', 'i', 'i')
    def roi_align(g, input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio):
        # rois are (N, 5) rows of [batch_idx, x1, y1, x2, y2]; ONNX RoiAlign
        # takes batch indices and coordinates as two separate inputs.
        batch_indices = _cast_Long(g, squeeze(g, select(g, rois, 1, g.op('Constant',
                                   value_t=torch.tensor([0], dtype=torch.long))), 1), False)
        rois = select(g, rois, 1, g.op('Constant', value_t=torch.tensor([1, 2, 3, 4], dtype=torch.long)))
        return g.op('RoiAlign', input, rois, batch_indices, spatial_scale_f=spatial_scale,
                    output_height_i=pooled_height, output_width_i=pooled_width, sampling_ratio_i=sampling_ratio)

    @parse_args('v', 'v', 'f', 'i', 'i')
    def roi_pool(g, input, rois, spatial_scale, pooled_height, pooled_width):
        # MaxRoiPool consumes the (N, 5) roi format directly.
        roi_pool = g.op('MaxRoiPool', input, rois,
                        pooled_shape_i=(pooled_height, pooled_width), spatial_scale_f=spatial_scale)
        # torchvision::roi_pool returns (output, argmax); argmax has no ONNX
        # counterpart here, so export None in its place.
        return roi_pool, None

    from torch.onnx import register_custom_op_symbolic
    register_custom_op_symbolic('torchvision::nms', symbolic_multi_label_nms, 10)
    register_custom_op_symbolic('torchvision::roi_align', roi_align, 10)
    register_custom_op_symbolic('torchvision::roi_pool', roi_pool, 10)
register_custom_op()
...@@ -29,8 +29,8 @@ def nms(boxes, scores, iou_threshold): ...@@ -29,8 +29,8 @@ def nms(boxes, scores, iou_threshold):
of the elements that have been kept of the elements that have been kept
by NMS, sorted in decreasing order of scores by NMS, sorted in decreasing order of scores
""" """
_C = _lazy_import() _lazy_import()
return _C.nms(boxes, scores, iou_threshold) return torch.ops.torchvision.nms(boxes, scores, iou_threshold)
def batched_nms(boxes, scores, idxs, iou_threshold): def batched_nms(boxes, scores, idxs, iou_threshold):
......
...@@ -66,6 +66,13 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1): ...@@ -66,6 +66,13 @@ def roi_align(input, boxes, output_size, spatial_scale=1.0, sampling_ratio=-1):
rois = boxes rois = boxes
if not isinstance(rois, torch.Tensor): if not isinstance(rois, torch.Tensor):
rois = convert_boxes_to_roi_format(rois) rois = convert_boxes_to_roi_format(rois)
# TODO: Change this to support backwards, which we
# do not currently support when JIT tracing.
if torch._C._get_tracing_state():
_lazy_import()
return torch.ops.torchvision.roi_align(input, rois, spatial_scale,
output_size[0], output_size[1],
sampling_ratio)
return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio) return _RoIAlignFunction.apply(input, rois, output_size, spatial_scale, sampling_ratio)
......
...@@ -59,6 +59,13 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0): ...@@ -59,6 +59,13 @@ def roi_pool(input, boxes, output_size, spatial_scale=1.0):
rois = boxes rois = boxes
if not isinstance(rois, torch.Tensor): if not isinstance(rois, torch.Tensor):
rois = convert_boxes_to_roi_format(rois) rois = convert_boxes_to_roi_format(rois)
# TODO: Change this to support backwards, which we
# do not currently support when JIT tracing.
if torch._C._get_tracing_state():
_lazy_import()
output, _ = torch.ops.torchvision.roi_pool(input, rois, spatial_scale,
output_size[0], output_size[1])
return output
return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale) return _RoIPoolFunction.apply(input, rois, output_size, spatial_scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment