"...text-generation-inference.git" did not exist on "3136f27f361b1a31ccd469998cc9e99249e759b8"
Commit 7603b8c3 authored by Minjie Wang

Fix dlpack bug; enable zero copy between dgl.ndarray and user tensors

parent 3d38c2a9
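For orientation, a hedged sketch of the round trip this commit enables; it mirrors the new test at the bottom of the diff, and the import paths are taken from this diff rather than from any stable API.

    import numpy as np
    from torch.utils import dlpack
    import dgl.ndarray as nd

    x = nd.array(np.zeros((3, 4), dtype=np.float32))  # dgl.NDArray
    y = dlpack.from_dlpack(x.to_dlpack())             # torch.Tensor, zero copy
    y[0] = 1                                          # mutation is visible in x
    assert np.allclose(x.asnumpy()[0], 1)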
@@ -249,9 +249,14 @@ struct NDArray::Container {
   friend class RPCWrappedFunc;
   /*!
    * \brief The shape container,
-   * can be used used for shape data.
+   * can be used for shape data.
    */
   std::vector<int64_t> shape_;
+  /*!
+   * \brief The stride container,
+   * can be used for stride data.
+   */
+  std::vector<int64_t> stride_;
   /*! \brief The internal array object */
   std::atomic<int> ref_counter_{0};
 };
@@ -24,6 +24,10 @@ def _from_dlpack(dltensor):
     dltensor = ctypes.py_object(dltensor)
     if ctypes.pythonapi.PyCapsule_IsValid(dltensor, _c_str_dltensor):
         ptr = ctypes.pythonapi.PyCapsule_GetPointer(dltensor, _c_str_dltensor)
+        # XXX(minjie): The cast below should be unnecessary given the code that
+        # sets the restype of the PyCapsule calls, but weirdly it does not
+        # always work.
+        ptr = ctypes.cast(ptr, ctypes.c_void_p)
         handle = TVMArrayHandle()
         check_call(_LIB.TVMArrayFromDLPack(ptr, ctypes.byref(handle)))
         ctypes.pythonapi.PyCapsule_SetName(dltensor, _c_str_used_dltensor)
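The XXX comment points at a real ctypes pitfall: unless restype is set, ctypes assumes a foreign function returns a C int, which truncates 64-bit pointers. A minimal illustration of the intended setup, using only standard CPython ctypes and independent of DGL:

    import ctypes

    # PyCapsule_GetPointer returns void*; without restype, ctypes treats the
    # result as a C int and can truncate the pointer on 64-bit platforms.
    ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
    ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object,
                                                      ctypes.c_char_p]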
@@ -36,8 +40,12 @@ def _dlpack_deleter(pycapsule):
     pycapsule = ctypes.cast(pycapsule, ctypes.py_object)
     if ctypes.pythonapi.PyCapsule_IsValid(pycapsule, _c_str_dltensor):
         ptr = ctypes.pythonapi.PyCapsule_GetPointer(pycapsule, _c_str_dltensor)
+        # XXX(minjie): The cast below should be unnecessary given the code that
+        # sets the restype of the PyCapsule calls, but weirdly it does not
+        # always work.
+        ptr = ctypes.cast(ptr, ctypes.c_void_p)
         _LIB.TVMDLManagedTensorCallDeleter(ptr)
-    ctypes.pythonapi.PyCapsule_SetDestructor(dltensor, TVMPyCapsuleDestructor(0))
+    ctypes.pythonapi.PyCapsule_SetDestructor(pycapsule, TVMPyCapsuleDestructor(0))

 _c_dlpack_deleter = TVMPyCapsuleDestructor(_dlpack_deleter)
@@ -72,9 +80,9 @@ class NDArrayBase(object):
         -------
         dlpack : DLPack tensor view of the array data
         """
-        handle = ctypes.c_void_p()
-        check_call(_LIB.TVMArrayToDLPack(self.handle, ctypes.byref(handle)))
-        return ctypes.pythonapi.PyCapsule_New(handle, _c_str_dltensor, _c_dlpack_deleter)
+        ptr = ctypes.c_void_p()
+        check_call(_LIB.TVMArrayToDLPack(self.handle, ctypes.byref(ptr)))
+        return ctypes.pythonapi.PyCapsule_New(ptr, _c_str_dltensor, _c_dlpack_deleter)

 def _make_array(handle, is_view):
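One property of the capsule returned by to_dlpack is worth spelling out: it is single-use. The consumer renames it from "dltensor" to "used_dltensor" (the PyCapsule_SetName call in _from_dlpack above), so importing it a second time fails. A hedged usage sketch:

    import numpy as np
    from torch.utils import dlpack
    import dgl.ndarray as nd

    x = nd.array(np.zeros((3, 4), dtype=np.float32))
    cap = x.to_dlpack()
    y = dlpack.from_dlpack(cap)   # consumes the capsule; y shares x's memory
    # dlpack.from_dlpack(cap)     # would raise: the capsule is already used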
@@ -2,6 +2,7 @@ from __future__ import absolute_import

 import ctypes
 import torch as th
+from torch.utils import dlpack

 from .._ffi.base import _LIB, check_call, c_array
 from .._ffi.runtime_ctypes import TVMType, TVMContext, TVMArray
@@ -31,12 +32,6 @@ max = th.max
 def astype(a, ty):
     return a.type(ty)

-def asnumpy(a):
-    return a.cpu().numpy()
-
-def from_numpy(np_data):
-    return th.from_numpy(np_data)
-
 def pack(tensors):
     return th.cat(tensors)
@@ -49,6 +44,9 @@ def shape(x):
 def dtype(x):
     return x.dtype

+def asnumpy(a):
+    return a.cpu().numpy()
+
 unique = th.unique

 def gather_row(data, row_index):
@@ -110,8 +108,24 @@ def _typestr(arr_dtype):
     else:
         raise RuntimeError('Unsupported data type:', arr_dtype)

-def astvmarray(arr_data):
-    """Return a TVMArray representation of the underlying data."""
+def zerocopy_to_dlpack(arr):
+    """Return a dlpack compatible array using zero copy."""
+    return dlpack.to_dlpack(arr)
+
+def zerocopy_from_dlpack(dlpack_arr):
+    """Return a tensor using zero copy."""
+    return dlpack.from_dlpack(dlpack_arr)
+
+def zerocopy_to_numpy(arr):
+    """Return a numpy array that shares the data."""
+    # TODO(minjie): zero copy
+    return arr.numpy()
+
+def zerocopy_from_numpy(np_data):
+    """Return a tensor that shares the numpy data."""
+    return th.from_numpy(np_data)
+
+'''
     data = arr_data
     assert data.is_contiguous()
     arr = TVMArray()
@@ -123,3 +137,4 @@ def astvmarray(arr_data):
     arr.ndim = len(shape)
     arr.ctx = get_context(data)
     return arr
+'''
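A hedged usage sketch of the two DLPack helpers added above, written directly against torch.utils.dlpack (which is what the wrappers delegate to) so the zero-copy behavior is visible:

    import torch as th
    from torch.utils import dlpack

    t = th.zeros(3)
    cap = dlpack.to_dlpack(t)      # what zerocopy_to_dlpack does
    t2 = dlpack.from_dlpack(cap)   # what zerocopy_from_dlpack does
    t2[0] = 1.0
    assert t[0] == 1.0             # same storage; no copy was made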
@@ -72,7 +72,7 @@ def array(arr, ctx=cpu(0)):
         arr = _np.array(arr)
     return empty(arr.shape, arr.dtype, ctx).copyfrom(arr)

-def from_numpy(np_data):
+def zerocopy_from_numpy(np_data):
     """Create an array that shares the given numpy data.

     Parameters
@@ -89,21 +89,4 @@ def from_numpy(np_data):
     handle = ctypes.pointer(arr)
     return NDArray(handle, is_view=True)

-def from_user_tensor(data):
-    """Create an array that shares the given user tensor data.
-
-    Parameters
-    ----------
-    data : F.Tensor
-        The user tensor data.
-
-    Returns
-    -------
-    NDArray
-        The array
-    """
-    arr = F.astvmarray(data)
-    handle = ctypes.pointer(arr)
-    return NDArray(handle, is_view=True)
-
 _set_class_ndarray(NDArray)
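Since zerocopy_from_numpy builds the NDArray with is_view=True, the NDArray and the numpy array share one buffer, and the caller must keep the numpy array alive for as long as the view is used. A hedged sketch of what the sharing implies:

    import numpy as np
    import dgl.ndarray as nd

    a = np.ones((4,), dtype=np.float32)
    x = nd.zerocopy_from_numpy(a)   # view over a's buffer, no copy
    a[0] = 5.0                      # keep `a` alive while x is in use
    assert np.allclose(x.asnumpy()[0], 5.0)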
@@ -35,6 +35,7 @@ class Index(object):
             self._list_data = np.array(data).astype(np.int64)
         except:
             raise ValueError('Error index data: %s' % str(data))
+        self._user_tensor_data[nd.cpu()] = F.zerocopy_from_numpy(self._list_data)

     def tolist(self):
         """Convert to a python-list compatible object."""
@@ -42,18 +43,20 @@ class Index(object):
         if self._dgl_tensor_data is not None:
             self._list_data = self._dgl_tensor_data.asnumpy()
         else:
-            assert len(self._user_tensor_data) > 0
-            data = next(iter(self._user_tensor_data.values()))
-            self._list_data = F.asnumpy(data)
+            data = self.tousertensor()
+            self._list_data = F.zerocopy_to_numpy(data)
         return self._list_data

     def tousertensor(self, ctx=None):
         """Convert to user tensor (defined in `backend`)."""
-        if len(self._user_tensor_data) == 0:
-            self._user_tensor_data[nd.cpu()] = F.from_numpy(self.tolist())
         if ctx is None:
             ctx = nd.cpu()
+        if len(self._user_tensor_data) == 0:
+            # zero copy from dgl tensor
+            dl = self._dgl_tensor_data.to_dlpack()
+            self._user_tensor_data[nd.cpu()] = F.zerocopy_from_dlpack(dl)
         if ctx not in self._user_tensor_data:
+            # copy from cpu to another device
             data = next(iter(self._user_tensor_data.values()))
             self._user_tensor_data[ctx] = F.to_context(data, ctx)
         return self._user_tensor_data[ctx]
@@ -61,13 +64,10 @@ class Index(object):
     def todgltensor(self):
         """Convert to dgl.NDArray."""
         if self._dgl_tensor_data is None:
-            if self._list_data is not None:
-                # create a view ndarray from numpy
-                self._dgl_tensor_data = nd.from_numpy(self._list_data)
-            else:
-                # create a view ndarray from user tensor
-                self._dgl_tensor_data = nd.from_user_tensor(
-                    self.tousertensor(ctx=nd.cpu()))
+            # zero copy from user tensor
+            tsor = self.tousertensor()
+            dl = F.zerocopy_to_dlpack(tsor)
+            self._dgl_tensor_data = nd.from_dlpack(dl)
         return self._dgl_tensor_data

     def __iter__(self):
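With these changes every Index conversion funnels through one cached, copy-free path: the constructor seeds the CPU user tensor from numpy, tousertensor() materializes from the dgl tensor via DLPack when the cache is empty, and todgltensor() goes the other way. A hedged sketch of the flow, using toindex from this diff:

    from dgl.utils import toindex

    idx = toindex([10] * 10)
    t = idx.tousertensor()   # torch tensor, zero copy from the cached numpy data
    d = idx.todgltensor()    # dgl.NDArray, zero copy from t via DLPack
    a = idx.tolist()         # numpy array backed by the same data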
@@ -80,6 +80,13 @@ struct NDArray::Internal {
     data->shape_ = std::move(shape);
     data->dl_tensor.shape = dmlc::BeginPtr(data->shape_);
     data->dl_tensor.ndim = static_cast<int>(data->shape_.size());
+    // setup strides (this should be optional, but some frameworks do not
+    // support a NULL strides pointer and would crash the program)
+    data->stride_.resize(data->dl_tensor.ndim, 1);
+    for (int i = data->dl_tensor.ndim - 2; i >= 0; --i) {
+      data->stride_[i] = data->shape_[i+1] * data->stride_[i+1];
+    }
+    data->dl_tensor.strides = dmlc::BeginPtr(data->stride_);
     // setup dtype
     data->dl_tensor.dtype = dtype;
     // setup ctx
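The loop above fills in standard row-major (C-contiguous) strides, measured in elements: the innermost dimension gets stride 1 and each outer stride is the product of all inner extents. The same computation as a minimal Python sketch (contiguous_strides is a hypothetical helper for illustration only):

    def contiguous_strides(shape):
        # mirrors the C++ loop: stride[ndim-1] = 1,
        # stride[i] = shape[i+1] * stride[i+1]
        strides = [1] * len(shape)
        for i in range(len(shape) - 2, -1, -1):
            strides[i] = shape[i + 1] * strides[i + 1]
        return strides

    assert contiguous_strides([3, 4, 5]) == [20, 5, 1]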
import dgl
import dgl.ndarray as nd
from dgl.utils import toindex
import numpy as np
import torch as th
from torch.utils import dlpack

def test_dlpack():
    # test dlpack conversion.
    def nd2th():
        ans = np.array([[1., 1., 1., 1.],
                        [0., 0., 0., 0.],
                        [0., 0., 0., 0.]])
        x = nd.array(np.zeros((3, 4), dtype=np.float32))
        dl = x.to_dlpack()
        y = dlpack.from_dlpack(dl)
        y[0] = 1
        assert np.allclose(x.asnumpy(), ans)

    def th2nd():
        ans = np.array([[1., 1., 1., 1.],
                        [0., 0., 0., 0.],
                        [0., 0., 0., 0.]])
        x = th.zeros((3, 4))
        dl = dlpack.to_dlpack(x)
        y = nd.from_dlpack(dl)
        x[0] = 1
        assert np.allclose(y.asnumpy(), ans)

    nd2th()
    th2nd()

def test_index():
    ans = np.ones((10,), dtype=np.int64) * 10
    # from np data
    data = np.ones((10,), dtype=np.int64) * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from list
    data = [10] * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from torch
    data = th.ones((10,), dtype=th.int64) * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from dgl.NDArray
    data = dgl.ndarray.array(np.ones((10,), dtype=np.int64) * 10)
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

if __name__ == '__main__':
    test_dlpack()
    test_index()