"...text-generation-inference.git" did not exist on "3136f27f361b1a31ccd469998cc9e99249e759b8"
Commit 7603b8c3 authored by Minjie Wang

Fix dlpack bug; enable zero copy between dgl.ndarray and user tensors

parent 3d38c2a9
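For orientation, a hedged sketch of the round trip this commit enables; it mirrors the new test at the bottom of the diff, and the import paths are taken from this diff rather than from any stable API.

    import numpy as np
    from torch.utils import dlpack
    import dgl.ndarray as nd

    x = nd.array(np.zeros((3, 4), dtype=np.float32))  # dgl.NDArray
    y = dlpack.from_dlpack(x.to_dlpack())             # torch.Tensor, zero copy
    y[0] = 1                                          # mutation is visible in x
    assert np.allclose(x.asnumpy()[0], 1)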
@@ -249,9 +249,14 @@ struct NDArray::Container {
   friend class RPCWrappedFunc;
   /*!
    * \brief The shape container,
-   * can be used used for shape data.
+   * can be used for shape data.
    */
   std::vector<int64_t> shape_;
+  /*!
+   * \brief The stride container,
+   * can be used for stride data.
+   */
+  std::vector<int64_t> stride_;
   /*! \brief The internal array object */
   std::atomic<int> ref_counter_{0};
 };
@@ -24,6 +24,10 @@ def _from_dlpack(dltensor):
     dltensor = ctypes.py_object(dltensor)
     if ctypes.pythonapi.PyCapsule_IsValid(dltensor, _c_str_dltensor):
         ptr = ctypes.pythonapi.PyCapsule_GetPointer(dltensor, _c_str_dltensor)
+        # XXX(minjie): The cast below should be unnecessary given the code that
+        # sets the restype of the PyCapsule calls, but weirdly it does not
+        # always work.
+        ptr = ctypes.cast(ptr, ctypes.c_void_p)
         handle = TVMArrayHandle()
         check_call(_LIB.TVMArrayFromDLPack(ptr, ctypes.byref(handle)))
         ctypes.pythonapi.PyCapsule_SetName(dltensor, _c_str_used_dltensor)
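The XXX comment points at a real ctypes pitfall: unless restype is set, ctypes assumes a foreign function returns a C int, which truncates 64-bit pointers. A minimal illustration of the intended setup, using only standard CPython ctypes and independent of DGL:

    import ctypes

    # PyCapsule_GetPointer returns void*; without restype, ctypes treats the
    # result as a C int and can truncate the pointer on 64-bit platforms.
    ctypes.pythonapi.PyCapsule_GetPointer.restype = ctypes.c_void_p
    ctypes.pythonapi.PyCapsule_GetPointer.argtypes = [ctypes.py_object,
                                                      ctypes.c_char_p]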
@@ -36,8 +40,12 @@ def _dlpack_deleter(pycapsule):
     pycapsule = ctypes.cast(pycapsule, ctypes.py_object)
     if ctypes.pythonapi.PyCapsule_IsValid(pycapsule, _c_str_dltensor):
         ptr = ctypes.pythonapi.PyCapsule_GetPointer(pycapsule, _c_str_dltensor)
+        # XXX(minjie): The cast below should be unnecessary given the code that
+        # sets the restype of the PyCapsule calls, but weirdly it does not
+        # always work.
+        ptr = ctypes.cast(ptr, ctypes.c_void_p)
         _LIB.TVMDLManagedTensorCallDeleter(ptr)
-    ctypes.pythonapi.PyCapsule_SetDestructor(dltensor, TVMPyCapsuleDestructor(0))
+    ctypes.pythonapi.PyCapsule_SetDestructor(pycapsule, TVMPyCapsuleDestructor(0))

 _c_dlpack_deleter = TVMPyCapsuleDestructor(_dlpack_deleter)
@@ -72,9 +80,9 @@ class NDArrayBase(object):
         -------
         dlpack : DLPack tensor view of the array data
         """
-        handle = ctypes.c_void_p()
-        check_call(_LIB.TVMArrayToDLPack(self.handle, ctypes.byref(handle)))
-        return ctypes.pythonapi.PyCapsule_New(handle, _c_str_dltensor, _c_dlpack_deleter)
+        ptr = ctypes.c_void_p()
+        check_call(_LIB.TVMArrayToDLPack(self.handle, ctypes.byref(ptr)))
+        return ctypes.pythonapi.PyCapsule_New(ptr, _c_str_dltensor, _c_dlpack_deleter)

 def _make_array(handle, is_view):
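One property of the capsule returned by to_dlpack is worth spelling out: it is single-use. The consumer renames it from "dltensor" to "used_dltensor" (the PyCapsule_SetName call in _from_dlpack above), so importing it a second time fails. A hedged usage sketch:

    import numpy as np
    from torch.utils import dlpack
    import dgl.ndarray as nd

    x = nd.array(np.zeros((3, 4), dtype=np.float32))
    cap = x.to_dlpack()
    y = dlpack.from_dlpack(cap)   # consumes the capsule; y shares x's memory
    # dlpack.from_dlpack(cap)     # would raise: the capsule is already used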
@@ -2,6 +2,7 @@ from __future__ import absolute_import

 import ctypes
 import torch as th
+from torch.utils import dlpack

 from .._ffi.base import _LIB, check_call, c_array
 from .._ffi.runtime_ctypes import TVMType, TVMContext, TVMArray
@@ -31,12 +32,6 @@ max = th.max
 def astype(a, ty):
     return a.type(ty)

-def asnumpy(a):
-    return a.cpu().numpy()
-
-def from_numpy(np_data):
-    return th.from_numpy(np_data)
-
 def pack(tensors):
     return th.cat(tensors)
@@ -49,6 +44,9 @@ def shape(x):
 def dtype(x):
     return x.dtype

+def asnumpy(a):
+    return a.cpu().numpy()
+
 unique = th.unique

 def gather_row(data, row_index):
@@ -110,8 +108,24 @@ def _typestr(arr_dtype):
     else:
         raise RuntimeError('Unsupported data type:', arr_dtype)

-def astvmarray(arr_data):
-    """Return a TVMArray representation of the underlying data."""
+def zerocopy_to_dlpack(arr):
+    """Return a dlpack compatible array using zero copy."""
+    return dlpack.to_dlpack(arr)
+
+def zerocopy_from_dlpack(dlpack_arr):
+    """Return a tensor using zero copy."""
+    return dlpack.from_dlpack(dlpack_arr)
+
+def zerocopy_to_numpy(arr):
+    """Return a numpy array that shares the data."""
+    # TODO(minjie): zero copy
+    return arr.numpy()
+
+def zerocopy_from_numpy(np_data):
+    """Return a tensor that shares the numpy data."""
+    return th.from_numpy(np_data)
+
+'''
     data = arr_data
     assert data.is_contiguous()
     arr = TVMArray()
@@ -123,3 +137,4 @@ def astvmarray(arr_data):
     arr.ndim = len(shape)
     arr.ctx = get_context(data)
     return arr
+'''
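A hedged usage sketch of the two DLPack helpers added above, written directly against torch.utils.dlpack (which is what the wrappers delegate to) so the zero-copy behavior is visible:

    import torch as th
    from torch.utils import dlpack

    t = th.zeros(3)
    cap = dlpack.to_dlpack(t)      # what zerocopy_to_dlpack does
    t2 = dlpack.from_dlpack(cap)   # what zerocopy_from_dlpack does
    t2[0] = 1.0
    assert t[0] == 1.0             # same storage; no copy was made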
@@ -72,7 +72,7 @@ def array(arr, ctx=cpu(0)):
         arr = _np.array(arr)
     return empty(arr.shape, arr.dtype, ctx).copyfrom(arr)

-def from_numpy(np_data):
+def zerocopy_from_numpy(np_data):
     """Create an array that shares the given numpy data.

     Parameters
@@ -89,21 +89,4 @@ def from_numpy(np_data):
     handle = ctypes.pointer(arr)
     return NDArray(handle, is_view=True)

-def from_user_tensor(data):
-    """Create an array that shares the given user tensor data.
-
-    Parameters
-    ----------
-    data : F.Tensor
-        The user tensor data.
-
-    Returns
-    -------
-    NDArray
-        The array
-    """
-    arr = F.astvmarray(data)
-    handle = ctypes.pointer(arr)
-    return NDArray(handle, is_view=True)
-
 _set_class_ndarray(NDArray)
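Since zerocopy_from_numpy builds the NDArray with is_view=True, the NDArray and the numpy array share one buffer, and the caller must keep the numpy array alive for as long as the view is used. A hedged sketch of what the sharing implies:

    import numpy as np
    import dgl.ndarray as nd

    a = np.ones((4,), dtype=np.float32)
    x = nd.zerocopy_from_numpy(a)   # view over a's buffer, no copy
    a[0] = 5.0                      # keep `a` alive while x is in use
    assert np.allclose(x.asnumpy()[0], 5.0)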
@@ -35,6 +35,7 @@ class Index(object):
             self._list_data = np.array(data).astype(np.int64)
         except:
             raise ValueError('Error index data: %s' % str(data))
+        self._user_tensor_data[nd.cpu()] = F.zerocopy_from_numpy(self._list_data)

     def tolist(self):
         """Convert to a python-list compatible object."""
@@ -42,18 +43,20 @@ class Index(object):
         if self._dgl_tensor_data is not None:
             self._list_data = self._dgl_tensor_data.asnumpy()
         else:
-            assert len(self._user_tensor_data) > 0
-            data = next(iter(self._user_tensor_data.values()))
-            self._list_data = F.asnumpy(data)
+            data = self.tousertensor()
+            self._list_data = F.zerocopy_to_numpy(data)
         return self._list_data

     def tousertensor(self, ctx=None):
         """Convert to user tensor (defined in `backend`)."""
-        if len(self._user_tensor_data) == 0:
-            self._user_tensor_data[nd.cpu()] = F.from_numpy(self.tolist())
         if ctx is None:
             ctx = nd.cpu()
+        if len(self._user_tensor_data) == 0:
+            # zero copy from dgl tensor
+            dl = self._dgl_tensor_data.to_dlpack()
+            self._user_tensor_data[nd.cpu()] = F.zerocopy_from_dlpack(dl)
         if ctx not in self._user_tensor_data:
+            # copy from cpu to another device
             data = next(iter(self._user_tensor_data.values()))
             self._user_tensor_data[ctx] = F.to_context(data, ctx)
         return self._user_tensor_data[ctx]
@@ -61,13 +64,10 @@ class Index(object):
     def todgltensor(self):
         """Convert to dgl.NDArray."""
         if self._dgl_tensor_data is None:
-            if self._list_data is not None:
-                # create a view ndarray from numpy
-                self._dgl_tensor_data = nd.from_numpy(self._list_data)
-            else:
-                # create a view ndarray from user tensor
-                self._dgl_tensor_data = nd.from_user_tensor(
-                    self.tousertensor(ctx=nd.cpu()))
+            # zero copy from user tensor
+            tsor = self.tousertensor()
+            dl = F.zerocopy_to_dlpack(tsor)
+            self._dgl_tensor_data = nd.from_dlpack(dl)
         return self._dgl_tensor_data

     def __iter__(self):
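With these changes every Index conversion funnels through one cached, copy-free path: the constructor seeds the CPU user tensor from numpy, tousertensor() materializes from the dgl tensor via DLPack when the cache is empty, and todgltensor() goes the other way. A hedged sketch of the flow, using toindex from this diff:

    from dgl.utils import toindex

    idx = toindex([10] * 10)
    t = idx.tousertensor()   # torch tensor, zero copy from the cached numpy data
    d = idx.todgltensor()    # dgl.NDArray, zero copy from t via DLPack
    a = idx.tolist()         # numpy array backed by the same data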
@@ -80,6 +80,13 @@ struct NDArray::Internal {
     data->shape_ = std::move(shape);
     data->dl_tensor.shape = dmlc::BeginPtr(data->shape_);
     data->dl_tensor.ndim = static_cast<int>(data->shape_.size());
+    // setup strides (this should be optional, but some frameworks do not
+    // support a NULL strides pointer and would crash the program)
+    data->stride_.resize(data->dl_tensor.ndim, 1);
+    for (int i = data->dl_tensor.ndim - 2; i >= 0; --i) {
+      data->stride_[i] = data->shape_[i+1] * data->stride_[i+1];
+    }
+    data->dl_tensor.strides = dmlc::BeginPtr(data->stride_);
     // setup dtype
     data->dl_tensor.dtype = dtype;
     // setup ctx
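The loop above fills in standard row-major (C-contiguous) strides, measured in elements: the innermost dimension gets stride 1 and each outer stride is the product of all inner extents. The same computation as a minimal Python sketch (contiguous_strides is a hypothetical helper for illustration only):

    def contiguous_strides(shape):
        # mirrors the C++ loop: stride[ndim-1] = 1,
        # stride[i] = shape[i+1] * stride[i+1]
        strides = [1] * len(shape)
        for i in range(len(shape) - 2, -1, -1):
            strides[i] = shape[i + 1] * strides[i + 1]
        return strides

    assert contiguous_strides([3, 4, 5]) == [20, 5, 1]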
import dgl
import dgl.ndarray as nd
from dgl.utils import toindex
import numpy as np
import torch as th
from torch.utils import dlpack

def test_dlpack():
    # test dlpack conversion.
    def nd2th():
        ans = np.array([[1., 1., 1., 1.],
                        [0., 0., 0., 0.],
                        [0., 0., 0., 0.]])
        x = nd.array(np.zeros((3, 4), dtype=np.float32))
        dl = x.to_dlpack()
        y = dlpack.from_dlpack(dl)
        y[0] = 1
        assert np.allclose(x.asnumpy(), ans)

    def th2nd():
        ans = np.array([[1., 1., 1., 1.],
                        [0., 0., 0., 0.],
                        [0., 0., 0., 0.]])
        x = th.zeros((3, 4))
        dl = dlpack.to_dlpack(x)
        y = nd.from_dlpack(dl)
        x[0] = 1
        assert np.allclose(y.asnumpy(), ans)

    nd2th()
    th2nd()

def test_index():
    ans = np.ones((10,), dtype=np.int64) * 10
    # from np data
    data = np.ones((10,), dtype=np.int64) * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from list
    data = [10] * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from torch
    data = th.ones((10,), dtype=th.int64) * 10
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)
    # from dgl.NDArray
    data = dgl.ndarray.array(np.ones((10,), dtype=np.int64) * 10)
    idx = toindex(data)
    y1 = idx.tolist()
    y2 = idx.tousertensor().numpy()
    y3 = idx.todgltensor().asnumpy()
    assert np.allclose(ans, y1)
    assert np.allclose(ans, y2)
    assert np.allclose(ans, y3)

if __name__ == '__main__':
    test_dlpack()
    test_index()