"git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "4208ce2b9ef11a8d477cd551d9ff8ad444eeadbf"
Unverified Commit d453d72d authored by nv-dlasalle's avatar nv-dlasalle Committed by GitHub
Browse files

[Doc][Dataloading] Expand documentation of AsyncTransferer (#2313)

* Update docs

* Make non-default streams non-blocking
parent f673fc25
...@@ -56,6 +56,11 @@ Async Copying to/from GPUs ...@@ -56,6 +56,11 @@ Async Copying to/from GPUs
Data can be copied from the CPU to the GPU, or from the GPU to the CPU, Data can be copied from the CPU to the GPU, or from the GPU to the CPU,
while the GPU is being used for while the GPU is being used for
computation, using the :class:`AsyncTransferer`. computation, using the :class:`AsyncTransferer`.
For the transfer to be fully asynchronous, the context the
:class:`AsyncTransferer`
is created with must be a GPU context, and the input tensor must be in
pinned memory.
.. autoclass:: AsyncTransferer .. autoclass:: AsyncTransferer
:members: __init__, async_copy :members: __init__, async_copy
......
...@@ -38,7 +38,21 @@ class Transfer(object): ...@@ -38,7 +38,21 @@ class Transfer(object):
class AsyncTransferer(object): class AsyncTransferer(object):
""" Class for initiating asynchronous copies to/from the GPU on a second """ Class for initiating asynchronous copies to/from the GPU on a second
GPU stream. """ GPU stream.
To initiate a transfer to a GPU:
>>> tensor_cpu = torch.ones(100000).pin_memory()
>>> transferer = dgl.dataloading.AsyncTransferer(torch.device(0))
>>> future = transferer.async_copy(tensor_cpu, torch.device(0))
And then to wait for the transfer to finish and get a copy of the tensor on
the GPU.
>>> tensor_gpu = future.wait()
"""
def __init__(self, device): def __init__(self, device):
""" Create a new AsyncTransferer object. """ Create a new AsyncTransferer object.
...@@ -55,7 +69,12 @@ class AsyncTransferer(object): ...@@ -55,7 +69,12 @@ class AsyncTransferer(object):
self._handle = _CAPI_DGLAsyncTransfererCreate(ctx) self._handle = _CAPI_DGLAsyncTransfererCreate(ctx)
def async_copy(self, tensor, device): def async_copy(self, tensor, device):
""" Initiate an asynchronous copy on the internal stream. """ Initiate an asynchronous copy on the internal stream. For this call
to be asynchronous, the context the AsyncTranserer is created with must
be a GPU context, and the input tensor must be in pinned memory.
Currently, transfers from the GPU to the CPU, and CPU to CPU, will
be synchronous.
Parameters Parameters
---------- ----------
......
...@@ -138,7 +138,8 @@ class CUDADeviceAPI final : public DeviceAPI { ...@@ -138,7 +138,8 @@ class CUDADeviceAPI final : public DeviceAPI {
DGLStreamHandle CreateStream(DGLContext ctx) {
  // Switch to the device named by the context before creating the stream.
  CUDA_CALL(cudaSetDevice(ctx.device_id));
  // Create the stream with the non-blocking flag so that the legacy
  // default stream will not implicitly synchronize with it.
  cudaStream_t stream;
  CUDA_CALL(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  return static_cast<DGLStreamHandle>(stream);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment