Unverified commit 738e8318, authored by Xin Yao, committed by GitHub
Browse files

[Feature] CUDA UVA sampling for MultiLayerNeighborSampler (#3674)



* implement pin_memory/unpin_memory/is_pinned for dgl.graph

* update python docstring

* update c++ docstring

* add test

* fix the broken UnifiedTensor

* XPU_SWITCH for kDLCPUPinned

* a rough version ready for testing

* eliminate extra context parameter for pin/unpin

* update train_sampling

* fix linting

* fix typo

* multi-gpu uva sampling case

* disable new format materialization for pinned graphs

* update python doc for pin_memory_

* fix unit test

* UVA sampling for link prediction

* dispatch most csr ops

* update graphsage example to combine uva sampling and UnifiedTensor

* update graphsage example to combine uva sampling and UnifiedTensor

* update graphsage example to combine uva sampling and UnifiedTensor

* update doc

* update examples

* change unitgraph and heterograph's PinMemory to in-place

* update examples for multi-gpu uva sampling

* update doc

* fix linting

* fix cpu build

* fix is_pinned for DistGraph

* fix is_pinned for DistGraph

* update graphsage unsupervised example

* update doc for gpu sampling

* update some check for sampling device switching

* fix linting

* adapt for new dataloader

* fix linting

* fix

* fix some name issue

* adjust device check

* add unit test for uva sampling & fix some zero_copy bug

* fix linting

* update num_threads in graphsage examples
Co-authored-by: Quan (Andy) Gan <coin2028@hotmail.com>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
parent fa343873
......@@ -260,7 +260,7 @@ void NDArray::CopyFromTo(DLTensor* from,
// Use the context that is *not* a cpu context to get the correct device
// api manager.
DGLContext ctx = from->ctx.device_type != kDLCPU ? from->ctx : to->ctx;
DGLContext ctx = GetDevice(from->ctx).device_type != kDLCPU ? from->ctx : to->ctx;
DeviceAPI::Get(ctx)->CopyDataFromTo(
from->data, static_cast<size_t>(from->byte_offset),
......@@ -489,9 +489,10 @@ int DGLArrayToDLPack(DGLArrayHandle from, DLManagedTensor** out,
API_BEGIN();
auto* nd_container = reinterpret_cast<NDArray::Container*>(from);
DLTensor* nd = &(nd_container->dl_tensor);
if (alignment != 0 && !is_aligned(nd->data, alignment)) {
if ((alignment != 0 && !is_aligned(nd->data, alignment))
|| (nd->ctx.device_type == kDLCPUPinned)) {
std::vector<int64_t> shape_vec(nd->shape, nd->shape + nd->ndim);
NDArray copy_ndarray = NDArray::Empty(shape_vec, nd->dtype, nd->ctx);
NDArray copy_ndarray = NDArray::Empty(shape_vec, nd->dtype, GetDevice(nd->ctx));
copy_ndarray.CopyFrom(nd);
*out = copy_ndarray.ToDLPack();
} else {
......
......@@ -99,8 +99,12 @@ def _check_device(data):
assert data.device == F.ctx()
@pytest.mark.parametrize('sampler_name', ['full', 'neighbor', 'neighbor2'])
def test_node_dataloader(sampler_name):
@pytest.mark.parametrize('pin_graph', [True, False])
def test_node_dataloader(sampler_name, pin_graph):
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
if F.ctx() != F.cpu() and pin_graph:
g1.create_formats_()
g1.pin_memory_()
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
g1.ndata['label'] = F.copy_to(F.randn((g1.num_nodes(),)), F.cpu())
......@@ -141,14 +145,20 @@ def test_node_dataloader(sampler_name):
_check_device(output_nodes)
_check_device(blocks)
if g1.is_pinned():
g1.unpin_memory_()
@pytest.mark.parametrize('sampler_name', ['full', 'neighbor'])
@pytest.mark.parametrize('neg_sampler', [
dgl.dataloading.negative_sampler.Uniform(2),
dgl.dataloading.negative_sampler.GlobalUniform(15, False, 3),
dgl.dataloading.negative_sampler.GlobalUniform(15, True, 3)])
def test_edge_dataloader(sampler_name, neg_sampler):
@pytest.mark.parametrize('pin_graph', [True, False])
def test_edge_dataloader(sampler_name, neg_sampler, pin_graph):
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
if F.ctx() != F.cpu() and pin_graph:
g1.create_formats_()
g1.pin_memory_()
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
sampler = {
......@@ -209,6 +219,9 @@ def test_edge_dataloader(sampler_name, neg_sampler):
_check_device(neg_pair_graph)
_check_device(blocks)
if g1.is_pinned():
g1.unpin_memory_()
if __name__ == '__main__':
test_graph_dataloader()
test_cluster_gcn(0)
......@@ -219,4 +232,5 @@ if __name__ == '__main__':
dgl.dataloading.negative_sampler.Uniform(2),
dgl.dataloading.negative_sampler.GlobalUniform(2, False),
dgl.dataloading.negative_sampler.GlobalUniform(2, True)]:
test_edge_dataloader(sampler, neg_sampler)
for pin_graph in [True, False]:
test_edge_dataloader(sampler, neg_sampler, pin_graph)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment