Unverified commit b717c8bf, authored by Xin Yao and committed by GitHub
Browse files

[BugFix] Fix bugs in GPU sampling and enable unit tests for dataloaders on the GPU (#3474)



* enable unit tests for dataloader on the GPU

* fix compatibility

* copyright

* fix linting
Co-authored-by: nv-dlasalle <63612878+nv-dlasalle@users.noreply.github.com>
parent d3ae7544
...@@ -80,7 +80,7 @@ class _EidExcluder(): ...@@ -80,7 +80,7 @@ class _EidExcluder():
assert self._filter is not None assert self._filter is not None
if isinstance(parent_eids, Mapping): if isinstance(parent_eids, Mapping):
located_eids = {k: self._filter[k].find_included_indices(parent_eids[k]) located_eids = {k: self._filter[k].find_included_indices(parent_eids[k])
for k, v in parent_eids.items()} for k, v in parent_eids.items() if k in self._filter}
else: else:
located_eids = self._filter.find_included_indices(parent_eids) located_eids = self._filter.find_included_indices(parent_eids)
return located_eids return located_eids
...@@ -820,8 +820,10 @@ class EdgeCollator(Collator): ...@@ -820,8 +820,10 @@ class EdgeCollator(Collator):
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst} neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors # Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0]) dtype = F.dtype(list(neg_srcdst.values())[0][0])
ctx = F.context(pair_graph)
neg_edges = { neg_edges = {
etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype))) etype: neg_srcdst.get(etype, (F.copy_to(F.tensor([], dtype), ctx),
F.copy_to(F.tensor([], dtype), ctx)))
for etype in self.g.canonical_etypes} for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph( neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes}) neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020-2021 by Contributors
* \file graph/sampling/neighbor.cc * \file graph/sampling/neighbor.cc
* \brief Definition of neighborhood-based sampler APIs. * \brief Definition of neighborhood-based sampler APIs.
*/ */
...@@ -93,7 +93,7 @@ HeteroSubgraph SampleNeighbors( ...@@ -93,7 +93,7 @@ HeteroSubgraph SampleNeighbors(
hg->NumVertices(src_vtype), hg->NumVertices(src_vtype),
hg->NumVertices(dst_vtype), hg->NumVertices(dst_vtype),
hg->DataType(), hg->Context()); hg->DataType(), hg->Context());
induced_edges[etype] = aten::NullArray(); induced_edges[etype] = aten::NullArray(hg->DataType(), hg->Context());
} else if (fanouts[etype] == -1) { } else if (fanouts[etype] == -1) {
const auto &earr = (dir == EdgeDir::kOut) ? const auto &earr = (dir == EdgeDir::kOut) ?
hg->OutEdges(etype, nodes_ntype) : hg->OutEdges(etype, nodes_ntype) :
......
...@@ -78,11 +78,10 @@ def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator): ...@@ -78,11 +78,10 @@ def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
v_set = set(F.asnumpy(v)) v_set = set(F.asnumpy(v))
assert v_set == seed_set assert v_set == seed_set
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
def test_neighbor_sampler_dataloader(): def test_neighbor_sampler_dataloader():
g = dgl.heterograph({('user', 'follow', 'user'): ([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])}, g = dgl.heterograph({('user', 'follow', 'user'): ([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])},
{'user': 6}).long() {'user': 6}).long()
g = dgl.to_bidirected(g) g = dgl.to_bidirected(g).to(F.ctx())
g.ndata['feat'] = F.randn((6, 8)) g.ndata['feat'] = F.randn((6, 8))
g.edata['feat'] = F.randn((10, 4)) g.edata['feat'] = F.randn((10, 4))
reverse_eids = F.tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], dtype=F.int64) reverse_eids = F.tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], dtype=F.int64)
...@@ -94,7 +93,7 @@ def test_neighbor_sampler_dataloader(): ...@@ -94,7 +93,7 @@ def test_neighbor_sampler_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]), ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]) ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
}).long() }).long().to(F.ctx())
for ntype in hg.ntypes: for ntype in hg.ntypes:
hg.nodes[ntype].data['feat'] = F.randn((hg.number_of_nodes(ntype), 8)) hg.nodes[ntype].data['feat'] = F.randn((hg.number_of_nodes(ntype), 8))
for etype in hg.canonical_etypes: for etype in hg.canonical_etypes:
...@@ -218,8 +217,8 @@ def _check_device(data): ...@@ -218,8 +217,8 @@ def _check_device(data):
def test_node_dataloader(): def test_node_dataloader():
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])) g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu()) g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())
dataloader = dgl.dataloading.NodeDataLoader( dataloader = dgl.dataloading.NodeDataLoader(
g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes()) g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes())
...@@ -233,9 +232,9 @@ def test_node_dataloader(): ...@@ -233,9 +232,9 @@ def test_node_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]), ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]) ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
}) }).to(F.ctx())
for ntype in g2.ntypes: for ntype in g2.ntypes:
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu()) g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes) batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
dataloader = dgl.dataloading.NodeDataLoader( dataloader = dgl.dataloading.NodeDataLoader(
...@@ -251,8 +250,8 @@ def test_edge_dataloader(): ...@@ -251,8 +250,8 @@ def test_edge_dataloader():
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
neg_sampler = dgl.dataloading.negative_sampler.Uniform(2) neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])) g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu()) g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())
# no negative sampler # no negative sampler
dataloader = dgl.dataloading.EdgeDataLoader( dataloader = dgl.dataloading.EdgeDataLoader(
...@@ -277,9 +276,9 @@ def test_edge_dataloader(): ...@@ -277,9 +276,9 @@ def test_edge_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]), ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]), ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]) ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
}) }).to(F.ctx())
for ntype in g2.ntypes: for ntype in g2.ntypes:
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu()) g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes) batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)
# no negative sampler # no negative sampler
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment