Unverified commit b717c8bf, authored by Xin Yao, committed by GitHub
Browse files

[BugFix] Fix bugs in GPU sampling and enable unit tests for dataloaders on the GPU (#3474)



* enable unit tests for dataloader on the GPU

* fix compatibility

* copyright

* fix linting
Co-authored-by: nv-dlasalle <63612878+nv-dlasalle@users.noreply.github.com>
parent d3ae7544
......@@ -80,7 +80,7 @@ class _EidExcluder():
assert self._filter is not None
if isinstance(parent_eids, Mapping):
located_eids = {k: self._filter[k].find_included_indices(parent_eids[k])
for k, v in parent_eids.items()}
for k, v in parent_eids.items() if k in self._filter}
else:
located_eids = self._filter.find_included_indices(parent_eids)
return located_eids
......@@ -820,8 +820,10 @@ class EdgeCollator(Collator):
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0])
ctx = F.context(pair_graph)
neg_edges = {
etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
etype: neg_srcdst.get(etype, (F.copy_to(F.tensor([], dtype), ctx),
F.copy_to(F.tensor([], dtype), ctx)))
for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
/*!
* Copyright (c) 2020 by Contributors
* Copyright (c) 2020-2021 by Contributors
* \file graph/sampling/neighbor.cc
* \brief Definition of neighborhood-based sampler APIs.
*/
......@@ -93,7 +93,7 @@ HeteroSubgraph SampleNeighbors(
hg->NumVertices(src_vtype),
hg->NumVertices(dst_vtype),
hg->DataType(), hg->Context());
induced_edges[etype] = aten::NullArray();
induced_edges[etype] = aten::NullArray(hg->DataType(), hg->Context());
} else if (fanouts[etype] == -1) {
const auto &earr = (dir == EdgeDir::kOut) ?
hg->OutEdges(etype, nodes_ntype) :
......
......@@ -78,11 +78,10 @@ def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
v_set = set(F.asnumpy(v))
assert v_set == seed_set
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
def test_neighbor_sampler_dataloader():
g = dgl.heterograph({('user', 'follow', 'user'): ([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])},
{'user': 6}).long()
g = dgl.to_bidirected(g)
g = dgl.to_bidirected(g).to(F.ctx())
g.ndata['feat'] = F.randn((6, 8))
g.edata['feat'] = F.randn((10, 4))
reverse_eids = F.tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], dtype=F.int64)
......@@ -94,7 +93,7 @@ def test_neighbor_sampler_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
}).long()
}).long().to(F.ctx())
for ntype in hg.ntypes:
hg.nodes[ntype].data['feat'] = F.randn((hg.number_of_nodes(ntype), 8))
for etype in hg.canonical_etypes:
......@@ -218,8 +217,8 @@ def _check_device(data):
def test_node_dataloader():
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())
dataloader = dgl.dataloading.NodeDataLoader(
g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes())
......@@ -233,9 +232,9 @@ def test_node_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
})
}).to(F.ctx())
for ntype in g2.ntypes:
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
dataloader = dgl.dataloading.NodeDataLoader(
......@@ -251,8 +250,8 @@ def test_edge_dataloader():
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())
# no negative sampler
dataloader = dgl.dataloading.EdgeDataLoader(
......@@ -277,9 +276,9 @@ def test_edge_dataloader():
('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
})
}).to(F.ctx())
for ntype in g2.ntypes:
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)
# no negative sampler
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment