[BugFix] Fix bugs in GPU sampling and enable unit tests for dataloaders on the GPU (#3474)

* enable unit tests for dataloader on the GPU
* fix compatibility
* copyright
* fix linting

Co-authored-by: nv-dlasalle <63612878+nv-dlasalle@users.noreply.github.com>

[BugFix] Fix bugs in GPU sampling and enable unit tests for dataloaders on the GPU (#3474)
* enable unit tests for dataloader on the GPU
* fix compatibility
* copyright
* fix linting

Co-authored-by: nv-dlasalle <63612878+nv-dlasalle@users.noreply.github.com>
b717c8bf · Xin Yao · GitHub · d3ae7544 · b717c8bf · b717c8bf
Unverified commit b717c8bf, authored Nov 05, 2021 by Xin Yao; committed by GitHub Nov 04, 2021.
3 changed files
--- a/python/dgl/dataloading/dataloader.py
+++ b/python/dgl/dataloading/dataloader.py
@@ -80,7 +80,7 @@ class _EidExcluder():
            assert self._filter is not None
            if isinstance(parent_eids, Mapping):
                located_eids = {k: self._filter[k].find_included_indices(parent_eids[k])
-                                for k, v in parent_eids.items()}
+                                for k, v in parent_eids.items() if k in self._filter}
            else:
                located_eids = self._filter.find_included_indices(parent_eids)
            return located_eids
@@ -820,8 +820,10 @@ class EdgeCollator(Collator):
            neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
        # Get dtype from a tuple of tensors
        dtype = F.dtype(list(neg_srcdst.values())[0][0])
+        ctx = F.context(pair_graph)
        neg_edges = {
-            etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
+            etype: neg_srcdst.get(etype, (F.copy_to(F.tensor([], dtype), ctx),
+                                          F.copy_to(F.tensor([], dtype), ctx)))
            for etype in self.g.canonical_etypes}
        neg_pair_graph = heterograph(
            neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})

--- a/src/graph/sampling/neighbor/neighbor.cc
+++ b/src/graph/sampling/neighbor/neighbor.cc
 /*!
- *  Copyright (c) 2020 by Contributors
+ *  Copyright (c) 2020-2021 by Contributors
 * \file graph/sampling/neighbor.cc
 * \brief Definition of neighborhood-based sampler APIs.
 */
@@ -93,7 +93,7 @@ HeteroSubgraph SampleNeighbors(
        hg->NumVertices(src_vtype),
        hg->NumVertices(dst_vtype),
        hg->DataType(), hg->Context());
-      induced_edges[etype] = aten::NullArray();
+      induced_edges[etype] = aten::NullArray(hg->DataType(), hg->Context());
    } else if (fanouts[etype] == -1) {
      const auto &earr = (dir == EdgeDir::kOut) ?
        hg->OutEdges(etype, nodes_ntype) :

--- a/tests/pytorch/test_dataloader.py
+++ b/tests/pytorch/test_dataloader.py
@@ -78,11 +78,10 @@ def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
        v_set = set(F.asnumpy(v))
        assert v_set == seed_set

-@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
 def test_neighbor_sampler_dataloader():
    g = dgl.heterograph({('user', 'follow', 'user'): ([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])},
                        {'user': 6}).long()
-    g = dgl.to_bidirected(g)
+    g = dgl.to_bidirected(g).to(F.ctx())
    g.ndata['feat'] = F.randn((6, 8))
    g.edata['feat'] = F.randn((10, 4))
    reverse_eids = F.tensor([5, 6, 7, 8, 9, 0, 1, 2, 3, 4], dtype=F.int64)
@@ -94,7 +93,7 @@ def test_neighbor_sampler_dataloader():
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
-    }).long()
+    }).long().to(F.ctx())
    for ntype in hg.ntypes:
        hg.nodes[ntype].data['feat'] = F.randn((hg.number_of_nodes(ntype), 8))
    for etype in hg.canonical_etypes:
@@ -218,8 +217,8 @@ def _check_device(data):
 def test_node_dataloader():
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)

-    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
-    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
+    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
+    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())

    dataloader = dgl.dataloading.NodeDataLoader(
        g1, g1.nodes(), sampler, device=F.ctx(), batch_size=g1.num_nodes())
@@ -233,9 +232,9 @@ def test_node_dataloader():
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
-    })
+    }).to(F.ctx())
    for ntype in g2.ntypes:
-        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
+        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)

    dataloader = dgl.dataloading.NodeDataLoader(
@@ -251,8 +250,8 @@ def test_edge_dataloader():
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2)
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)

-    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
-    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
+    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).to(F.ctx())
+    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.ctx())

    # no negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
@@ -277,9 +276,9 @@ def test_edge_dataloader():
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
-    })
+    }).to(F.ctx())
    for ntype in g2.ntypes:
-        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
+        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.ctx())
    batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)

    # no negative sampler