[Feature] Make sample_neighbors copy features on demand (#2042)

* fix * fix * lint * fix * test * fix * fix

[Feature] Make sample_neighbors copy features on demand (#2042)
* fix * fix * lint * fix * test * fix * fix
edfbee2c · Quan (Andy) Gan · GitHub · 09ec6020 · edfbee2c · edfbee2c
Unverified Commit edfbee2c authored Aug 18, 2020 by Quan (Andy) Gan Committed by GitHub Aug 18, 2020
3 changed files
--- a/examples/pytorch/graphsage/train_sampling.py
+++ b/examples/pytorch/graphsage/train_sampling.py
@@ -158,8 +158,10 @@ def run(args, device, data):
        tic_step = time.time()
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            # Load the input features as well as output labels
-            batch_inputs, batch_labels = load_subtensor(train_g, seeds, input_nodes, device)
+            #batch_inputs, batch_labels = load_subtensor(train_g, seeds, input_nodes, device)
            blocks = [block.int().to(device) for block in blocks]
+            batch_inputs = blocks[0].srcdata['features']
+            batch_labels = blocks[-1].dstdata['labels']
            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)

--- a/python/dgl/distributed/graph_services.py
+++ b/python/dgl/distributed/graph_services.py
@@ -56,7 +56,7 @@ def _sample_neighbors(local_g, partition_book, seed_nodes, fan_out, edge_dir, pr
    local_ids = F.astype(local_ids, local_g.idtype)
    # local_ids = self.seed_nodes
    sampled_graph = local_sample_neighbors(
-        local_g, local_ids, fan_out, edge_dir, prob, replace)
+        local_g, local_ids, fan_out, edge_dir, prob, replace, _dist_training=True)
    global_nid_mapping = local_g.ndata[NID]
    src, dst = sampled_graph.edges()
    global_src, global_dst = global_nid_mapping[src], global_nid_mapping[dst]

--- a/python/dgl/sampling/neighbor.py
+++ b/python/dgl/sampling/neighbor.py
@@ -11,7 +11,8 @@ __all__ = [
    'sample_neighbors',
    'select_topk']
-def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
+def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False,
+                     copy_ndata=True, copy_edata=True, _dist_training=False):
    """Sample neighboring edges of the given nodes and return the induced subgraph.
    For each node, a number of inbound (or outbound when ``edge_dir == 'out'``) edges
@@ -53,12 +54,35 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
        to sum up to one).  Otherwise, the result will be undefined.
    replace : bool, optional
        If True, sample with replacement.
+    copy_ndata : bool, optional
+        If True, the node features of the new graph are copied from
+        the original graph. If False, the new graph will not have any
+        node features.
+        (Default: True)
+    copy_edata : bool, optional
+        If True, the edge features of the new graph are copied from
+        the original graph.  If False, the new graph will not have any
+        edge features.
+        (Default: True)
+    _dist_training : bool, optional
+        Internal argument.  Do not use.
+        (Default: False)
    Returns
    -------
    DGLGraph
        A sampled subgraph containing only the sampled neighboring edges.  It is on CPU.
+    Notes
+    -----
+    If :attr:`copy_ndata` or :attr:`copy_edata` is True, the same tensors are used as
+    the node or edge features of both the original graph and the new graph.
+    As a result, users should avoid performing in-place operations
+    on the node features of the new graph to avoid feature corruption.
    Examples
    --------
    Assume that you have the following graph
@@ -130,11 +154,30 @@ def sample_neighbors(g, nodes, fanout, edge_dir='in', prob=None, replace=False):
                                       edge_dir, prob_arrays, replace)
    induced_edges = subgidx.induced_edges
    ret = DGLHeteroGraph(subgidx.graph, g.ntypes, g.etypes)
-    for i, etype in enumerate(ret.canonical_etypes):
-        ret.edges[etype].data[EID] = induced_edges[i]
+    # handle features
+    # (TODO) (BarclayII) DGL distributed fails with bus error, freezes, or other
+    # incomprehensible errors with lazy feature copy.
+    # So in distributed training context, we fall back to old behavior where we
+    # only set the edge IDs.
+    if not _dist_training:
+        if copy_ndata:
+            print(g, type(g))
+            node_frames = utils.extract_node_subframes(g, None)
+            utils.set_new_frames(ret, node_frames=node_frames)
+        if copy_edata:
+            print(g, type(g))
+            edge_frames = utils.extract_edge_subframes(g, induced_edges)
+            utils.set_new_frames(ret, edge_frames=edge_frames)
+    else:
+        for i, etype in enumerate(ret.canonical_etypes):
+            ret.edges[etype].data[EID] = induced_edges[i]
    return ret
-def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
+def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False,
+                copy_ndata=True, copy_edata=True):
    """Select the neighboring edges with k-largest (or k-smallest) weights of the given
    nodes and return the induced subgraph.
@@ -176,12 +219,31 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
    ascending : bool, optional
        If True, DGL will return edges with k-smallest weights instead of
        k-largest weights.
+    copy_ndata : bool, optional
+        If True, the node features of the new graph are copied from
+        the original graph. If False, the new graph will not have any
+        node features.
+        (Default: True)
+    copy_edata : bool, optional
+        If True, the edge features of the new graph are copied from
+        the original graph.  If False, the new graph will not have any
+        edge features.
+        (Default: True)
    Returns
    -------
    DGLGraph
        A sampled subgraph containing only the sampled neighboring edges.  It is on CPU.
+    Notes
+    -----
+    If :attr:`copy_ndata` or :attr:`copy_edata` is True, the same tensors are used as
+    the node or edge features of both the original graph and the new graph.
+    As a result, users should avoid performing in-place operations
+    on the node features of the new graph to avoid feature corruption.
    Examples
    --------
    >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))
@@ -231,8 +293,17 @@ def select_topk(g, k, weight, nodes=None, edge_dir='in', ascending=False):
        g._graph, nodes_all_types, k_array, edge_dir, weight_arrays, bool(ascending))
    induced_edges = subgidx.induced_edges
    ret = DGLHeteroGraph(subgidx.graph, g.ntypes, g.etypes)
-    for i, etype in enumerate(ret.canonical_etypes):
-        ret.edges[etype].data[EID] = induced_edges[i]
+    # handle features
+    if copy_ndata:
+        print(g, type(g))
+        node_frames = utils.extract_node_subframes(g, None)
+        utils.set_new_frames(ret, node_frames=node_frames)
+    if copy_edata:
+        print(g, type(g))
+        edge_frames = utils.extract_edge_subframes(g, induced_edges)
+        utils.set_new_frames(ret, edge_frames=edge_frames)
    return ret
 _init_api('dgl.sampling.neighbor', __name__)