Unverified commit 5e34ca8b authored by xiang song(charlie.song), committed by GitHub

[Feature] Lazy copy ndata, edata to device (#1986)



* Lazy to device

* remove print

* lint

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* lint

* Fix

* Revert "Fix"

This reverts commit 615c9b8f80f5f6ee2ab43c849a22f0083deedf3b.

* Add test for frame lazy update

* disable tensorflow

* upd
Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 912da18c
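The gist of the change: `DGLHeteroGraph.to()` no longer copies every feature tensor eagerly; each `Column` only records the pending target device and performs the actual `copy_to` on first access. A minimal sketch of the resulting user-facing behavior, assuming the PyTorch backend and an available CUDA device (tensor names and shapes are illustrative):

```python
import torch
import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
g.ndata['h'] = torch.zeros(4, 128)      # node features, stored on the CPU
g.edata['w'] = torch.zeros(3, 16)       # edge features, stored on the CPU

cuda_g = g.to(torch.device('cuda:0'))   # returns immediately; no feature tensor is copied yet
h = cuda_g.ndata['h']                   # first read of 'h' triggers the copy to the GPU
assert h.device.type == 'cuda'
# 'w' has not been read, so its storage still lives on the CPU until it is accessed.
```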
@@ -84,10 +84,11 @@ class Column(object):
     index : Tensor
         Index tensor
     """
-    def __init__(self, storage, scheme=None, index=None):
+    def __init__(self, storage, scheme=None, index=None, device=None):
         self.storage = storage
         self.scheme = scheme if scheme else infer_scheme(storage)
         self.index = index
+        self.device = device
 
     def __len__(self):
         """The number of features (number of rows) in this column."""
@@ -105,8 +106,18 @@ class Column(object):
     def data(self):
         """Return the feature data. Perform index selecting if needed."""
         if self.index is not None:
+            # If the index and the storage are not in the same context,
+            # copy the index to the context of the storage.
+            # Copying the index is usually cheaper than copying the data.
+            if F.context(self.storage) != F.context(self.index):
+                self.index = F.copy_to(self.index, F.context(self.storage))
             self.storage = F.gather_row(self.storage, self.index)
             self.index = None
+
+        # Move the data to the requested device.
+        if self.device is not None:
+            self.storage = F.copy_to(self.storage, self.device[0], **self.device[1])
+            self.device = None
         return self.storage
 
     @data.setter
@@ -115,6 +126,25 @@ class Column(object):
         self.index = None
         self.storage = val
 
+    def to(self, device, **kwargs):  # pylint: disable=invalid-name
+        """Return a new column with the data copied to the targeted device (cpu/gpu).
+
+        Parameters
+        ----------
+        device : Framework-specific device context object
+            The context to move data to.
+        kwargs : Key-word arguments.
+            Key-word arguments fed to the framework copy function.
+
+        Returns
+        -------
+        Column
+            A new column.
+        """
+        col = self.clone()
+        col.device = (device, kwargs)
+        return col
+
     def __getitem__(self, rowids):
         """Return the feature data given the rowids.
@@ -186,7 +216,7 @@ class Column(object):
     def clone(self):
         """Return a shallow copy of this column."""
-        return Column(self.storage, self.scheme, self.index)
+        return Column(self.storage, self.scheme, self.index, self.device)
 
     def deepclone(self):
         """Return a deepcopy of this column.
@@ -214,9 +244,9 @@ class Column(object):
             Sub-column
         """
         if self.index is None:
-            return Column(self.storage, self.scheme, rowids)
+            return Column(self.storage, self.scheme, rowids, self.device)
         else:
-            return Column(self.storage, self.scheme, F.gather_row(self.index, rowids))
+            return Column(self.storage, self.scheme, F.gather_row(self.index, rowids), self.device)
 
     @staticmethod
     def create(data):
@@ -578,5 +608,25 @@ class Frame(MutableMapping):
         subf._default_initializer = self._default_initializer
         return subf
 
+    def to(self, device, **kwargs):  # pylint: disable=invalid-name
+        """Return a new frame with the columns copied to the targeted device (cpu/gpu).
+
+        Parameters
+        ----------
+        device : Framework-specific device context object
+            The context to move data to.
+        kwargs : Key-word arguments.
+            Key-word arguments fed to the framework copy function.
+
+        Returns
+        -------
+        Frame
+            A new frame.
+        """
+        newframe = self.clone()
+        new_columns = {key: col.to(device, **kwargs) for key, col in newframe._columns.items()}
+        newframe._columns = new_columns
+        return newframe
+
     def __repr__(self):
         return repr(dict(self))
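The two deferrals compose: a subgraph records a pending row index and `to()` records a pending device, and both are resolved on the first feature read. A condensed version of what the new `test_frame_device` below checks, rewritten against the PyTorch backend (assumes a CUDA device; `_node_frames` / `_columns` are internal attributes used here only for inspection):

```python
import torch
import dgl

g = dgl.graph(([0, 1, 2], [2, 3, 1]))
g.ndata['h'] = torch.tensor([1, 1, 1, 2])         # kept on the CPU for now
g = g.to(torch.device('cuda:0'))                  # structure moves, feature copy deferred
assert g._node_frames[0]._columns['h'].storage.device.type == 'cpu'

sg = dgl.node_subgraph(g, [0, 1, 2])              # row selection is deferred as well
h = sg.ndata['h']                                 # gather + device copy both happen here
assert h.device.type == 'cuda' and h.shape[0] == 3
```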
@@ -2756,7 +2756,7 @@ class DGLHeteroGraph(object):
             Representation dict from feature name to feature tensor.
         """
         if is_all(u):
-            return dict(self._node_frames[ntid])
+            return self._node_frames[ntid]
         else:
             u = utils.prepare_tensor(self, u, 'u')
             return self._node_frames[ntid].subframe(u)
@@ -3614,14 +3614,12 @@ class DGLHeteroGraph(object):
         # TODO(minjie): handle initializer
         new_nframes = []
         for nframe in self._node_frames:
-            new_feats = {k : F.copy_to(feat, device, **kwargs) for k, feat in nframe.items()}
-            new_nframes.append(Frame(new_feats, num_rows=nframe.num_rows))
+            new_nframes.append(nframe.to(device, **kwargs))
         ret._node_frames = new_nframes
 
         new_eframes = []
         for eframe in self._edge_frames:
-            new_feats = {k : F.copy_to(feat, device, **kwargs) for k, feat in eframe.items()}
-            new_eframes.append(Frame(new_feats, num_rows=eframe.num_rows))
+            new_eframes.append(eframe.to(device, **kwargs))
         ret._edge_frames = new_eframes
 
         # 2. Copy misc info
@@ -2406,6 +2406,100 @@ def test_remove_nodes(idtype):
     assert F.array_equal(u, F.tensor([1], dtype=idtype))
     assert F.array_equal(v, F.tensor([0], dtype=idtype))
 
+@parametrize_dtype
+def test_frame(idtype):
+    g = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
+    g.ndata['h'] = F.copy_to(F.tensor([0, 1, 2, 3], dtype=idtype), ctx=F.ctx())
+    g.edata['h'] = F.copy_to(F.tensor([0, 1, 2], dtype=idtype), ctx=F.ctx())
+
+    # remove nodes
+    sg = dgl.remove_nodes(g, [3])
+    # check for lazy update
+    assert F.array_equal(sg._node_frames[0]._columns['h'].storage, g.ndata['h'])
+    assert F.array_equal(sg._edge_frames[0]._columns['h'].storage, g.edata['h'])
+    assert sg.ndata['h'].shape[0] == 3
+    assert sg.edata['h'].shape[0] == 2
+    # updated after read
+    assert F.array_equal(sg._node_frames[0]._columns['h'].storage, F.tensor([0, 1, 2], dtype=idtype))
+    assert F.array_equal(sg._edge_frames[0]._columns['h'].storage, F.tensor([0, 1], dtype=idtype))
+
+    ng = dgl.add_nodes(sg, 1)
+    assert ng.ndata['h'].shape[0] == 4
+    assert F.array_equal(ng._node_frames[0]._columns['h'].storage, F.tensor([0, 1, 2, 0], dtype=idtype))
+    ng = dgl.add_edges(ng, [3], [1])
+    assert ng.edata['h'].shape[0] == 3
+    assert F.array_equal(ng._edge_frames[0]._columns['h'].storage, F.tensor([0, 1, 0], dtype=idtype))
+
+    # multi-level lazy update
+    sg = dgl.remove_nodes(g, [3])
+    assert F.array_equal(sg._node_frames[0]._columns['h'].storage, g.ndata['h'])
+    assert F.array_equal(sg._edge_frames[0]._columns['h'].storage, g.edata['h'])
+    ssg = dgl.remove_nodes(sg, [1])
+    assert F.array_equal(ssg._node_frames[0]._columns['h'].storage, g.ndata['h'])
+    assert F.array_equal(ssg._edge_frames[0]._columns['h'].storage, g.edata['h'])
+    # ssg has been materialized by the reads below
+    assert ssg.ndata['h'].shape[0] == 2
+    assert ssg.edata['h'].shape[0] == 0
+    assert F.array_equal(ssg._node_frames[0]._columns['h'].storage, F.tensor([0, 2], dtype=idtype))
+    # sg is still in lazy mode
+    assert F.array_equal(sg._node_frames[0]._columns['h'].storage, g.ndata['h'])
+    assert F.array_equal(sg._edge_frames[0]._columns['h'].storage, g.edata['h'])
+
+@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TensorFlow always creates a new tensor")
+@unittest.skipIf(F._default_context_str == 'cpu', reason="CPU does not have the context-change problem")
+@parametrize_dtype
+def test_frame_device(idtype):
+    g = dgl.graph(([0, 1, 2], [2, 3, 1]))
+    g.ndata['h'] = F.copy_to(F.tensor([1, 1, 1, 2], dtype=idtype), ctx=F.cpu())
+    g.ndata['hh'] = F.copy_to(F.ones((4, 3), dtype=idtype), ctx=F.cpu())
+    g.edata['h'] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.cpu())
+    g = g.to(F.ctx())
+
+    # lazy device copy
+    assert F.context(g._node_frames[0]._columns['h'].storage) == F.cpu()
+    assert F.context(g._node_frames[0]._columns['hh'].storage) == F.cpu()
+    print(g.ndata['h'])
+    assert F.context(g._node_frames[0]._columns['h'].storage) == F.ctx()
+    assert F.context(g._node_frames[0]._columns['hh'].storage) == F.cpu()
+    assert F.context(g._edge_frames[0]._columns['h'].storage) == F.cpu()
+
+    # lazy device copy in subgraph
+    sg = dgl.node_subgraph(g, [0, 1, 2])
+    assert F.context(sg._node_frames[0]._columns['h'].storage) == F.ctx()
+    assert F.context(sg._node_frames[0]._columns['hh'].storage) == F.cpu()
+    assert F.context(sg._edge_frames[0]._columns['h'].storage) == F.cpu()
+    print(sg.ndata['hh'])
+    assert F.context(sg._node_frames[0]._columns['hh'].storage) == F.ctx()
+    assert F.context(sg._edge_frames[0]._columns['h'].storage) == F.cpu()
+
+    # back to cpu
+    sg = sg.to(F.cpu())
+    assert F.context(sg._node_frames[0]._columns['h'].storage) == F.ctx()
+    assert F.context(sg._node_frames[0]._columns['hh'].storage) == F.ctx()
+    assert F.context(sg._edge_frames[0]._columns['h'].storage) == F.cpu()
+    print(sg.ndata['h'])
+    print(sg.ndata['hh'])
+    print(sg.edata['h'])
+    assert F.context(sg._node_frames[0]._columns['h'].storage) == F.cpu()
+    assert F.context(sg._node_frames[0]._columns['hh'].storage) == F.cpu()
+    assert F.context(sg._edge_frames[0]._columns['h'].storage) == F.cpu()
+
+    # set some field
+    sg = sg.to(F.ctx())
+    assert F.context(sg._node_frames[0]._columns['h'].storage) == F.cpu()
+    sg.ndata['h'][0] = 5
+    assert F.context(sg._node_frames[0]._columns['h'].storage) == F.ctx()
+    assert F.context(sg._node_frames[0]._columns['hh'].storage) == F.cpu()
+    assert F.context(sg._edge_frames[0]._columns['h'].storage) == F.cpu()
+
+    # add nodes
+    ng = dgl.add_nodes(sg, 3)
+    assert F.context(ng._node_frames[0]._columns['h'].storage) == F.ctx()
+    assert F.context(ng._node_frames[0]._columns['hh'].storage) == F.ctx()
+    assert F.context(ng._edge_frames[0]._columns['h'].storage) == F.cpu()
+
 if __name__ == '__main__':
     # test_create()
     # test_query()
@@ -2434,9 +2528,11 @@ if __name__ == '__main__':
     # test_dtype_cast()
     # test_reverse("int32")
     # test_format()
-    test_add_edges(F.int32)
-    test_add_nodes(F.int32)
-    test_remove_edges(F.int32)
-    test_remove_nodes(F.int32)
-    test_clone(F.int32)
+    #test_add_edges(F.int32)
+    #test_add_nodes(F.int32)
+    #test_remove_edges(F.int32)
+    #test_remove_nodes(F.int32)
+    #test_clone(F.int32)
+    test_frame(F.int32)
+    test_frame_device(F.int32)
     pass
@@ -420,7 +420,7 @@ def test_sage_conv2(idtype):
     sage = nn.SAGEConv((3, 3), 2, 'gcn')
     feat = (F.randn((5, 3)), F.randn((3, 3)))
     sage = sage.to(ctx)
-    h = sage(g, feat)
+    h = sage(g, (F.copy_to(feat[0], F.ctx()), F.copy_to(feat[1], F.ctx())))
     assert h.shape[-1] == 2
     assert h.shape[0] == 3
     for aggre_type in ['mean', 'pool', 'lstm']: