Unverified Commit 1e3fcc7c authored by xiang song(charlie.song), committed by GitHub

update comments (#2132)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
parent 7993a4d8
@@ -403,6 +403,8 @@ if __name__ == '__main__':
         run(0, n_gpus, args, devices, dataset)
     # multi gpu
     else:
+        # Create csr/coo/csc formats before launching training processes with multi-GPU.
+        # This avoids creating certain formats in each sub-process, which saves memory and CPU.
         dataset.train_enc_graph.create_formats_()
         dataset.train_dec_graph.create_formats_()
         procs = []
...
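All of the multi-GPU hunks in this commit document the same idiom: build the graph's sparse formats once in the parent process, before the per-GPU workers are spawned. A minimal, self-contained sketch of that idiom (the toy graph, the `run` worker, and the process count below are illustrative placeholders, not code from these examples):

```python
import dgl
import torch.multiprocessing as mp

def run(proc_id, n_gpus, g):
    # Hypothetical worker: in the real examples this trains one GPU's model replica.
    # The graph it receives already carries csr/coo/csc, so nothing is rebuilt here.
    print('process %d/%d sees formats %s' % (proc_id, n_gpus, g.formats()))

if __name__ == '__main__':
    g = dgl.graph(([0, 1, 2], [1, 2, 3]))  # toy 4-node graph
    # Materialize csr/coo/csc once in the parent; otherwise every spawned
    # child would lazily build the missing formats in its own memory.
    g.create_formats_()
    n_gpus = 2  # illustrative; the examples derive this from args/devices
    mp.spawn(run, args=(n_gpus, g), nprocs=n_gpus)
```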
@@ -384,6 +384,8 @@ if __name__ == '__main__':
     g.ndata['features'] = features.share_memory_()
     create_history_storage(g, args, n_classes)
+    # Create csr/coo/csc formats before launching training processes with multi-GPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
     g.create_formats_()
     # Pack data
     data = train_mask, val_mask, in_feats, labels, n_classes, g
...
@@ -229,6 +229,8 @@ if __name__ == '__main__':
     else:
         train_g = val_g = test_g = g
+    # Create csr/coo/csc formats before launching training processes with multi-GPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
     train_g.create_formats_()
     val_g.create_formats_()
     test_g.create_formats_()
...
@@ -258,6 +258,8 @@ if __name__ == '__main__':
     else:
         train_g = val_g = test_g = g
+    # Create csr/coo/csc formats before launching training processes with multi-GPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
     train_g.create_formats_()
     val_g.create_formats_()
     test_g.create_formats_()
...
@@ -298,6 +298,9 @@ def main(args, devices):
     val_mask = g.ndata['val_mask']
     test_mask = g.ndata['test_mask']
     g.ndata['features'] = features
+    # Create csr/coo/csc formats before launching training processes with multi-GPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
     g.create_formats_()
     # Pack data
     data = train_mask, val_mask, test_mask, in_feats, labels, n_classes, g
...
@@ -63,9 +63,4 @@ class ClusterIter(object):
 def subgraph_collate_fn(g, batch):
     nids = np.concatenate(batch).reshape(-1).astype(np.int64)
     g1 = g.subgraph(nids)
-    nid = g1.ndata[dgl.NID]
-    g1.ndata['feat'] = g.ndata['feat'][nid]
-    g1.ndata['labels'] = g.ndata['labels'][nid]
-    g1.ndata['train_mask'] = g.ndata['train_mask'][nid]
-    g1.create_formats_()
     return g1
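This hunk deletes rather than adds: `g.subgraph()` copies the parent's node features ('feat', 'labels', 'train_mask', ...) onto the subgraph by itself, so the manual slicing was redundant, and the per-batch `create_formats_()` rebuilt sparse formats for every mini-batch, the very cost the other hunks avoid. A hedged usage sketch of the simplified collate function; the toy graph and cluster assignment below are illustrative, not the example's data:

```python
from functools import partial
import numpy as np
import torch
import dgl
from torch.utils.data import DataLoader

def subgraph_collate_fn(g, batch):
    nids = np.concatenate(batch).reshape(-1).astype(np.int64)
    # g.subgraph copies g.ndata onto the returned subgraph, so
    # 'feat', 'labels', 'train_mask', ... come along automatically.
    return g.subgraph(nids)

g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))
g.ndata['feat'] = torch.randn(4, 8)
clusters = [np.array([0, 1]), np.array([2, 3])]  # toy partition assignment
loader = DataLoader(clusters, batch_size=1, shuffle=True,
                    collate_fn=partial(subgraph_collate_fn, g))
for sg in loader:
    print(sg.num_nodes(), sg.ndata['feat'].shape)
```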
@@ -243,6 +243,9 @@ if __name__ == '__main__':
     in_feats = graph.ndata['feat'].shape[1]
     n_classes = (labels.max() + 1).item()
+    # Create csr/coo/csc formats before launching sampling processes.
+    # This avoids creating certain formats in each data loader process, which saves memory and CPU.
     graph.create_formats_()
     # Pack data
     data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, args.head
...
@@ -234,6 +234,8 @@ if __name__ == '__main__':
     in_feats = graph.ndata['feat'].shape[1]
     n_classes = (labels.max() + 1).item()
+    # Create csr/coo/csc formats before launching sampling processes.
+    # This avoids creating certain formats in each data loader process, which saves memory and CPU.
     graph.create_formats_()
     # Pack data
     data = train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph
...
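The sampling-side variant of the comment covers the case where format creation would otherwise be pushed into PyTorch data-loader workers rather than trainer processes. A minimal sketch, assuming the DGL 0.5-era `dgl.dataloading` API; the graph size, fan-outs, batch size, and worker count are illustrative:

```python
import dgl
import torch

if __name__ == '__main__':
    g = dgl.rand_graph(1000, 5000)    # toy graph
    train_idx = torch.arange(100)     # toy training node ids
    # Build csr/coo/csc in the parent process; with num_workers > 0 each
    # sampling worker would otherwise construct the missing formats itself.
    g.create_formats_()
    sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
    dataloader = dgl.dataloading.NodeDataLoader(
        g, train_idx, sampler,
        batch_size=32, shuffle=True, drop_last=False, num_workers=4)
    for input_nodes, output_nodes, blocks in dataloader:
        pass  # forward/backward would go here
```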
@@ -504,6 +504,9 @@ def main(args, devices):
     train_idx.share_memory_()
     val_idx.share_memory_()
     test_idx.share_memory_()
+    # Create csr/coo/csc formats before launching training processes with multi-GPU.
+    # This avoids creating certain formats in each sub-process, which saves memory and CPU.
+    g.create_formats_()
     n_gpus = len(devices)
     # cpu
...