Unverified commit 2b983869, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Black auto fix. (#4705)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 408eba24
import logging

import numpy as np
from numpy.lib.format import open_memmap

from .registry import register_array_parser


@register_array_parser("numpy")
class NumpyArrayParser(object):
    def __init__(self):
        pass

    def read(self, path):
        logging.info("Reading from %s using numpy format" % path)
        arr = np.load(path, mmap_mode="r")
        logging.info("Done reading from %s" % path)
        return arr

    def write(self, path, arr):
        logging.info("Writing to %s using numpy format" % path)
        # np.save would load the entire memmap array up into CPU. So we manually open
        # an empty npy file with memmap mode and manually flush it instead.
        new_arr = open_memmap(path, mode="w+", dtype=arr.dtype, shape=arr.shape)
        new_arr[:] = arr[:]
        logging.info("Done writing to %s" % path)

REGISTRY = {}


def register_array_parser(name):
    def _deco(cls):
        REGISTRY[name] = cls
        return cls

    return _deco


def get_array_parser(**fmt_meta):
    cls = REGISTRY[fmt_meta.pop("name")]
    return cls(**fmt_meta)
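
A quick usage sketch of the two pieces above (illustrative only, not part of the commit; "data.npy" is a hypothetical path, and fmt_meta would normally come from dataset metadata):

    # Hypothetical usage of the parser registry.
    parser = get_array_parser(name="numpy")
    arr = parser.read("data.npy")
    parser.write("data_copy.npy", arr)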

import logging
import os
from contextlib import contextmanager

from numpy.lib.format import open_memmap


@contextmanager
def setdir(path):
    try:
        os.makedirs(path, exist_ok=True)
        cwd = os.getcwd()
        logging.info("Changing directory to %s" % path)
        logging.info("Previously: %s" % cwd)
        os.chdir(path)
        yield
    finally:
        logging.info("Restoring directory to %s" % cwd)
        os.chdir(cwd)
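
A minimal usage sketch for setdir (illustrative only, not part of the commit): relative paths inside the block resolve under the given directory, and the previous working directory is restored even if the block raises.

    # Hypothetical usage of setdir.
    with setdir("workspace"):
        # Written to workspace/log.txt; the original cwd is restored afterwards.
        with open("log.txt", "w") as f:
            f.write("done\n")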
@@ -21,11 +21,12 @@ networks with PyTorch.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.data


######################################################################
# Overview of Node Classification with GNN
@@ -59,10 +60,9 @@ import torch.nn.functional as F
# --------------------
#

dataset = dgl.data.CoraGraphDataset()
print("Number of categories:", dataset.num_classes)


######################################################################
@@ -92,9 +92,9 @@ g = dataset[0]
# - ``feat``: The node features.
#

print("Node features")
print(g.ndata)
print("Edge features")
print(g.edata)

@@ -113,6 +113,7 @@ print(g.edata)
from dgl.nn import GraphConv


class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
@@ -125,8 +126,9 @@ class GCN(nn.Module):
        h = self.conv2(g, h)
        return h


# Create the model with given dimensions
model = GCN(g.ndata["feat"].shape[1], 16, dataset.num_classes)


######################################################################
@@ -142,16 +144,17 @@ model = GCN(g.ndata['feat'].shape[1], 16, dataset.num_classes)
# Training this GCN is similar to training other PyTorch neural networks.
#


def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    for e in range(100):
        # Forward
        logits = model(g, features)
@@ -179,9 +182,14 @@ def train(g, model):
        optimizer.step()

        if e % 5 == 0:
            print(
                "In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})".format(
                    e, loss, val_acc, best_val_acc, test_acc, best_test_acc
                )
            )


model = GCN(g.ndata["feat"].shape[1], 16, dataset.num_classes)
train(g, model)
...
@@ -30,13 +30,17 @@ By the end of this tutorial you will be able to:
# center node to the leaves.
#

import numpy as np
import torch

import dgl

g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
# Equivalently, PyTorch LongTensors also work.
g = dgl.graph(
    (torch.LongTensor([0, 0, 0, 0, 0]), torch.LongTensor([1, 2, 3, 4, 5])),
    num_nodes=6,
)

# You can omit the number of nodes argument if you can tell the number of nodes from the edge list alone.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))

@@ -81,13 +85,13 @@ print(g.edges())
#
# Assign a 3-dimensional node feature vector for each node.

g.ndata["x"] = torch.randn(6, 3)
# Assign a 4-dimensional edge feature vector for each edge.
g.edata["a"] = torch.randn(5, 4)
# Assign a 5x4 node feature matrix for each node. Node and edge features in DGL can be multi-dimensional.
g.ndata["y"] = torch.randn(6, 5, 4)

print(g.edata["a"])


######################################################################
@@ -163,13 +167,13 @@ print(sg2.edata[dgl.EID])
#

# The original node feature of each node in sg1
print(sg1.ndata["x"])
# The original edge feature of each edge in sg1
print(sg1.edata["a"])
# The original node feature of each node in sg2
print(sg2.ndata["x"])
# The original edge feature of each edge in sg2
print(sg2.edata["a"])

######################################################################
@@ -195,13 +199,13 @@ print(newg.edges())
#

# Save graphs
dgl.save_graphs("graph.dgl", g)
dgl.save_graphs("graphs.dgl", [g, sg1, sg2])

# Load graphs
(g,), _ = dgl.load_graphs("graph.dgl")
print(g)
(g, sg1, sg2), _ = dgl.load_graphs("graphs.dgl")
print(g)
print(sg1)
print(sg2)
...
@@ -18,11 +18,12 @@ GNN for node classification <1_introduction>`.
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.function as fn


######################################################################
# Message passing and GNNs
@@ -80,7 +81,6 @@ import torch.nn.functional as F
# here is how you can implement GraphSAGE convolution in DGL on your own.
#

class SAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model.
@@ -92,6 +92,7 @@ class SAGEConv(nn.Module):
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super(SAGEConv, self).__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
@@ -108,10 +109,13 @@ class SAGEConv(nn.Module):
            The input node feature.
        """
        with g.local_scope():
            g.ndata["h"] = h
            # update_all is a message passing API.
            g.update_all(
                message_func=fn.copy_u("h", "m"),
                reduce_func=fn.mean("m", "h_N"),
            )
            h_N = g.ndata["h_N"]
            h_total = torch.cat([h, h_N], dim=1)
            return self.linear(h_total)

@@ -140,6 +144,7 @@ class SAGEConv(nn.Module):
# a multi-layer GraphSAGE network.
#


class Model(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(Model, self).__init__()
@@ -165,17 +170,18 @@ import dgl.data
dataset = dgl.data.CoraGraphDataset()
g = dataset[0]


def train(g, model):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    all_logits = []
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    for e in range(200):
        # Forward
        logits = model(g, features)
@@ -205,10 +211,14 @@ def train(g, model):
        all_logits.append(logits.detach())

        if e % 5 == 0:
            print(
                "In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})".format(
                    e, loss, val_acc, best_val_acc, test_acc, best_test_acc
                )
            )


model = Model(g.ndata["feat"].shape[1], 16, dataset.num_classes)
train(g, model)

@@ -230,6 +240,7 @@ train(g, model)
# passing.
#


class WeightedSAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model with edge weights.
@@ -240,6 +251,7 @@ class WeightedSAGEConv(nn.Module):
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super(WeightedSAGEConv, self).__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
@@ -258,10 +270,13 @@ class WeightedSAGEConv(nn.Module):
            The edge weight.
        """
        with g.local_scope():
            g.ndata["h"] = h
            g.edata["w"] = w
            g.update_all(
                message_func=fn.u_mul_e("h", "w", "m"),
                reduce_func=fn.mean("m", "h_N"),
            )
            h_N = g.ndata["h_N"]
            h_total = torch.cat([h, h_N], dim=1)
            return self.linear(h_total)

@@ -272,6 +287,7 @@ class WeightedSAGEConv(nn.Module):
# the model. You can replace it with your own edge weights.
#


class Model(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(Model, self).__init__()
@@ -284,7 +300,8 @@ class Model(nn.Module):
        h = self.conv2(g, h, torch.ones(g.num_edges(), 1).to(g.device))
        return h


model = Model(g.ndata["feat"].shape[1], 16, dataset.num_classes)
train(g, model)

@@ -297,8 +314,9 @@ train(g, model)
# equivalent to ``fn.u_mul_e('h', 'w', 'm')``.
#


def u_mul_e_udf(edges):
    return {"m": edges.src["h"] * edges.data["w"]}


######################################################################
@@ -314,8 +332,9 @@ def u_mul_e_udf(edges):
# the incoming messages:
#


def mean_udf(nodes):
    return {"h_N": nodes.mailbox["m"].mean(1)}
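
A quick check sketch (illustrative only, not part of the commit), assuming g, h, and w are the graph, node features, and edge weights from the WeightedSAGEConv example above; the UDF pair should reproduce the builtin pair on every node that has in-edges:

    # Hypothetical usage: the UDFs in place of the builtins.
    with g.local_scope():
        g.ndata["h"] = h
        g.edata["w"] = w
        g.update_all(message_func=u_mul_e_udf, reduce_func=mean_udf)
        h_N = g.ndata["h_N"]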

######################################################################
...
@@ -17,14 +17,16 @@ By the end of this tutorial you will be able to
"""

import itertools

import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.data


######################################################################
# Overview of Link Prediction with GNN
@@ -67,7 +69,6 @@ import scipy.sparse as sp
# first loads the Cora dataset.
#

dataset = dgl.data.CoraGraphDataset()
g = dataset[0]
@@ -98,8 +99,14 @@ adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes())
neg_u, neg_v = np.where(adj_neg != 0)

neg_eids = np.random.choice(len(neg_u), g.number_of_edges())
test_neg_u, test_neg_v = (
    neg_u[neg_eids[:test_size]],
    neg_v[neg_eids[:test_size]],
)
train_neg_u, train_neg_v = (
    neg_u[neg_eids[test_size:]],
    neg_v[neg_eids[test_size:]],
)


######################################################################
@@ -129,13 +136,14 @@ train_g = dgl.remove_edges(g, eids[:test_size])
from dgl.nn import SAGEConv


# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, "mean")
        self.conv2 = SAGEConv(h_feats, h_feats, "mean")

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
@@ -180,8 +188,12 @@ class GraphSAGE(nn.Module):
# for the training set and the test set respectively.
#

train_pos_g = dgl.graph(
    (train_pos_u, train_pos_v), num_nodes=g.number_of_nodes()
)
train_neg_g = dgl.graph(
    (train_neg_u, train_neg_v), num_nodes=g.number_of_nodes()
)

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes())
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())
@@ -201,15 +213,16 @@ test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())
import dgl.function as fn


class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
            g.apply_edges(fn.u_dot_v("h", "h", "score"))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            return g.edata["score"][:, 0]

######################################################################
@@ -218,6 +231,7 @@ class DotPredictor(nn.Module):
# by concatenating the incident nodes’ features and passing it to an MLP.
#


class MLPPredictor(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
@@ -241,14 +255,14 @@ class MLPPredictor(nn.Module):
        dict
            A dictionary of new edge features.
        """
        h = torch.cat([edges.src["h"], edges.dst["h"]], 1)
        return {"score": self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            g.apply_edges(self.apply_edges)
            return g.edata["score"]


######################################################################
@@ -284,20 +298,25 @@ class MLPPredictor(nn.Module):
# The evaluation metric in this tutorial is AUC.
#

model = GraphSAGE(train_g.ndata["feat"].shape[1], 16)
# You can replace DotPredictor with MLPPredictor.
# pred = MLPPredictor(16)
pred = DotPredictor()


def compute_loss(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]
    )
    return F.binary_cross_entropy_with_logits(scores, labels)


def compute_auc(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score]).numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]
    ).numpy()
    return roc_auc_score(labels, scores)
@@ -313,13 +332,15 @@ def compute_auc(pos_score, neg_score):
# ----------- 3. set up loss and optimizer -------------- #
# in this case, the loss is computed inside the training loop
optimizer = torch.optim.Adam(
    itertools.chain(model.parameters(), pred.parameters()), lr=0.01
)

# ----------- 4. training -------------------------------- #
all_logits = []
for e in range(100):
    # forward
    h = model(train_g, train_g.ndata["feat"])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)
@@ -330,14 +351,15 @@ for e in range(100):
    optimizer.step()

    if e % 5 == 0:
        print("In epoch {}, loss: {}".format(e, loss))

# ----------- 5. check results ------------------------ #
from sklearn.metrics import roc_auc_score

with torch.no_grad():
    pos_score = pred(test_pos_g, h)
    neg_score = pred(test_neg_g, h)
    print("AUC", compute_auc(pos_score, neg_score))


# Thumbnail credits: Link Prediction with Neo4j, Mark Needham
...
@@ -13,11 +13,12 @@ By the end of this tutorial, you will be able to
(Time estimate: 18 minutes)
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.data


######################################################################
# Overview of Graph Classification with GNN
@@ -35,10 +36,9 @@ import torch.nn.functional as F
# ------------
#

# Load the PROTEINS dataset from the GIN benchmark collection, with self-loops added.
dataset = dgl.data.GINDataset("PROTEINS", self_loop=True)

######################################################################
@@ -48,8 +48,8 @@ dataset = dgl.data.GINDataset('PROTEINS', self_loop=True)
# and ``gclasses`` attributes.
#

print("Node feature dimensionality:", dataset.dim_nfeats)
print("Number of graph categories:", dataset.gclasses)


######################################################################
@@ -70,9 +70,10 @@ print('Number of graph categories:', dataset.gclasses)
# to sample from only a subset of the dataset.
#

from torch.utils.data.sampler import SubsetRandomSampler

from dgl.dataloading import GraphDataLoader

num_examples = len(dataset)
num_train = int(num_examples * 0.8)

@@ -80,9 +81,11 @@ train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))

train_dataloader = GraphDataLoader(
    dataset, sampler=train_sampler, batch_size=5, drop_last=False
)
test_dataloader = GraphDataLoader(
    dataset, sampler=test_sampler, batch_size=5, drop_last=False
)

######################################################################
@@ -117,12 +120,18 @@ print(batch)
#

batched_graph, labels = batch
print(
    "Number of nodes for each graph element in the batch:",
    batched_graph.batch_num_nodes(),
)
print(
    "Number of edges for each graph element in the batch:",
    batched_graph.batch_num_edges(),
)

# Recover the original graph elements from the minibatch
graphs = dgl.unbatch(batched_graph)
print("The original graphs in the minibatch:")
print(graphs)


######################################################################
@@ -152,6 +161,7 @@ print(graphs)
from dgl.nn import GraphConv


class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
@@ -162,9 +172,8 @@ class GCN(nn.Module):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        g.ndata["h"] = h
        return dgl.mean_nodes(g, "h")


######################################################################
@@ -182,7 +191,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
for epoch in range(20):
    for batched_graph, labels in train_dataloader:
        pred = model(batched_graph, batched_graph.ndata["attr"].float())
        loss = F.cross_entropy(pred, labels)
        optimizer.zero_grad()
        loss.backward()
@@ -191,11 +200,11 @@ for epoch in range(20):
num_correct = 0
num_tests = 0
for batched_graph, labels in test_dataloader:
    pred = model(batched_graph, batched_graph.ndata["attr"].float())
    num_correct += (pred.argmax(1) == labels).sum().item()
    num_tests += len(labels)

print("Test accuracy:", num_correct / num_tests)


######################################################################
...
@@ -49,16 +49,21 @@ By the end of this tutorial, you will be able to
#

import urllib.request

import pandas as pd

urllib.request.urlretrieve(
    "https://data.dgl.ai/tutorial/dataset/members.csv", "./members.csv"
)
urllib.request.urlretrieve(
    "https://data.dgl.ai/tutorial/dataset/interactions.csv",
    "./interactions.csv",
)

members = pd.read_csv("./members.csv")
members.head()

interactions = pd.read_csv("./interactions.csv")
interactions.head()

@@ -82,28 +87,35 @@ interactions.head()
# takes the values as-is for simplicity.
#

import os

import torch

import dgl
from dgl.data import DGLDataset


class KarateClubDataset(DGLDataset):
    def __init__(self):
        super().__init__(name="karate_club")

    def process(self):
        nodes_data = pd.read_csv("./members.csv")
        edges_data = pd.read_csv("./interactions.csv")
        node_features = torch.from_numpy(nodes_data["Age"].to_numpy())
        node_labels = torch.from_numpy(
            nodes_data["Club"].astype("category").cat.codes.to_numpy()
        )
        edge_features = torch.from_numpy(edges_data["Weight"].to_numpy())
        edges_src = torch.from_numpy(edges_data["Src"].to_numpy())
        edges_dst = torch.from_numpy(edges_data["Dst"].to_numpy())

        self.graph = dgl.graph(
            (edges_src, edges_dst), num_nodes=nodes_data.shape[0]
        )
        self.graph.ndata["feat"] = node_features
        self.graph.ndata["label"] = node_labels
        self.graph.edata["weight"] = edge_features

        # If your dataset is a node classification dataset, you will need to assign
        # masks indicating whether a node belongs to training, validation, and test set.
@@ -114,11 +126,11 @@ class KarateClubDataset(DGLDataset):
        val_mask = torch.zeros(n_nodes, dtype=torch.bool)
        test_mask = torch.zeros(n_nodes, dtype=torch.bool)
        train_mask[:n_train] = True
        val_mask[n_train : n_train + n_val] = True
        test_mask[n_train + n_val :] = True
        self.graph.ndata["train_mask"] = train_mask
        self.graph.ndata["val_mask"] = val_mask
        self.graph.ndata["test_mask"] = test_mask

    def __getitem__(self, i):
        return self.graph
@@ -126,6 +138,7 @@ class KarateClubDataset(DGLDataset):
    def __len__(self):
        return 1


dataset = KarateClubDataset()
graph = dataset[0]
@@ -163,23 +176,27 @@ print(graph)
#

urllib.request.urlretrieve(
    "https://data.dgl.ai/tutorial/dataset/graph_edges.csv", "./graph_edges.csv"
)
urllib.request.urlretrieve(
    "https://data.dgl.ai/tutorial/dataset/graph_properties.csv",
    "./graph_properties.csv",
)
edges = pd.read_csv("./graph_edges.csv")
properties = pd.read_csv("./graph_properties.csv")

edges.head()

properties.head()


class SyntheticDataset(DGLDataset):
    def __init__(self):
        super().__init__(name="synthetic")

    def process(self):
        edges = pd.read_csv("./graph_edges.csv")
        properties = pd.read_csv("./graph_properties.csv")
        self.graphs = []
        self.labels = []
@@ -189,18 +206,18 @@ class SyntheticDataset(DGLDataset):
        label_dict = {}
        num_nodes_dict = {}
        for _, row in properties.iterrows():
            label_dict[row["graph_id"]] = row["label"]
            num_nodes_dict[row["graph_id"]] = row["num_nodes"]

        # For the edges, first group the table by graph IDs.
        edges_group = edges.groupby("graph_id")

        # For each graph ID...
        for graph_id in edges_group.groups:
            # Find the edges as well as the number of nodes and its label.
            edges_of_id = edges_group.get_group(graph_id)
            src = edges_of_id["src"].to_numpy()
            dst = edges_of_id["dst"].to_numpy()
            num_nodes = num_nodes_dict[graph_id]
            label = label_dict[graph_id]
@@ -218,6 +235,7 @@ class SyntheticDataset(DGLDataset):
    def __len__(self):
        return len(self.graphs)


dataset = SyntheticDataset()
graph, label = dataset[0]
print(graph, label)
...
@@ -45,15 +45,16 @@ message passing APIs.
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v`, we can simply use builtin functions:

import torch as th
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.function as fn
from dgl import DGLGraph

gcn_msg = fn.copy_u(u="h", out="m")
gcn_reduce = fn.sum(msg="m", out="h")
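
For reference, a sketch (illustrative only, not part of the commit) of what this builtin pair computes, written as user-defined functions; the names gcn_message and gcn_reduce_udf are made up for illustration:

    # Hypothetical UDF equivalents of fn.copy_u / fn.sum; the builtins are
    # preferred in practice because DGL can map them to optimized kernels.
    def gcn_message(edges):
        # Copy the source node feature "h" into the message "m".
        return {"m": edges.src["h"]}

    def gcn_reduce_udf(nodes):
        # Sum the incoming messages "m" into the new node feature "h".
        return {"h": nodes.mailbox["m"].sum(1)}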

###############################################################################
# We then proceed to define the GCNLayer module. A GCNLayer essentially performs
@@ -65,6 +66,7 @@ gcn_reduce = fn.sum(msg='m', out='h')
# efficient :class:`builtin GCN layer module <dgl.nn.pytorch.conv.GraphConv>`.
#


class GCNLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
@@ -75,11 +77,12 @@ class GCNLayer(nn.Module):
        # (such as the `'h'` ndata below) are automatically popped out
        # when the scope exits.
        with g.local_scope():
            g.ndata["h"] = feature
            g.update_all(gcn_msg, gcn_reduce)
            h = g.ndata["h"]
            return self.linear(h)


###############################################################################
# The forward function is essentially the same as any other commonly seen NN
# model in PyTorch. We can initialize GCN like any ``nn.Module``. For example,
@@ -88,6 +91,7 @@ class Net(nn.Module):
# 1433 and the number of classes is 7). The last GCN layer computes node embeddings,
# so the last layer in general does not apply activation.


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
@@ -98,6 +102,8 @@ class Net(nn.Module):
        x = F.relu(self.layer1(g, features))
        x = self.layer2(g, x)
        return x


net = Net()
print(net)
@@ -105,19 +111,23 @@ print(net)
# We load the cora dataset using DGL's built-in data module.

from dgl.data import CoraGraphDataset


def load_cora_data():
    dataset = CoraGraphDataset()
    g = dataset[0]
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    test_mask = g.ndata["test_mask"]
    return g, features, labels, train_mask, test_mask

###############################################################################
# When a model is trained, we can use the following method to evaluate
# the performance of the model on the test dataset:


def evaluate(model, g, features, labels, mask):
    model.eval()
    with th.no_grad():
@@ -128,18 +138,21 @@ def evaluate(model, g, features, labels, mask):
        correct = th.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


###############################################################################
# We then train the network as follows:

import time

import numpy as np

g, features, labels, train_mask, test_mask = load_cora_data()
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
for epoch in range(50):
    if epoch >= 3:
        t0 = time.time()

    net.train()
@@ -151,12 +164,15 @@ for epoch in range(50):
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    acc = evaluate(net, g, features, labels, test_mask)
    print(
        "Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)
        )
    )


###############################################################################
# .. _math:
...
@@ -67,11 +67,12 @@ offers a different perspective. The tutorial describes how to implement a Capsule
#
# Here's how we set up the graph and initialize node and edge features.

import matplotlib.pyplot as plt
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F

import dgl

@@ -80,8 +81,8 @@ def init_graph(in_nodes, out_nodes, f_size):
    v = np.tile(np.arange(in_nodes, in_nodes + out_nodes), in_nodes)
    g = dgl.DGLGraph((u, v))
    # init states
    g.ndata["v"] = th.zeros(in_nodes + out_nodes, f_size)
    g.edata["b"] = th.zeros(in_nodes * out_nodes, 1)
    return g

@@ -116,6 +117,7 @@ def init_graph(in_nodes, out_nodes, f_size):
import dgl.function as fn


class DGLRoutingLayer(nn.Module):
    def __init__(self, in_nodes, out_nodes, f_size):
        super(DGLRoutingLayer, self).__init__()
@@ -126,27 +128,33 @@ class DGLRoutingLayer(nn.Module):
        self.out_indx = list(range(in_nodes, in_nodes + out_nodes))

    def forward(self, u_hat, routing_num=1):
        self.g.edata["u_hat"] = u_hat

        for r in range(routing_num):
            # step 1 (line 4): normalize over out edges
            edges_b = self.g.edata["b"].view(self.in_nodes, self.out_nodes)
            self.g.edata["c"] = F.softmax(edges_b, dim=1).view(-1, 1)
            self.g.edata["c u_hat"] = self.g.edata["c"] * self.g.edata["u_hat"]

            # Execute step 1 & 2
            self.g.update_all(fn.copy_e("c u_hat", "m"), fn.sum("m", "s"))

            # step 3 (line 6)
            self.g.nodes[self.out_indx].data["v"] = self.squash(
                self.g.nodes[self.out_indx].data["s"], dim=1
            )

            # step 4 (line 7)
            v = th.cat(
                [self.g.nodes[self.out_indx].data["v"]] * self.in_nodes, dim=0
            )
            self.g.edata["b"] = self.g.edata["b"] + (
                self.g.edata["u_hat"] * v
            ).sum(dim=1, keepdim=True)

    @staticmethod
    def squash(s, dim=1):
        sq = th.sum(s**2, dim=dim, keepdim=True)
        s_norm = th.sqrt(sq)
        s = (sq / (1.0 + sq)) * (s / s_norm)
        return s
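
A small numeric sketch of the squashing nonlinearity (illustrative only, not part of the commit): it keeps the direction of s and rescales its norm to sq / (1 + sq), so output norms always stay below 1.

    # Hypothetical check: a vector of norm 5 (so sq = 25) is squashed to
    # norm 25 / 26 ≈ 0.96, keeping its direction.
    s = th.tensor([[0.0, 3.0, 4.0]])
    v = DGLRoutingLayer.squash(s)
    print(th.norm(v, dim=1))  # tensor([0.9615])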

@@ -172,14 +180,14 @@ dist_list = []

for i in range(10):
    routing(u_hat)
    dist_matrix = routing.g.edata["c"].view(in_nodes, out_nodes)
    entropy = (-dist_matrix * th.log(dist_matrix)).sum(dim=1)
    entropy_list.append(entropy.data.numpy())
    dist_list.append(dist_matrix.data.numpy())

stds = np.std(entropy_list, axis=1)
means = np.mean(entropy_list, axis=1)
plt.errorbar(np.arange(len(entropy_list)), means, stds, marker="o")
plt.ylabel("Entropy of Weight Distribution")
plt.xlabel("Number of Routing")
plt.xticks(np.arange(len(entropy_list)))
@@ -189,8 +197,8 @@ plt.close()
#
# Alternatively, we can also watch the evolution of histograms.

import matplotlib.animation as animation
import seaborn as sns

fig = plt.figure(dpi=150)
fig.clf()
@@ -204,7 +212,9 @@ def dist_animate(i):
    ax.set_title("Routing: %d" % (i))


ani = animation.FuncAnimation(
    fig, dist_animate, frames=len(entropy_list), interval=500
)
plt.close()

############################################################################################################
@@ -226,22 +236,43 @@ pos = dict()
fig2 = plt.figure(figsize=(8, 3), dpi=150)
fig2.clf()
ax = fig2.subplots()
pos.update(
    (n, (i, 1)) for i, n in zip(height_in_y, X)
)  # put nodes from X at x=1
pos.update(
    (n, (i, 2)) for i, n in zip(height_out_y, Y)
)  # put nodes from Y at x=2


def weight_animate(i):
    ax.cla()
    ax.axis("off")
    ax.set_title("Routing: %d " % i)
    dm = dist_list[i]
    nx.draw_networkx_nodes(
        g, pos, nodelist=range(in_nodes), node_color="r", node_size=100, ax=ax
    )
    nx.draw_networkx_nodes(
        g,
        pos,
        nodelist=range(in_nodes, in_nodes + out_nodes),
        node_color="b",
        node_size=100,
        ax=ax,
    )
    for edge in g.edges():
        nx.draw_networkx_edges(
            g,
            pos,
            edgelist=[edge],
            width=dm[edge[0], edge[1] - in_nodes] * 1.5,
            ax=ax,
        )


ani2 = animation.FuncAnimation(
    fig2, weight_animate, frames=len(dist_list), interval=500
)
plt.close()

############################################################################################################
@@ -257,4 +288,3 @@ plt.close()
# .. |image3| image:: https://i.imgur.com/dMvu7p3.png
# .. |image4| image:: https://github.com/VoVAllen/DGL_Capsule/raw/master/routing_dist.gif
# .. |image5| image:: https://github.com/VoVAllen/DGL_Capsule/raw/master/routing_vis.gif
@@ -72,12 +72,15 @@ process ID, which should be an integer from `0` to `world_size - 1`.

import torch.distributed as dist


def init_process_group(world_size, rank):
    dist.init_process_group(
        backend="gloo",  # change to 'nccl' for multiple GPUs
        init_method="tcp://127.0.0.1:12345",
        world_size=world_size,
        rank=rank,
    )


###############################################################################
# Data Loader Preparation
@@ -94,18 +97,21 @@ def init_process_group(world_size, rank):
from dgl.data import split_dataset
from dgl.dataloading import GraphDataLoader


def get_dataloaders(dataset, seed, batch_size=32):
    # Use an 80:10:10 train-val-test split
    train_set, val_set, test_set = split_dataset(
        dataset, frac_list=[0.8, 0.1, 0.1], shuffle=True, random_state=seed
    )
    train_loader = GraphDataLoader(
        train_set, use_ddp=True, batch_size=batch_size, shuffle=True
    )
    val_loader = GraphDataLoader(val_set, batch_size=batch_size)
    test_loader = GraphDataLoader(test_set, batch_size=batch_size)

    return train_loader, val_loader, test_loader

###############################################################################
# Model Initialization
# --------------------
@@ -115,14 +121,20 @@ def get_dataloaders(dataset, seed, batch_size=32):

import torch.nn as nn
import torch.nn.functional as F

from dgl.nn.pytorch import GINConv, SumPooling


class GIN(nn.Module):
    def __init__(self, input_size=1, num_classes=2):
        super(GIN, self).__init__()

        self.conv1 = GINConv(
            nn.Linear(input_size, num_classes), aggregator_type="sum"
        )
        self.conv2 = GINConv(
            nn.Linear(num_classes, num_classes), aggregator_type="sum"
        )
        self.pool = SumPooling()

    def forward(self, g, feats):
@@ -132,6 +144,7 @@ class GIN(nn.Module):
        return self.pool(g, feats)


###############################################################################
# To ensure the same initial model parameters across processes, we need to set the
# same random seed before model initialization. Once we construct a model
@@ -141,16 +154,20 @@ class GIN(nn.Module):

import torch
from torch.nn.parallel import DistributedDataParallel


def init_model(seed, device):
    torch.manual_seed(seed)
    model = GIN().to(device)
    if device.type == "cpu":
        model = DistributedDataParallel(model)
    else:
        model = DistributedDataParallel(
            model, device_ids=[device], output_device=device
        )

    return model


###############################################################################
# Main Function for Each Process
# ------------------------------
@@ -158,6 +175,7 @@ def init_model(seed, device):
# Define the model evaluation function as in the single-GPU setting.
#


def evaluate(model, dataloader, device):
    model.eval()
@@ -168,7 +186,7 @@ def evaluate(model, dataloader, device):
        bg = bg.to(device)
        labels = labels.to(device)
        # Get input node features
        feats = bg.ndata.pop("attr")
        with torch.no_grad():
            pred = model(bg, feats)
        _, pred = torch.max(pred, 1)
@@ -177,26 +195,27 @@ def evaluate(model, dataloader, device):
    return 1.0 * total_correct / total

###############################################################################
# Define the main function for each process.
#

from torch.optim import Adam


def main(rank, world_size, dataset, seed=0):
    init_process_group(world_size, rank)
    if torch.cuda.is_available():
        device = torch.device("cuda:{:d}".format(rank))
        torch.cuda.set_device(device)
    else:
        device = torch.device("cpu")

    model = init_model(seed, device)
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.01)

    train_loader, val_loader, test_loader = get_dataloaders(dataset, seed)
    for epoch in range(5):
        model.train()
        # The line below ensures all processes use a different
@@ -207,7 +226,7 @@ def main(rank, world_size, dataset, seed=0):
        for bg, labels in train_loader:
            bg = bg.to(device)
            labels = labels.to(device)
            feats = bg.ndata.pop("attr")
            pred = model(bg, feats)
            loss = criterion(pred, labels)
@@ -216,15 +235,16 @@ def main(rank, world_size, dataset, seed=0):
            loss.backward()
            optimizer.step()
        loss = total_loss
        print("Loss: {:.4f}".format(loss))

        val_acc = evaluate(model, val_loader, device)
        print("Val acc: {:.4f}".format(val_acc))

    test_acc = evaluate(model, test_loader, device)
    print("Test acc: {:.4f}".format(test_acc))
    dist.destroy_process_group()


###############################################################################
# Finally we load the dataset and launch the processes.
#
...