Unverified Commit d78a3a4b authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4640)



* auto fix

* add more

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 23d09057
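The commit itself is mechanical: Black re-wraps long signatures and calls, normalizes string literals to double quotes, and the "sort" bullet corresponds to regrouped import blocks. A hedged sketch of reproducing the result locally (the target directory, the isort step, and default options are assumptions; the repository's own lint configuration, if any, takes precedence):

# Assumed commands; Black (and isort, if that is what "sort" refers to)
# must be installed, and exact options may be pinned in the repo config.
python -m black benchmarks/
python -m isort benchmarks/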
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
from dgl.nn.pytorch import GATConv

from .. import utils


class GAT(nn.Module):
    def __init__(
        self,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super(GAT, self).__init__()
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(
            GATConv(
                in_dim,
                num_hidden,
                heads[0],
                feat_drop,
                attn_drop,
                negative_slope,
                False,
                self.activation,
            )
        )
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(
                GATConv(
                    num_hidden * heads[l - 1],
                    num_hidden,
                    heads[l],
                    feat_drop,
                    attn_drop,
                    negative_slope,
                    residual,
                    self.activation,
                )
            )
        # output projection
        self.gat_layers.append(
            GATConv(
                num_hidden * heads[-2],
                num_classes,
                heads[-1],
                feat_drop,
                attn_drop,
                negative_slope,
                residual,
                None,
            )
        )

    def forward(self, g, inputs):
        h = inputs

@@ -46,8 +78,9 @@ class GAT(nn.Module):
        logits = self.gat_layers[-1](g, h).mean(1)
        return logits


@utils.benchmark("time")
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()

@@ -55,11 +88,11 @@ def track_time(data):
    g = data[0].to(device)

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes

@@ -68,17 +101,14 @@ def track_time(data):
    g = dgl.add_self_loop(g)

    # create model
    model = GAT(1, in_feats, 8, n_classes, [8, 1], F.elu, 0.6, 0.6, 0.2, False)
    loss_fcn = torch.nn.CrossEntropyLoss()
    model = model.to(device)
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # dry run
    for epoch in range(10):
...
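For context, a minimal sketch of one training step for the GAT benchmark above, using only names defined in that file; the epoch-loop body is truncated in this diff, so the exact step is an assumption:

# Hedged sketch of a single training step; mirrors the visible setup code.
logits = model(g, features)  # (num_nodes, n_classes) after averaging heads
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()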
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl

from .. import utils


class GraphConv(nn.Module):
    def __init__(self, in_dim, out_dim, activation=None):
        super(GraphConv, self).__init__()

@@ -19,39 +22,42 @@ class GraphConv(nn.Module):
    def forward(self, graph, feat):
        with graph.local_scope():
            graph.ndata["ci"] = torch.pow(
                graph.out_degrees().float().clamp(min=1), -0.5
            )
            graph.ndata["cj"] = torch.pow(
                graph.in_degrees().float().clamp(min=1), -0.5
            )
            graph.ndata["h"] = feat
            graph.update_all(self.mfunc, self.rfunc)
            h = graph.ndata["h"]
            h = torch.matmul(h, self.weight) + self.bias
            if self.activation is not None:
                h = self.activation(h)
            return h

    def mfunc(self, edges):
        return {"m": edges.src["h"], "ci": edges.src["ci"]}

    def rfunc(self, nodes):
        ci = nodes.mailbox["ci"].unsqueeze(2)
        newh = (nodes.mailbox["m"] * ci).sum(1) * nodes.data["cj"].unsqueeze(1)
        return {"h": newh}


class GCN(nn.Module):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(
                GraphConv(n_hidden, n_hidden, activation=activation)
            )
        # output layer
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = nn.Dropout(p=dropout)

@@ -64,19 +70,20 @@ class GCN(nn.Module):
            h = layer(g, h)
        return h


@utils.benchmark("time", timeout=300)
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0].to(device).int()

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes

@@ -88,7 +95,7 @@ def track_time(data):
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata["norm"] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(in_feats, 16, n_classes, 1, F.relu, 0.5)

@@ -98,9 +105,7 @@ def track_time(data):
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # dry run
    for epoch in range(5):
        logits = model(g, features)
...
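The handwritten GraphConv above implements the symmetrically normalized GCN update, scaling each message by c_i^{-1/2} * c_j^{-1/2} of the clamped out-/in-degrees. As an illustration only (not part of this benchmark), a sketch of the same computation with DGL's built-in layer, assuming dgl.nn.pytorch.GraphConv with norm="both":

# Sketch: the built-in layer with norm="both" applies the same
# degree-normalized aggregation as the custom GraphConv above.
from dgl.nn.pytorch import GraphConv as DGLGraphConv

layer = DGLGraphConv(in_feats, 16, norm="both", activation=F.relu).to(device)
h = layer(g, features)  # same output contract as the custom layer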
import argparse
import pickle
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, IterableDataset

import dgl
import dgl.function as fn

from .. import utils


def _init_input_modules(g, ntype, textset, hidden_dims):
    # We initialize the linear projections of each input feature ``x`` as
    # follows:

@@ -31,8 +35,7 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
            module_dict[column] = m
        elif data.dtype == torch.int64:
            assert data.ndim == 1
            m = nn.Embedding(data.max() + 2, hidden_dims, padding_idx=-1)
            nn.init.xavier_uniform_(m.weight)
            module_dict[column] = m

@@ -45,14 +48,17 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
    return module_dict


class BagOfWordsPretrained(nn.Module):
    def __init__(self, field, hidden_dims):
        super().__init__()

        input_dims = field.vocab.vectors.shape[1]
        self.emb = nn.Embedding(
            len(field.vocab.itos),
            input_dims,
            padding_idx=field.vocab.stoi[field.pad_token],
        )
        self.emb.weight[:] = field.vocab.vectors
        self.proj = nn.Linear(input_dims, hidden_dims)
        nn.init.xavier_uniform_(self.proj.weight)

@@ -68,18 +74,22 @@ class BagOfWordsPretrained(nn.Module):
        x = self.emb(x).sum(1) / length.unsqueeze(1).float()
        return self.proj(x)


class BagOfWords(nn.Module):
    def __init__(self, field, hidden_dims):
        super().__init__()

        self.emb = nn.Embedding(
            len(field.vocab.itos),
            hidden_dims,
            padding_idx=field.vocab.stoi[field.pad_token],
        )
        nn.init.xavier_uniform_(self.emb.weight)

    def forward(self, x, length):
        return self.emb(x).sum(1) / length.unsqueeze(1).float()


class WeightedSAGEConv(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu):
        super().__init__()

@@ -91,7 +101,7 @@ class WeightedSAGEConv(nn.Module):
        self.dropout = nn.Dropout(0.5)

    def reset_parameters(self):
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_uniform_(self.Q.weight, gain=gain)
        nn.init.xavier_uniform_(self.W.weight, gain=gain)
        nn.init.constant_(self.Q.bias, 0)

@@ -105,18 +115,21 @@ class WeightedSAGEConv(nn.Module):
        """
        h_src, h_dst = h
        with g.local_scope():
            g.srcdata["n"] = self.act(self.Q(self.dropout(h_src)))
            g.edata["w"] = weights.float()
            g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n"))
            g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws"))
            n = g.dstdata["n"]
            ws = g.dstdata["ws"].unsqueeze(1).clamp(min=1)
            z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1))))
            z_norm = z.norm(2, 1, keepdim=True)
            z_norm = torch.where(
                z_norm == 0, torch.tensor(1.0).to(z_norm), z_norm
            )
            z = z / z_norm
            return z


class SAGENet(nn.Module):
    def __init__(self, hidden_dims, n_layers):
        """

@@ -130,28 +143,34 @@ class SAGENet(nn.Module):
        self.convs = nn.ModuleList()
        for _ in range(n_layers):
            self.convs.append(
                WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims)
            )

    def forward(self, blocks, h):
        for layer, block in zip(self.convs, blocks):
            h_dst = h[: block.number_of_nodes("DST/" + block.ntypes[0])]
            h = layer(block, (h, h_dst), block.edata["weights"])
        return h


class LinearProjector(nn.Module):
    """
    Projects each input feature of the graph linearly and sums them up
    """

    def __init__(self, full_graph, ntype, textset, hidden_dims):
        super().__init__()

        self.ntype = ntype
        self.inputs = _init_input_modules(
            full_graph, ntype, textset, hidden_dims
        )

    def forward(self, ndata):
        projections = []
        for feature, data in ndata.items():
            if feature == dgl.NID or feature.endswith("__len"):
                # This is an additional feature indicating the length of the ``feature``
                # column; we shouldn't process this.
                continue

@@ -159,7 +178,7 @@ class LinearProjector(nn.Module):
            module = self.inputs[feature]
            if isinstance(module, (BagOfWords, BagOfWordsPretrained)):
                # Textual feature; find the length and pass it to the textual module.
                length = ndata[feature + "__len"]
                result = module(data, length)
            else:
                result = module(data)

@@ -167,6 +186,7 @@ class LinearProjector(nn.Module):
        return torch.stack(projections, 1).sum(1)


class ItemToItemScorer(nn.Module):
    def __init__(self, full_graph, ntype):
        super().__init__()

@@ -177,7 +197,7 @@ class ItemToItemScorer(nn.Module):
    def _add_bias(self, edges):
        bias_src = self.bias[edges.src[dgl.NID]]
        bias_dst = self.bias[edges.dst[dgl.NID]]
        return {"s": edges.data["s"] + bias_src + bias_dst}

    def forward(self, item_item_graph, h):
        """

@@ -185,12 +205,13 @@ class ItemToItemScorer(nn.Module):
        h : hidden state of every node
        """
        with item_item_graph.local_scope():
            item_item_graph.ndata["h"] = h
            item_item_graph.apply_edges(fn.u_dot_v("h", "h", "s"))
            item_item_graph.apply_edges(self._add_bias)
            pair_score = item_item_graph.edata["s"]
        return pair_score


class PinSAGEModel(nn.Module):
    def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers):
        super().__init__()

@@ -210,6 +231,7 @@ class PinSAGEModel(nn.Module):
        h_item_dst = self.proj(blocks[-1].dstdata)
        return h_item_dst + self.sage(blocks, h_item)


def compact_and_copy(frontier, seeds):
    block = dgl.to_block(frontier, seeds)
    for col, data in frontier.edata.items():

@@ -218,6 +240,7 @@ def compact_and_copy(frontier, seeds):
        block.edata[col] = data[block.edata[dgl.EID]]
    return block


class ItemToItemBatchSampler(IterableDataset):
    def __init__(self, g, user_type, item_type, batch_size):
        self.g = g

@@ -229,42 +252,69 @@ class ItemToItemBatchSampler(IterableDataset):
    def __iter__(self):
        while True:
            heads = torch.randint(
                0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
            )
            tails = dgl.sampling.random_walk(
                self.g,
                heads,
                metapath=[self.item_to_user_etype, self.user_to_item_etype],
            )[0][:, 2]
            neg_tails = torch.randint(
                0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
            )

            mask = tails != -1
            yield heads[mask], tails[mask], neg_tails[mask]


class NeighborSampler(object):
    def __init__(
        self,
        g,
        user_type,
        item_type,
        random_walk_length,
        random_walk_restart_prob,
        num_random_walks,
        num_neighbors,
        num_layers,
    ):
        self.g = g
        self.user_type = user_type
        self.item_type = item_type
        self.user_to_item_etype = list(g.metagraph()[user_type][item_type])[0]
        self.item_to_user_etype = list(g.metagraph()[item_type][user_type])[0]
        self.samplers = [
            dgl.sampling.PinSAGESampler(
                g,
                item_type,
                user_type,
                random_walk_length,
                random_walk_restart_prob,
                num_random_walks,
                num_neighbors,
            )
            for _ in range(num_layers)
        ]

    def sample_blocks(self, seeds, heads=None, tails=None, neg_tails=None):
        blocks = []
        for sampler in self.samplers:
            frontier = sampler(seeds)
            if heads is not None:
                eids = frontier.edge_ids(
                    torch.cat([heads, heads]),
                    torch.cat([tails, neg_tails]),
                    return_uv=True,
                )[2]
                if len(eids) > 0:
                    old_frontier = frontier
                    frontier = dgl.remove_edges(old_frontier, eids)
                    # print(old_frontier)
                    # print(frontier)
                    # print(frontier.edata['weights'])
                    # frontier.edata['weights'] = old_frontier.edata['weights'][frontier.edata[dgl.EID]]
            block = compact_and_copy(frontier, seeds)
            seeds = block.srcdata[dgl.NID]
            blocks.insert(0, block)

@@ -274,17 +324,18 @@ class NeighborSampler(object):
        # Create a graph with positive connections only and another graph with negative
        # connections only.
        pos_graph = dgl.graph(
            (heads, tails), num_nodes=self.g.number_of_nodes(self.item_type)
        )
        neg_graph = dgl.graph(
            (heads, neg_tails), num_nodes=self.g.number_of_nodes(self.item_type)
        )
        pos_graph, neg_graph = dgl.compact_graphs([pos_graph, neg_graph])
        seeds = pos_graph.ndata[dgl.NID]

        blocks = self.sample_blocks(seeds, heads, tails, neg_tails)
        return pos_graph, neg_graph, blocks


def assign_simple_node_features(ndata, g, ntype, assign_id=False):
    """
    Copies data to the given block from the corresponding nodes in the original graph.

@@ -295,6 +346,7 @@ def assign_simple_node_features(ndata, g, ntype, assign_id=False):
        induced_nodes = ndata[dgl.NID]
        ndata[col] = g.nodes[ntype].data[col][induced_nodes]


def assign_textual_node_features(ndata, textset, ntype):
    """
    Assigns numericalized tokens from a torchtext dataset to given block.

@@ -327,7 +379,8 @@ def assign_textual_node_features(ndata, textset, ntype):
        tokens = tokens.t()

        ndata[field_name] = tokens
        ndata[field_name + "__len"] = lengths


def assign_features_to_blocks(blocks, g, textset, ntype):
    # For the first block (which is closest to the input), copy the features from

@@ -337,6 +390,7 @@ def assign_features_to_blocks(blocks, g, textset, ntype):
    assign_simple_node_features(blocks[-1].dstdata, g, ntype)
    assign_textual_node_features(blocks[-1].dstdata, textset, ntype)


class PinSAGECollator(object):
    def __init__(self, sampler, g, ntype, textset):
        self.sampler = sampler

@@ -347,7 +401,9 @@ class PinSAGECollator(object):
    def collate_train(self, batches):
        heads, tails, neg_tails = batches[0]
        # Construct multilayer neighborhood via PinSAGE...
        pos_graph, neg_graph, blocks = self.sampler.sample_from_item_pairs(
            heads, tails, neg_tails
        )
        assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
        return pos_graph, neg_graph, blocks

@@ -358,8 +414,9 @@ class PinSAGECollator(object):
        assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
        return blocks


@utils.benchmark("time", 600)
@utils.parametrize("data", ["nowplaying_rs"])
def track_time(data):
    dataset = utils.process_data(data)
    device = utils.get_bench_device()

@@ -383,24 +440,35 @@ def track_time(data):
    g = dataset[0]
    # Sampler
    batch_sampler = ItemToItemBatchSampler(
        g, user_ntype, item_ntype, batch_size
    )
    neighbor_sampler = NeighborSampler(
        g,
        user_ntype,
        item_ntype,
        random_walk_length,
        random_walk_restart_prob,
        num_random_walks,
        num_neighbors,
        num_layers,
    )
    collator = PinSAGECollator(neighbor_sampler, g, item_ntype, textset)
    dataloader = DataLoader(
        batch_sampler,
        collate_fn=collator.collate_train,
        num_workers=num_workers,
    )
    dataloader_test = DataLoader(
        torch.arange(g.number_of_nodes(item_ntype)),
        batch_size=batch_size,
        collate_fn=collator.collate_test,
        num_workers=num_workers,
    )

    # Model
    model = PinSAGEModel(g, item_ntype, textset, hidden_dims, num_layers).to(
        device
    )
    # Optimizer
    opt = torch.optim.Adam(model.parameters(), lr=lr)

@@ -423,7 +491,9 @@ def track_time(data):
        # start timer at before iter_start
        if batch_id == iter_start - 1:
            t0 = time.time()
        elif (
            batch_id == iter_count + iter_start - 1
        ):  # time iter_count iterations
            break
    t1 = time.time()
...
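For orientation, a hedged sketch of how the training dataloader above is consumed; the loop body is truncated in this diff, and the forward signature (pos_graph, neg_graph, blocks) returning a per-pair margin loss follows DGL's PinSAGE example and is an assumption here:

# Hedged sketch of the truncated training loop; the model call is assumed.
for batch_id, (pos_graph, neg_graph, blocks) in enumerate(dataloader):
    pos_graph = pos_graph.to(device)
    neg_graph = neg_graph.to(device)
    blocks = [b.to(device) for b in blocks]
    loss = model(pos_graph, neg_graph, blocks).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()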
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy

from .. import rgcn, utils


@utils.benchmark("time", 1200)
@utils.parametrize("data", ["aifb", "am"])
def track_time(data):
    # args
    if data == "aifb":
        num_bases = -1
        l2norm = 0.0
    elif data == "am":
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError()

    (
        g,
        num_rels,
        num_classes,
        labels,
        train_idx,
        test_idx,
        target_idx,
    ) = rgcn.load_data(data, get_norm=True)

    num_hidden = 16
    model = rgcn.RGCN(
        g.num_nodes(), num_hidden, num_classes, num_rels, num_bases=num_bases
    )
    device = utils.get_bench_device()

    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)

    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=l2norm
    )

    model.train()
    num_epochs = 30
...
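A hedged sketch of the truncated epoch loop; rgcn.RGCN's forward signature is not shown in this diff, so model(g) returning per-node logits and target_idx mapping graph nodes to the labeled entities (as in DGL's RGCN entity-classification example) are assumptions:

# Hedged sketch of one epoch of the benchmark's training loop.
for epoch in range(num_epochs):
    logits = model(g)[target_idx]
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()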
import time

import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn

from .. import utils


class NegativeSampler(object):
    def __init__(self, g, k, neg_share=False):
        self.weights = g.in_degrees().float() ** 0.75

@@ -23,34 +26,32 @@ class NegativeSampler(object):
            dst = self.weights.multinomial(n, replacement=True)
            dst = dst.view(-1, 1, self.k).expand(-1, self.k, -1).flatten()
        else:
            dst = self.weights.multinomial(n * self.k, replacement=True)
        src = src.repeat_interleave(self.k)
        return src, dst


def load_subtensor(g, input_nodes, device):
    """
    Copies features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    return batch_inputs


class SAGE(nn.Module):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        for i in range(1, n_layers - 1):
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

@@ -63,40 +64,45 @@ class SAGE(nn.Module):
            h = self.dropout(h)
        return h


def load_subtensor(g, input_nodes, device):
    """
    Copies features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    return batch_inputs


class CrossEntropyLoss(nn.Module):
    def forward(self, block_outputs, pos_graph, neg_graph):
        with pos_graph.local_scope():
            pos_graph.ndata["h"] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            pos_score = pos_graph.edata["score"]
        with neg_graph.local_scope():
            neg_graph.ndata["h"] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            neg_score = neg_graph.edata["score"]

        score = th.cat([pos_score, neg_score])
        label = th.cat(
            [th.ones_like(pos_score), th.zeros_like(neg_score)]
        ).long()
        loss = F.binary_cross_entropy_with_logits(score, label.float())
        return loss


@utils.benchmark("time", 600)
@utils.parametrize("data", ["reddit"])
@utils.parametrize("num_negs", [2, 8, 32])
@utils.parametrize("batch_size", [1024, 2048, 8192])
def track_time(data, num_negs, batch_size):
    data = utils.process_data(data)
    device = utils.get_bench_device()
    g = data[0]
    g.ndata["features"] = g.ndata["feat"]
    g.ndata["labels"] = g.ndata["label"]
    in_feats = g.ndata["features"].shape[1]
    n_classes = data.num_classes

    # Create csr/coo/csc formats before launching training processes with multi-gpu.

@@ -106,7 +112,7 @@ def track_time(data, num_negs, batch_size):
    num_epochs = 2
    num_hidden = 16
    num_layers = 2
    fan_out = "10,25"
    lr = 0.003
    dropout = 0.5
    num_workers = 4

@@ -119,21 +125,26 @@ def track_time(data, num_negs, batch_size):
    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(",")]
    )
    sampler = dgl.dataloading.as_edge_prediction_sampler(
        sampler,
        exclude="reverse_id",
        # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
        reverse_eids=th.cat(
            [th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)]
        ),
        negative_sampler=NegativeSampler(g, num_negs),
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_seeds,
        sampler,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers,
    )

    # Define model and optimizer
    model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)

@@ -145,7 +156,9 @@ def track_time(data, num_negs, batch_size):
    # Training loop
    avg = 0
    iter_tput = []
    for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
        dataloader
    ):
        # Load the input features as well as output labels
        batch_inputs = load_subtensor(g, input_nodes, device)
...
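The step body after load_subtensor is truncated; a hedged sketch of the remainder, assuming SAGE.forward(blocks, x) as in DGL's GraphSAGE examples and an optimizer named optimizer (note that CrossEntropyLoss here is the pairwise class defined above, not torch.nn.CrossEntropyLoss):

# Hedged sketch; device moves, the forward signature, and the optimizer
# name are assumptions.
loss_fcn = CrossEntropyLoss()
pos_graph, neg_graph = pos_graph.to(device), neg_graph.to(device)
blocks = [block.to(device) for block in blocks]
batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, pos_graph, neg_graph)
optimizer.zero_grad()
loss.backward()
optimizer.step()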