Unverified commit 23d09057, authored by Hongzhi (Steve) Chen, committed by GitHub
Browse files

[Misc] Black auto fix. (#4642)



* [Misc] Black auto fix.

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent a9f2acf3
import time
import numpy as np
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torchmetrics.functional as MF import torchmetrics.functional as MF
from ogb.nodeproppred import DglNodePropPredDataset
import dgl import dgl
import dgl.nn as dglnn import dgl.nn as dglnn
import time
import numpy as np
from ogb.nodeproppred import DglNodePropPredDataset
class SAGE(nn.Module): class SAGE(nn.Module):
def __init__(self, in_feats, n_hidden, n_classes):
    """Build a three-layer GraphSAGE model with mean aggregation.

    Parameters
    ----------
    in_feats : int
        Size of the input node features.
    n_hidden : int
        Hidden layer width (used by both intermediate layers).
    n_classes : int
        Number of output classes.
    """
    super().__init__()
    self.layers = nn.ModuleList()
    self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
    self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
    self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
    # dropout applied between layers in forward (p=0.5)
    self.dropout = nn.Dropout(0.5)
def forward(self, sg, x): def forward(self, sg, x):
...@@ -26,37 +29,43 @@ class SAGE(nn.Module): ...@@ -26,37 +29,43 @@ class SAGE(nn.Module):
h = self.dropout(h) h = self.dropout(h)
return h return h
dataset = dgl.data.AsNodePredDataset(DglNodePropPredDataset('ogbn-products'))
graph = dataset[0] # already prepares ndata['label'/'train_mask'/'val_mask'/'test_mask']
model = SAGE(graph.ndata['feat'].shape[1], 256, dataset.num_classes).cuda() dataset = dgl.data.AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
graph = dataset[
0
] # already prepares ndata['label'/'train_mask'/'val_mask'/'test_mask']
model = SAGE(graph.ndata["feat"].shape[1], 256, dataset.num_classes).cuda()
opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4) opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
num_partitions = 1000 num_partitions = 1000
sampler = dgl.dataloading.ClusterGCNSampler( sampler = dgl.dataloading.ClusterGCNSampler(
graph, num_partitions, graph,
prefetch_ndata=['feat', 'label', 'train_mask', 'val_mask', 'test_mask']) num_partitions,
prefetch_ndata=["feat", "label", "train_mask", "val_mask", "test_mask"],
)
# DataLoader for generic dataloading with a graph, a set of indices (any indices, like # DataLoader for generic dataloading with a graph, a set of indices (any indices, like
# partition IDs here), and a graph sampler. # partition IDs here), and a graph sampler.
dataloader = dgl.dataloading.DataLoader( dataloader = dgl.dataloading.DataLoader(
graph, graph,
torch.arange(num_partitions).to('cuda'), torch.arange(num_partitions).to("cuda"),
sampler, sampler,
device='cuda', device="cuda",
batch_size=100, batch_size=100,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
num_workers=0, num_workers=0,
use_uva=True) use_uva=True,
)
durations = [] durations = []
for _ in range(10): for _ in range(10):
t0 = time.time() t0 = time.time()
model.train() model.train()
for it, sg in enumerate(dataloader): for it, sg in enumerate(dataloader):
x = sg.ndata['feat'] x = sg.ndata["feat"]
y = sg.ndata['label'] y = sg.ndata["label"]
m = sg.ndata['train_mask'].bool() m = sg.ndata["train_mask"].bool()
y_hat = model(sg, x) y_hat = model(sg, x)
loss = F.cross_entropy(y_hat[m], y[m]) loss = F.cross_entropy(y_hat[m], y[m])
opt.zero_grad() opt.zero_grad()
...@@ -65,7 +74,7 @@ for _ in range(10): ...@@ -65,7 +74,7 @@ for _ in range(10):
if it % 20 == 0: if it % 20 == 0:
acc = MF.accuracy(y_hat[m], y[m]) acc = MF.accuracy(y_hat[m], y[m])
mem = torch.cuda.max_memory_allocated() / 1000000 mem = torch.cuda.max_memory_allocated() / 1000000
print('Loss', loss.item(), 'Acc', acc.item(), 'GPU Mem', mem, 'MB') print("Loss", loss.item(), "Acc", acc.item(), "GPU Mem", mem, "MB")
tt = time.time() tt = time.time()
print(tt - t0) print(tt - t0)
durations.append(tt - t0) durations.append(tt - t0)
...@@ -75,10 +84,10 @@ for _ in range(10): ...@@ -75,10 +84,10 @@ for _ in range(10):
val_preds, test_preds = [], [] val_preds, test_preds = [], []
val_labels, test_labels = [], [] val_labels, test_labels = [], []
for it, sg in enumerate(dataloader): for it, sg in enumerate(dataloader):
x = sg.ndata['feat'] x = sg.ndata["feat"]
y = sg.ndata['label'] y = sg.ndata["label"]
m_val = sg.ndata['val_mask'].bool() m_val = sg.ndata["val_mask"].bool()
m_test = sg.ndata['test_mask'].bool() m_test = sg.ndata["test_mask"].bool()
y_hat = model(sg, x) y_hat = model(sg, x)
val_preds.append(y_hat[m_val]) val_preds.append(y_hat[m_val])
val_labels.append(y[m_val]) val_labels.append(y[m_val])
...@@ -90,6 +99,6 @@ for _ in range(10): ...@@ -90,6 +99,6 @@ for _ in range(10):
test_labels = torch.cat(test_labels, 0) test_labels = torch.cat(test_labels, 0)
val_acc = MF.accuracy(val_preds, val_labels) val_acc = MF.accuracy(val_preds, val_labels)
test_acc = MF.accuracy(test_preds, test_labels) test_acc = MF.accuracy(test_preds, test_labels)
print('Validation acc:', val_acc.item(), 'Test acc:', test_acc.item()) print("Validation acc:", val_acc.item(), "Test acc:", test_acc.item())
print(np.mean(durations[4:]), np.std(durations[4:])) print(np.mean(durations[4:]), np.std(durations[4:]))
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import dgl
from collections import defaultdict as ddict from collections import defaultdict as ddict
import numpy as np
import torch
from ordered_set import OrderedSet from ordered_set import OrderedSet
from torch.utils.data import DataLoader, Dataset
import dgl
class TrainDataset(Dataset): class TrainDataset(Dataset):
""" """
...@@ -18,6 +21,7 @@ class TrainDataset(Dataset): ...@@ -18,6 +21,7 @@ class TrainDataset(Dataset):
------- -------
A training Dataset class instance used by DataLoader A training Dataset class instance used by DataLoader
""" """
def __init__(self, triples, num_ent, lbl_smooth): def __init__(self, triples, num_ent, lbl_smooth):
self.triples = triples self.triples = triples
self.num_ent = num_ent self.num_ent = num_ent
...@@ -29,11 +33,13 @@ class TrainDataset(Dataset): ...@@ -29,11 +33,13 @@ class TrainDataset(Dataset):
def __getitem__(self, idx):
    """Return (triple, label) for the idx-th training sample.

    `triple` is a LongTensor (sub, rel, -1); `label` is a dense
    multi-hot vector over all entities, optionally label-smoothed.
    """
    ele = self.triples[idx]
    triple, label = torch.LongTensor(ele["triple"]), np.int32(ele["label"])
    trp_label = self.get_label(label)
    # label smoothing: pull targets toward the uniform distribution
    if self.lbl_smooth != 0.0:
        trp_label = (1.0 - self.lbl_smooth) * trp_label + (
            1.0 / self.num_ent
        )
    return triple, trp_label
...@@ -48,7 +54,7 @@ class TrainDataset(Dataset): ...@@ -48,7 +54,7 @@ class TrainDataset(Dataset):
trp_label = torch.stack(labels, dim=0) trp_label = torch.stack(labels, dim=0)
return triple, trp_label return triple, trp_label
#for edges that exist in the graph, the entry is 1.0, otherwise the entry is 0.0 # for edges that exist in the graph, the entry is 1.0, otherwise the entry is 0.0
def get_label(self, label): def get_label(self, label):
y = np.zeros([self.num_ent], dtype=np.float32) y = np.zeros([self.num_ent], dtype=np.float32)
for e2 in label: for e2 in label:
...@@ -68,6 +74,7 @@ class TestDataset(Dataset): ...@@ -68,6 +74,7 @@ class TestDataset(Dataset):
------- -------
An evaluation Dataset class instance used by DataLoader for model evaluation An evaluation Dataset class instance used by DataLoader for model evaluation
""" """
def __init__(self, triples, num_ent): def __init__(self, triples, num_ent):
self.triples = triples self.triples = triples
self.num_ent = num_ent self.num_ent = num_ent
...@@ -77,7 +84,7 @@ class TestDataset(Dataset): ...@@ -77,7 +84,7 @@ class TestDataset(Dataset):
def __getitem__(self, idx):
    """Return (triple, label) for the idx-th evaluation sample.

    `triple` is a LongTensor (sub, rel, obj); `label` is a dense
    multi-hot vector over all entities (no smoothing at eval time).
    """
    ele = self.triples[idx]
    triple, label = torch.LongTensor(ele["triple"]), np.int32(ele["label"])
    label = self.get_label(label)
    return triple, label
...@@ -93,7 +100,7 @@ class TestDataset(Dataset): ...@@ -93,7 +100,7 @@ class TestDataset(Dataset):
label = torch.stack(labels, dim=0) label = torch.stack(labels, dim=0)
return triple, label return triple, label
#for edges that exist in the graph, the entry is 1.0, otherwise the entry is 0.0 # for edges that exist in the graph, the entry is 1.0, otherwise the entry is 0.0
def get_label(self, label): def get_label(self, label):
y = np.zeros([self.num_ent], dtype=np.float32) y = np.zeros([self.num_ent], dtype=np.float32)
for e2 in label: for e2 in label:
...@@ -102,7 +109,6 @@ class TestDataset(Dataset): ...@@ -102,7 +109,6 @@ class TestDataset(Dataset):
class Data(object): class Data(object):
def __init__(self, dataset, lbl_smooth, num_workers, batch_size): def __init__(self, dataset, lbl_smooth, num_workers, batch_size):
""" """
Reading in raw triples and converts it into a standard format. Reading in raw triples and converts it into a standard format.
...@@ -133,18 +139,23 @@ class Data(object): ...@@ -133,18 +139,23 @@ class Data(object):
self.num_workers = num_workers self.num_workers = num_workers
self.batch_size = batch_size self.batch_size = batch_size
#read in raw data and get mappings # read in raw data and get mappings
ent_set, rel_set = OrderedSet(), OrderedSet() ent_set, rel_set = OrderedSet(), OrderedSet()
for split in ['train', 'test', 'valid']: for split in ["train", "test", "valid"]:
for line in open('./{}/{}.txt'.format(self.dataset, split)): for line in open("./{}/{}.txt".format(self.dataset, split)):
sub, rel, obj = map(str.lower, line.strip().split('\t')) sub, rel, obj = map(str.lower, line.strip().split("\t"))
ent_set.add(sub) ent_set.add(sub)
rel_set.add(rel) rel_set.add(rel)
ent_set.add(obj) ent_set.add(obj)
self.ent2id = {ent: idx for idx, ent in enumerate(ent_set)} self.ent2id = {ent: idx for idx, ent in enumerate(ent_set)}
self.rel2id = {rel: idx for idx, rel in enumerate(rel_set)} self.rel2id = {rel: idx for idx, rel in enumerate(rel_set)}
self.rel2id.update({rel+'_reverse': idx+len(self.rel2id) for idx, rel in enumerate(rel_set)}) self.rel2id.update(
{
rel + "_reverse": idx + len(self.rel2id)
for idx, rel in enumerate(rel_set)
}
)
self.id2ent = {idx: ent for ent, idx in self.ent2id.items()} self.id2ent = {idx: ent for ent, idx in self.ent2id.items()}
self.id2rel = {idx: rel for rel, idx in self.rel2id.items()} self.id2rel = {idx: rel for rel, idx in self.rel2id.items()}
...@@ -152,92 +163,121 @@ class Data(object): ...@@ -152,92 +163,121 @@ class Data(object):
self.num_ent = len(self.ent2id) self.num_ent = len(self.ent2id)
self.num_rel = len(self.rel2id) // 2 self.num_rel = len(self.rel2id) // 2
#read in ids of subjects, relations, and objects for train/test/valid # read in ids of subjects, relations, and objects for train/test/valid
self.data = ddict(list) #stores the triples self.data = ddict(list) # stores the triples
sr2o = ddict(set) #The key of sr20 is (subject, relation), and the items are all the successors following (subject, relation) sr2o = ddict(
src=[] set
dst=[] ) # The key of sr20 is (subject, relation), and the items are all the successors following (subject, relation)
src = []
dst = []
rels = [] rels = []
inver_src = [] inver_src = []
inver_dst = [] inver_dst = []
inver_rels = [] inver_rels = []
for split in ['train', 'test', 'valid']: for split in ["train", "test", "valid"]:
for line in open('./{}/{}.txt'.format(self.dataset, split)): for line in open("./{}/{}.txt".format(self.dataset, split)):
sub, rel, obj = map(str.lower, line.strip().split('\t')) sub, rel, obj = map(str.lower, line.strip().split("\t"))
sub_id, rel_id, obj_id = self.ent2id[sub], self.rel2id[rel], self.ent2id[obj] sub_id, rel_id, obj_id = (
self.ent2id[sub],
self.rel2id[rel],
self.ent2id[obj],
)
self.data[split].append((sub_id, rel_id, obj_id)) self.data[split].append((sub_id, rel_id, obj_id))
if split == 'train': if split == "train":
sr2o[(sub_id, rel_id)].add(obj_id) sr2o[(sub_id, rel_id)].add(obj_id)
sr2o[(obj_id, rel_id+self.num_rel)].add(sub_id) #append the reversed edges sr2o[(obj_id, rel_id + self.num_rel)].add(
sub_id
) # append the reversed edges
src.append(sub_id) src.append(sub_id)
dst.append(obj_id) dst.append(obj_id)
rels.append(rel_id) rels.append(rel_id)
inver_src.append(obj_id) inver_src.append(obj_id)
inver_dst.append(sub_id) inver_dst.append(sub_id)
inver_rels.append(rel_id+self.num_rel) inver_rels.append(rel_id + self.num_rel)
#construct dgl graph # construct dgl graph
src = src + inver_src src = src + inver_src
dst = dst + inver_dst dst = dst + inver_dst
rels = rels + inver_rels rels = rels + inver_rels
self.g = dgl.graph((src, dst), num_nodes=self.num_ent) self.g = dgl.graph((src, dst), num_nodes=self.num_ent)
self.g.edata['etype'] = torch.Tensor(rels).long() self.g.edata["etype"] = torch.Tensor(rels).long()
#identify in and out edges # identify in and out edges
in_edges_mask = [True] * (self.g.num_edges()//2) + [False] * (self.g.num_edges()//2) in_edges_mask = [True] * (self.g.num_edges() // 2) + [False] * (
out_edges_mask = [False] * (self.g.num_edges()//2) + [True] * (self.g.num_edges()//2) self.g.num_edges() // 2
self.g.edata['in_edges_mask'] = torch.Tensor(in_edges_mask) )
self.g.edata['out_edges_mask'] = torch.Tensor(out_edges_mask) out_edges_mask = [False] * (self.g.num_edges() // 2) + [True] * (
self.g.num_edges() // 2
)
self.g.edata["in_edges_mask"] = torch.Tensor(in_edges_mask)
self.g.edata["out_edges_mask"] = torch.Tensor(out_edges_mask)
#Prepare train/valid/test data # Prepare train/valid/test data
self.data = dict(self.data) self.data = dict(self.data)
self.sr2o = {k: list(v) for k, v in sr2o.items()} #store only the train data self.sr2o = {
k: list(v) for k, v in sr2o.items()
} # store only the train data
for split in ['test', 'valid']: for split in ["test", "valid"]:
for sub, rel, obj in self.data[split]: for sub, rel, obj in self.data[split]:
sr2o[(sub, rel)].add(obj) sr2o[(sub, rel)].add(obj)
sr2o[(obj, rel+self.num_rel)].add(sub) sr2o[(obj, rel + self.num_rel)].add(sub)
self.sr2o_all = {k: list(v) for k, v in sr2o.items()} #store all the data self.sr2o_all = {
k: list(v) for k, v in sr2o.items()
} # store all the data
self.triples = ddict(list) self.triples = ddict(list)
for (sub, rel), obj in self.sr2o.items(): for (sub, rel), obj in self.sr2o.items():
self.triples['train'].append({'triple':(sub, rel, -1), 'label': self.sr2o[(sub, rel)]}) self.triples["train"].append(
{"triple": (sub, rel, -1), "label": self.sr2o[(sub, rel)]}
)
for split in ['test', 'valid']: for split in ["test", "valid"]:
for sub, rel, obj in self.data[split]: for sub, rel, obj in self.data[split]:
rel_inv = rel + self.num_rel rel_inv = rel + self.num_rel
self.triples['{}_{}'.format(split, 'tail')].append({'triple': (sub, rel, obj), 'label': self.sr2o_all[(sub, rel)]}) self.triples["{}_{}".format(split, "tail")].append(
self.triples['{}_{}'.format(split, 'head')].append({'triple': (obj, rel_inv, sub), 'label': self.sr2o_all[(obj, rel_inv)]}) {
"triple": (sub, rel, obj),
"label": self.sr2o_all[(sub, rel)],
}
)
self.triples["{}_{}".format(split, "head")].append(
{
"triple": (obj, rel_inv, sub),
"label": self.sr2o_all[(obj, rel_inv)],
}
)
self.triples = dict(self.triples) self.triples = dict(self.triples)
def get_train_data_loader(split, batch_size, shuffle=True):
    # Closure over `self` (Data instance): wraps the requested split in a
    # TrainDataset and returns a shuffling DataLoader using the dataset's
    # own collate_fn. num_workers is clamped to be non-negative.
    return DataLoader(
        TrainDataset(
            self.triples[split], self.num_ent, self.lbl_smooth
        ),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=max(0, self.num_workers),
        collate_fn=TrainDataset.collate_fn,
    )
def get_test_data_loader(split, batch_size, shuffle=True):
    # Closure over `self` (Data instance): same as the train loader but
    # without label smoothing — evaluation uses exact multi-hot labels.
    return DataLoader(
        TestDataset(self.triples[split], self.num_ent),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=max(0, self.num_workers),
        collate_fn=TestDataset.collate_fn,
    )
#train/valid/test dataloaders # train/valid/test dataloaders
self.data_iter = { self.data_iter = {
'train': get_train_data_loader('train', self.batch_size), "train": get_train_data_loader("train", self.batch_size),
'valid_head': get_test_data_loader('valid_head', self.batch_size), "valid_head": get_test_data_loader("valid_head", self.batch_size),
'valid_tail': get_test_data_loader('valid_tail', self.batch_size), "valid_tail": get_test_data_loader("valid_tail", self.batch_size),
'test_head': get_test_data_loader('test_head', self.batch_size), "test_head": get_test_data_loader("test_head", self.batch_size),
'test_tail': get_test_data_loader('test_tail', self.batch_size), "test_tail": get_test_data_loader("test_tail", self.batch_size),
} }
\ No newline at end of file
import argparse import argparse
from time import time
import numpy as np
import torch as th import torch as th
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn import torch.optim as optim
from data_loader import Data
from models import CompGCN_ConvE
from utils import in_out_norm from utils import in_out_norm
from models import CompGCN_ConvE import dgl.function as fn
from data_loader import Data
import numpy as np
from time import time
# predict the tail for (head, rel, -1) or head for (-1, rel, tail)
def predict(model, graph, device, data_iter, split="valid", mode="tail"):
    """Score every entity for each query triple and accumulate rank metrics.

    Parameters
    ----------
    model : callable module with .eval(); model(graph, sub, rel) -> (B, num_ent) scores
    graph : graph object passed through to the model
    device : torch device (or device string) for the batches
    data_iter : dict of DataLoaders keyed by "{split}_{mode}"
    split : "valid" or "test"
    mode : "tail" (predict object) or "head" (predict subject)

    Returns
    -------
    dict with running sums: "count", "mr", "mrr", and "hits@{1,3,10}".
    """
    model.eval()
    with th.no_grad():
        results = {}
        train_iter = iter(data_iter["{}_{}".format(split, mode)])
        for step, batch in enumerate(train_iter):
            triple, label = batch[0].to(device), batch[1].to(device)
            sub, rel, obj, label = (
                triple[:, 0],
                triple[:, 1],
                triple[:, 2],
                label,
            )
            pred = model(graph, sub, rel)
            b_range = th.arange(pred.size()[0], device=device)
            # filtered setting: mask out all known true entities except the
            # target, so they cannot outrank it
            target_pred = pred[b_range, obj]
            pred = th.where(label.byte(), -th.ones_like(pred) * 10000000, pred)
            pred[b_range, obj] = target_pred
            # compute metrics: double argsort yields the 1-based rank of the
            # target entity under descending scores
            ranks = (
                1
                + th.argsort(
                    th.argsort(pred, dim=1, descending=True),
                    dim=1,
                    descending=False,
                )[b_range, obj]
            )
            ranks = ranks.float()
            results["count"] = th.numel(ranks) + results.get("count", 0.0)
            results["mr"] = th.sum(ranks).item() + results.get("mr", 0.0)
            results["mrr"] = th.sum(1.0 / ranks).item() + results.get(
                "mrr", 0.0
            )
            for k in [1, 3, 10]:
                results["hits@{}".format(k)] = th.numel(
                    ranks[ranks <= (k)]
                ) + results.get("hits@{}".format(k), 0.0)
    return results
# evaluation function, evaluate the head and tail prediction and then combine the results
def evaluate(model, graph, device, data_iter, split="valid"):
    """Run head and tail prediction on `split` and combine the metrics.

    Returns a dict with per-direction and averaged MR, MRR and Hits@{1,3,10},
    each rounded to 5 decimal places.
    """
    # predict for head and tail
    left_results = predict(model, graph, device, data_iter, split, mode="tail")
    right_results = predict(model, graph, device, data_iter, split, mode="head")
    results = {}
    count = float(left_results["count"])

    # combine the head and tail prediction results
    # Metrics: MRR, MR, and Hit@k
    results["left_mr"] = round(left_results["mr"] / count, 5)
    results["left_mrr"] = round(left_results["mrr"] / count, 5)
    results["right_mr"] = round(right_results["mr"] / count, 5)
    results["right_mrr"] = round(right_results["mrr"] / count, 5)
    results["mr"] = round(
        (left_results["mr"] + right_results["mr"]) / (2 * count), 5
    )
    results["mrr"] = round(
        (left_results["mrr"] + right_results["mrr"]) / (2 * count), 5
    )
    for k in [1, 3, 10]:
        results["left_hits@{}".format(k)] = round(
            left_results["hits@{}".format(k)] / count, 5
        )
        results["right_hits@{}".format(k)] = round(
            right_results["hits@{}".format(k)] / count, 5
        )
        results["hits@{}".format(k)] = round(
            (
                left_results["hits@{}".format(k)]
                + right_results["hits@{}".format(k)]
            )
            / (2 * count),
            5,
        )
    return results
...@@ -69,21 +100,24 @@ def main(args): ...@@ -69,21 +100,24 @@ def main(args):
# Step 1: Prepare graph data and retrieve train/validation/test index ============================= # # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
# check cuda # check cuda
if args.gpu >= 0 and th.cuda.is_available(): if args.gpu >= 0 and th.cuda.is_available():
device = 'cuda:{}'.format(args.gpu) device = "cuda:{}".format(args.gpu)
else: else:
device = 'cpu' device = "cpu"
#construct graph, split in/out edges and prepare train/validation/test data_loader # construct graph, split in/out edges and prepare train/validation/test data_loader
data = Data(args.dataset, args.lbl_smooth, args.num_workers, args.batch_size) data = Data(
data_iter = data.data_iter #train/validation/test data_loader args.dataset, args.lbl_smooth, args.num_workers, args.batch_size
)
data_iter = data.data_iter # train/validation/test data_loader
graph = data.g.to(device) graph = data.g.to(device)
num_rel = th.max(graph.edata['etype']).item() + 1 num_rel = th.max(graph.edata["etype"]).item() + 1
#Compute in/out edge norms and store in edata # Compute in/out edge norms and store in edata
graph = in_out_norm(graph) graph = in_out_norm(graph)
# Step 2: Create model =================================================================== # # Step 2: Create model =================================================================== #
compgcn_model=CompGCN_ConvE(num_bases=args.num_bases, compgcn_model = CompGCN_ConvE(
num_bases=args.num_bases,
num_rel=num_rel, num_rel=num_rel,
num_ent=graph.num_nodes(), num_ent=graph.num_nodes(),
in_dim=args.init_dim, in_dim=args.init_dim,
...@@ -97,13 +131,15 @@ def main(args): ...@@ -97,13 +131,15 @@ def main(args):
feat_drop=args.feat_drop, feat_drop=args.feat_drop,
ker_sz=args.ker_sz, ker_sz=args.ker_sz,
k_w=args.k_w, k_w=args.k_w,
k_h=args.k_h k_h=args.k_h,
) )
compgcn_model = compgcn_model.to(device) compgcn_model = compgcn_model.to(device)
# Step 3: Create training components ===================================================== # # Step 3: Create training components ===================================================== #
loss_fn = th.nn.BCELoss() loss_fn = th.nn.BCELoss()
optimizer = optim.Adam(compgcn_model.parameters(), lr=args.lr, weight_decay=args.l2) optimizer = optim.Adam(
compgcn_model.parameters(), lr=args.lr, weight_decay=args.l2
)
# Step 4: training epoches =============================================================== # # Step 4: training epoches =============================================================== #
best_mrr = 0.0 best_mrr = 0.0
...@@ -111,11 +147,16 @@ def main(args): ...@@ -111,11 +147,16 @@ def main(args):
for epoch in range(args.max_epochs): for epoch in range(args.max_epochs):
# Training and validation using a full graph # Training and validation using a full graph
compgcn_model.train() compgcn_model.train()
train_loss=[] train_loss = []
t0 = time() t0 = time()
for step, batch in enumerate(data_iter['train']): for step, batch in enumerate(data_iter["train"]):
triple, label = batch[0].to(device), batch[1].to(device) triple, label = batch[0].to(device), batch[1].to(device)
sub, rel, obj, label = triple[:, 0], triple[:, 1], triple[:, 2], label sub, rel, obj, label = (
triple[:, 0],
triple[:, 1],
triple[:, 2],
label,
)
logits = compgcn_model(graph, sub, rel) logits = compgcn_model(graph, sub, rel)
# compute loss # compute loss
...@@ -130,62 +171,188 @@ def main(args): ...@@ -130,62 +171,188 @@ def main(args):
train_loss = np.sum(train_loss) train_loss = np.sum(train_loss)
t1 = time() t1 = time()
val_results = evaluate(compgcn_model, graph, device, data_iter, split='valid') val_results = evaluate(
compgcn_model, graph, device, data_iter, split="valid"
)
t2 = time() t2 = time()
#validate # validate
if val_results['mrr']>best_mrr: if val_results["mrr"] > best_mrr:
best_mrr = val_results['mrr'] best_mrr = val_results["mrr"]
best_epoch = epoch best_epoch = epoch
th.save(compgcn_model.state_dict(), 'comp_link'+'_'+args.dataset) th.save(
compgcn_model.state_dict(), "comp_link" + "_" + args.dataset
)
kill_cnt = 0 kill_cnt = 0
print("saving model...") print("saving model...")
else: else:
kill_cnt += 1 kill_cnt += 1
if kill_cnt > 100: if kill_cnt > 100:
print('early stop.') print("early stop.")
break break
print("In epoch {}, Train Loss: {:.4f}, Valid MRR: {:.5}\n, Train time: {}, Valid time: {}"\ print(
.format(epoch, train_loss, val_results['mrr'], t1-t0, t2-t1)) "In epoch {}, Train Loss: {:.4f}, Valid MRR: {:.5}\n, Train time: {}, Valid time: {}".format(
epoch, train_loss, val_results["mrr"], t1 - t0, t2 - t1
)
)
#test use the best model # test use the best model
compgcn_model.eval() compgcn_model.eval()
compgcn_model.load_state_dict(th.load('comp_link'+'_'+args.dataset)) compgcn_model.load_state_dict(th.load("comp_link" + "_" + args.dataset))
test_results = evaluate(compgcn_model, graph, device, data_iter, split='test') test_results = evaluate(
print("Test MRR: {:.5}\n, MR: {:.10}\n, H@10: {:.5}\n, H@3: {:.5}\n, H@1: {:.5}\n"\ compgcn_model, graph, device, data_iter, split="test"
.format(test_results['mrr'], test_results['mr'], test_results['hits@10'], test_results['hits@3'], test_results['hits@1'])) )
print(
"Test MRR: {:.5}\n, MR: {:.10}\n, H@10: {:.5}\n, H@3: {:.5}\n, H@1: {:.5}\n".format(
if __name__ == '__main__': test_results["mrr"],
parser = argparse.ArgumentParser(description='Parser For Arguments', formatter_class=argparse.ArgumentDefaultsHelpFormatter) test_results["mr"],
test_results["hits@10"],
parser.add_argument('--data', dest='dataset', default='FB15k-237', help='Dataset to use, default: FB15k-237') test_results["hits@3"],
parser.add_argument('--model', dest='model', default='compgcn', help='Model Name') test_results["hits@1"],
parser.add_argument('--score_func', dest='score_func', default='conve', help='Score Function for Link prediction') )
parser.add_argument('--opn', dest='opn', default='ccorr', help='Composition Operation to be used in CompGCN') )
parser.add_argument('--batch', dest='batch_size', default=1024, type=int, help='Batch size')
parser.add_argument('--gpu', type=int, default='0', help='Set GPU Ids : Eg: For CPU = -1, For Single GPU = 0') if __name__ == "__main__":
parser.add_argument('--epoch', dest='max_epochs', type=int, default=500, help='Number of epochs') parser = argparse.ArgumentParser(
parser.add_argument('--l2', type=float, default=0.0, help='L2 Regularization for Optimizer') description="Parser For Arguments",
parser.add_argument('--lr', type=float, default=0.001, help='Starting Learning Rate') formatter_class=argparse.ArgumentDefaultsHelpFormatter,
parser.add_argument('--lbl_smooth', dest='lbl_smooth', type=float, default=0.1, help='Label Smoothing') )
parser.add_argument('--num_workers', type=int, default=10, help='Number of processes to construct batches')
parser.add_argument('--seed', dest='seed', default=41504, type=int, help='Seed for randomization') parser.add_argument(
"--data",
parser.add_argument('--num_bases', dest='num_bases', default=-1, type=int, help='Number of basis relation vectors to use') dest="dataset",
parser.add_argument('--init_dim', dest='init_dim', default=100, type=int, help='Initial dimension size for entities and relations') default="FB15k-237",
parser.add_argument('--layer_size', nargs='?', default='[200]', help='List of output size for each compGCN layer') help="Dataset to use, default: FB15k-237",
parser.add_argument('--gcn_drop', dest='dropout', default=0.1, type=float, help='Dropout to use in GCN Layer') )
parser.add_argument('--layer_dropout', nargs='?', default='[0.3]', help='List of dropout value after each compGCN layer') parser.add_argument(
"--model", dest="model", default="compgcn", help="Model Name"
)
parser.add_argument(
"--score_func",
dest="score_func",
default="conve",
help="Score Function for Link prediction",
)
parser.add_argument(
"--opn",
dest="opn",
default="ccorr",
help="Composition Operation to be used in CompGCN",
)
parser.add_argument(
"--batch", dest="batch_size", default=1024, type=int, help="Batch size"
)
parser.add_argument(
"--gpu",
type=int,
default="0",
help="Set GPU Ids : Eg: For CPU = -1, For Single GPU = 0",
)
parser.add_argument(
"--epoch",
dest="max_epochs",
type=int,
default=500,
help="Number of epochs",
)
parser.add_argument(
"--l2", type=float, default=0.0, help="L2 Regularization for Optimizer"
)
parser.add_argument(
"--lr", type=float, default=0.001, help="Starting Learning Rate"
)
parser.add_argument(
"--lbl_smooth",
dest="lbl_smooth",
type=float,
default=0.1,
help="Label Smoothing",
)
parser.add_argument(
"--num_workers",
type=int,
default=10,
help="Number of processes to construct batches",
)
parser.add_argument(
"--seed",
dest="seed",
default=41504,
type=int,
help="Seed for randomization",
)
parser.add_argument(
"--num_bases",
dest="num_bases",
default=-1,
type=int,
help="Number of basis relation vectors to use",
)
parser.add_argument(
"--init_dim",
dest="init_dim",
default=100,
type=int,
help="Initial dimension size for entities and relations",
)
parser.add_argument(
"--layer_size",
nargs="?",
default="[200]",
help="List of output size for each compGCN layer",
)
parser.add_argument(
"--gcn_drop",
dest="dropout",
default=0.1,
type=float,
help="Dropout to use in GCN Layer",
)
parser.add_argument(
"--layer_dropout",
nargs="?",
default="[0.3]",
help="List of dropout value after each compGCN layer",
)
# ConvE specific hyperparameters # ConvE specific hyperparameters
parser.add_argument('--hid_drop', dest='hid_drop', default=0.3, type=float, help='ConvE: Hidden dropout') parser.add_argument(
parser.add_argument('--feat_drop', dest='feat_drop', default=0.3, type=float, help='ConvE: Feature Dropout') "--hid_drop",
parser.add_argument('--k_w', dest='k_w', default=10, type=int, help='ConvE: k_w') dest="hid_drop",
parser.add_argument('--k_h', dest='k_h', default=20, type=int, help='ConvE: k_h') default=0.3,
parser.add_argument('--num_filt', dest='num_filt', default=200, type=int, help='ConvE: Number of filters in convolution') type=float,
parser.add_argument('--ker_sz', dest='ker_sz', default=7, type=int, help='ConvE: Kernel size to use') help="ConvE: Hidden dropout",
)
parser.add_argument(
"--feat_drop",
dest="feat_drop",
default=0.3,
type=float,
help="ConvE: Feature Dropout",
)
parser.add_argument(
"--k_w", dest="k_w", default=10, type=int, help="ConvE: k_w"
)
parser.add_argument(
"--k_h", dest="k_h", default=20, type=int, help="ConvE: k_h"
)
parser.add_argument(
"--num_filt",
dest="num_filt",
default=200,
type=int,
help="ConvE: Number of filters in convolution",
)
parser.add_argument(
"--ker_sz",
dest="ker_sz",
default=7,
type=int,
help="ConvE: Kernel size to use",
)
args = parser.parse_args() args = parser.parse_args()
...@@ -198,4 +365,3 @@ if __name__ == '__main__': ...@@ -198,4 +365,3 @@ if __name__ == '__main__':
args.layer_dropout = eval(args.layer_dropout) args.layer_dropout = eval(args.layer_dropout)
main(args) main(args)
import torch as th import torch as th
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from utils import ccorr
import dgl import dgl
import dgl.function as fn import dgl.function as fn
from utils import ccorr
class CompGraphConv(nn.Module): class CompGraphConv(nn.Module):
"""One layer of CompGCN.""" """One layer of CompGCN."""
def __init__(self, def __init__(
in_dim, self, in_dim, out_dim, comp_fn="sub", batchnorm=True, dropout=0.1
out_dim, ):
comp_fn='sub',
batchnorm=True,
dropout=0.1):
super(CompGraphConv, self).__init__() super(CompGraphConv, self).__init__()
self.in_dim = in_dim self.in_dim = in_dim
self.out_dim = out_dim self.out_dim = out_dim
...@@ -40,63 +36,74 @@ class CompGraphConv(nn.Module): ...@@ -40,63 +36,74 @@ class CompGraphConv(nn.Module):
# define relation transform layer # define relation transform layer
self.W_R = nn.Linear(self.in_dim, self.out_dim) self.W_R = nn.Linear(self.in_dim, self.out_dim)
#self loop embedding # self loop embedding
self.loop_rel = nn.Parameter(th.Tensor(1, self.in_dim)) self.loop_rel = nn.Parameter(th.Tensor(1, self.in_dim))
nn.init.xavier_normal_(self.loop_rel) nn.init.xavier_normal_(self.loop_rel)
def forward(self, g, n_in_feats, r_feats): def forward(self, g, n_in_feats, r_feats):
with g.local_scope(): with g.local_scope():
# Assign values to source nodes. In a homogeneous graph, this is equal to # Assign values to source nodes. In a homogeneous graph, this is equal to
# assigning them to all nodes. # assigning them to all nodes.
g.srcdata['h'] = n_in_feats g.srcdata["h"] = n_in_feats
#append loop_rel embedding to r_feats # append loop_rel embedding to r_feats
r_feats = th.cat((r_feats, self.loop_rel), 0) r_feats = th.cat((r_feats, self.loop_rel), 0)
# Assign features to all edges with the corresponding relation embeddings # Assign features to all edges with the corresponding relation embeddings
g.edata['h'] = r_feats[g.edata['etype']] * g.edata['norm'] g.edata["h"] = r_feats[g.edata["etype"]] * g.edata["norm"]
# Compute composition function in 4 steps # Compute composition function in 4 steps
# Step 1: compute composition by edge in the edge direction, and store results in edges. # Step 1: compute composition by edge in the edge direction, and store results in edges.
if self.comp_fn == 'sub': if self.comp_fn == "sub":
g.apply_edges(fn.u_sub_e('h', 'h', out='comp_h')) g.apply_edges(fn.u_sub_e("h", "h", out="comp_h"))
elif self.comp_fn == 'mul': elif self.comp_fn == "mul":
g.apply_edges(fn.u_mul_e('h', 'h', out='comp_h')) g.apply_edges(fn.u_mul_e("h", "h", out="comp_h"))
elif self.comp_fn == 'ccorr': elif self.comp_fn == "ccorr":
g.apply_edges(lambda edges: {'comp_h': ccorr(edges.src['h'], edges.data['h'])}) g.apply_edges(
lambda edges: {
"comp_h": ccorr(edges.src["h"], edges.data["h"])
}
)
else: else:
raise Exception('Only supports sub, mul, and ccorr') raise Exception("Only supports sub, mul, and ccorr")
# Step 2: use extracted edge direction to compute in and out edges # Step 2: use extracted edge direction to compute in and out edges
comp_h = g.edata['comp_h'] comp_h = g.edata["comp_h"]
in_edges_idx = th.nonzero(g.edata['in_edges_mask'], as_tuple=False).squeeze() in_edges_idx = th.nonzero(
out_edges_idx = th.nonzero(g.edata['out_edges_mask'], as_tuple=False).squeeze() g.edata["in_edges_mask"], as_tuple=False
).squeeze()
out_edges_idx = th.nonzero(
g.edata["out_edges_mask"], as_tuple=False
).squeeze()
comp_h_O = self.W_O(comp_h[out_edges_idx]) comp_h_O = self.W_O(comp_h[out_edges_idx])
comp_h_I = self.W_I(comp_h[in_edges_idx]) comp_h_I = self.W_I(comp_h[in_edges_idx])
new_comp_h = th.zeros(comp_h.shape[0], self.out_dim).to(comp_h.device) new_comp_h = th.zeros(comp_h.shape[0], self.out_dim).to(
comp_h.device
)
new_comp_h[out_edges_idx] = comp_h_O new_comp_h[out_edges_idx] = comp_h_O
new_comp_h[in_edges_idx] = comp_h_I new_comp_h[in_edges_idx] = comp_h_I
g.edata['new_comp_h'] = new_comp_h g.edata["new_comp_h"] = new_comp_h
# Step 3: sum comp results to both src and dst nodes # Step 3: sum comp results to both src and dst nodes
g.update_all(fn.copy_e('new_comp_h', 'm'), fn.sum('m', 'comp_edge')) g.update_all(fn.copy_e("new_comp_h", "m"), fn.sum("m", "comp_edge"))
# Step 4: add results of self-loop # Step 4: add results of self-loop
if self.comp_fn == 'sub': if self.comp_fn == "sub":
comp_h_s = n_in_feats - r_feats[-1] comp_h_s = n_in_feats - r_feats[-1]
elif self.comp_fn == 'mul': elif self.comp_fn == "mul":
comp_h_s = n_in_feats * r_feats[-1] comp_h_s = n_in_feats * r_feats[-1]
elif self.comp_fn == 'ccorr': elif self.comp_fn == "ccorr":
comp_h_s = ccorr(n_in_feats, r_feats[-1]) comp_h_s = ccorr(n_in_feats, r_feats[-1])
else: else:
raise Exception('Only supports sub, mul, and ccorr') raise Exception("Only supports sub, mul, and ccorr")
# Sum all of the comp results as output of nodes and dropout # Sum all of the comp results as output of nodes and dropout
n_out_feats = (self.W_S(comp_h_s) + self.dropout(g.ndata['comp_edge'])) * (1/3) n_out_feats = (
self.W_S(comp_h_s) + self.dropout(g.ndata["comp_edge"])
) * (1 / 3)
# Compute relation output # Compute relation output
r_out_feats = self.W_R(r_feats) r_out_feats = self.W_R(r_feats)
...@@ -113,16 +120,18 @@ class CompGraphConv(nn.Module): ...@@ -113,16 +120,18 @@ class CompGraphConv(nn.Module):
class CompGCN(nn.Module): class CompGCN(nn.Module):
def __init__(self, def __init__(
self,
num_bases, num_bases,
num_rel, num_rel,
num_ent, num_ent,
in_dim=100, in_dim=100,
layer_size=[200], layer_size=[200],
comp_fn='sub', comp_fn="sub",
batchnorm=True, batchnorm=True,
dropout=0.1, dropout=0.1,
layer_dropout=[0.3]): layer_dropout=[0.3],
):
super(CompGCN, self).__init__() super(CompGCN, self).__init__()
self.num_bases = num_bases self.num_bases = num_bases
...@@ -136,17 +145,29 @@ class CompGCN(nn.Module): ...@@ -136,17 +145,29 @@ class CompGCN(nn.Module):
self.layer_dropout = layer_dropout self.layer_dropout = layer_dropout
self.num_layer = len(layer_size) self.num_layer = len(layer_size)
#CompGCN layers # CompGCN layers
self.layers = nn.ModuleList() self.layers = nn.ModuleList()
self.layers.append( self.layers.append(
CompGraphConv(self.in_dim, self.layer_size[0], comp_fn = self.comp_fn, batchnorm=self.batchnorm, dropout=self.dropout) CompGraphConv(
self.in_dim,
self.layer_size[0],
comp_fn=self.comp_fn,
batchnorm=self.batchnorm,
dropout=self.dropout,
)
) )
for i in range(self.num_layer-1): for i in range(self.num_layer - 1):
self.layers.append( self.layers.append(
CompGraphConv(self.layer_size[i], self.layer_size[i+1], comp_fn = self.comp_fn, batchnorm=self.batchnorm, dropout=self.dropout) CompGraphConv(
self.layer_size[i],
self.layer_size[i + 1],
comp_fn=self.comp_fn,
batchnorm=self.batchnorm,
dropout=self.dropout,
)
) )
#Initial relation embeddings # Initial relation embeddings
if self.num_bases > 0: if self.num_bases > 0:
self.basis = nn.Parameter(th.Tensor(self.num_bases, self.in_dim)) self.basis = nn.Parameter(th.Tensor(self.num_bases, self.in_dim))
self.weights = nn.Parameter(th.Tensor(self.num_rel, self.num_bases)) self.weights = nn.Parameter(th.Tensor(self.num_rel, self.num_bases))
...@@ -156,20 +177,17 @@ class CompGCN(nn.Module): ...@@ -156,20 +177,17 @@ class CompGCN(nn.Module):
self.rel_embds = nn.Parameter(th.Tensor(self.num_rel, self.in_dim)) self.rel_embds = nn.Parameter(th.Tensor(self.num_rel, self.in_dim))
nn.init.xavier_normal_(self.rel_embds) nn.init.xavier_normal_(self.rel_embds)
#Node embeddings # Node embeddings
self.n_embds = nn.Parameter(th.Tensor(self.num_ent, self.in_dim)) self.n_embds = nn.Parameter(th.Tensor(self.num_ent, self.in_dim))
nn.init.xavier_normal_(self.n_embds) nn.init.xavier_normal_(self.n_embds)
#Dropout after compGCN layers # Dropout after compGCN layers
self.dropouts = nn.ModuleList() self.dropouts = nn.ModuleList()
for i in range(self.num_layer): for i in range(self.num_layer):
self.dropouts.append( self.dropouts.append(nn.Dropout(self.layer_dropout[i]))
nn.Dropout(self.layer_dropout[i])
)
def forward(self, graph): def forward(self, graph):
#node and relation features # node and relation features
n_feats = self.n_embds n_feats = self.n_embds
if self.num_bases > 0: if self.num_bases > 0:
r_embds = th.mm(self.weights, self.basis) r_embds = th.mm(self.weights, self.basis)
...@@ -183,15 +201,17 @@ class CompGCN(nn.Module): ...@@ -183,15 +201,17 @@ class CompGCN(nn.Module):
return n_feats, r_feats return n_feats, r_feats
#Use convE as the score function
# Use convE as the score function
class CompGCN_ConvE(nn.Module): class CompGCN_ConvE(nn.Module):
def __init__(self, def __init__(
self,
num_bases, num_bases,
num_rel, num_rel,
num_ent, num_ent,
in_dim, in_dim,
layer_size, layer_size,
comp_fn='sub', comp_fn="sub",
batchnorm=True, batchnorm=True,
dropout=0.1, dropout=0.1,
layer_dropout=[0.3], layer_dropout=[0.3],
...@@ -200,56 +220,75 @@ class CompGCN_ConvE(nn.Module): ...@@ -200,56 +220,75 @@ class CompGCN_ConvE(nn.Module):
feat_drop=0.3, feat_drop=0.3,
ker_sz=5, ker_sz=5,
k_w=5, k_w=5,
k_h=5 k_h=5,
): ):
super(CompGCN_ConvE, self).__init__() super(CompGCN_ConvE, self).__init__()
self.embed_dim = layer_size[-1] self.embed_dim = layer_size[-1]
self.hid_drop=hid_drop self.hid_drop = hid_drop
self.feat_drop=feat_drop self.feat_drop = feat_drop
self.ker_sz=ker_sz self.ker_sz = ker_sz
self.k_w=k_w self.k_w = k_w
self.k_h=k_h self.k_h = k_h
self.num_filt=num_filt self.num_filt = num_filt
#compGCN model to get sub/rel embs # compGCN model to get sub/rel embs
self.compGCN_Model = CompGCN(num_bases, num_rel, num_ent, in_dim, layer_size, comp_fn, batchnorm, dropout, layer_dropout) self.compGCN_Model = CompGCN(
num_bases,
num_rel,
num_ent,
in_dim,
layer_size,
comp_fn,
batchnorm,
dropout,
layer_dropout,
)
#batchnorms to the combined (sub+rel) emb # batchnorms to the combined (sub+rel) emb
self.bn0 = th.nn.BatchNorm2d(1) self.bn0 = th.nn.BatchNorm2d(1)
self.bn1 = th.nn.BatchNorm2d(self.num_filt) self.bn1 = th.nn.BatchNorm2d(self.num_filt)
self.bn2 = th.nn.BatchNorm1d(self.embed_dim) self.bn2 = th.nn.BatchNorm1d(self.embed_dim)
#dropouts and conv module to the combined (sub+rel) emb # dropouts and conv module to the combined (sub+rel) emb
self.hidden_drop = th.nn.Dropout(self.hid_drop) self.hidden_drop = th.nn.Dropout(self.hid_drop)
self.feature_drop = th.nn.Dropout(self.feat_drop) self.feature_drop = th.nn.Dropout(self.feat_drop)
self.m_conv1 = th.nn.Conv2d(1, out_channels=self.num_filt, kernel_size=(self.ker_sz, self.ker_sz), stride=1, padding=0, bias=False) self.m_conv1 = th.nn.Conv2d(
1,
out_channels=self.num_filt,
kernel_size=(self.ker_sz, self.ker_sz),
stride=1,
padding=0,
bias=False,
)
flat_sz_h = int(2 * self.k_w) - self.ker_sz + 1 flat_sz_h = int(2 * self.k_w) - self.ker_sz + 1
flat_sz_w = self.k_h - self.ker_sz + 1 flat_sz_w = self.k_h - self.ker_sz + 1
self.flat_sz = flat_sz_h * flat_sz_w * self.num_filt self.flat_sz = flat_sz_h * flat_sz_w * self.num_filt
self.fc = th.nn.Linear(self.flat_sz, self.embed_dim) self.fc = th.nn.Linear(self.flat_sz, self.embed_dim)
#bias to the score # bias to the score
self.bias = nn.Parameter(th.zeros(num_ent)) self.bias = nn.Parameter(th.zeros(num_ent))
#combine entity embeddings and relation embeddings # combine entity embeddings and relation embeddings
def concat(self, e1_embed, rel_embed): def concat(self, e1_embed, rel_embed):
e1_embed = e1_embed.view(-1, 1, self.embed_dim) e1_embed = e1_embed.view(-1, 1, self.embed_dim)
rel_embed = rel_embed.view(-1, 1, self.embed_dim) rel_embed = rel_embed.view(-1, 1, self.embed_dim)
stack_inp = th.cat([e1_embed, rel_embed], 1) stack_inp = th.cat([e1_embed, rel_embed], 1)
stack_inp = th.transpose(stack_inp, 2, 1).reshape((-1, 1, 2 * self.k_w, self.k_h)) stack_inp = th.transpose(stack_inp, 2, 1).reshape(
(-1, 1, 2 * self.k_w, self.k_h)
)
return stack_inp return stack_inp
def forward(self, graph, sub, rel): def forward(self, graph, sub, rel):
#get sub_emb and rel_emb via compGCN # get sub_emb and rel_emb via compGCN
n_feats, r_feats = self.compGCN_Model(graph) n_feats, r_feats = self.compGCN_Model(graph)
sub_emb = n_feats[sub, :] sub_emb = n_feats[sub, :]
rel_emb = r_feats[rel, :] rel_emb = r_feats[rel, :]
#combine the sub_emb and rel_emb # combine the sub_emb and rel_emb
stk_inp = self.concat(sub_emb, rel_emb) stk_inp = self.concat(sub_emb, rel_emb)
#use convE to score the combined emb # use convE to score the combined emb
x = self.bn0(stk_inp) x = self.bn0(stk_inp)
x = self.m_conv1(x) x = self.m_conv1(x)
x = self.bn1(x) x = self.bn1(x)
...@@ -260,11 +299,9 @@ class CompGCN_ConvE(nn.Module): ...@@ -260,11 +299,9 @@ class CompGCN_ConvE(nn.Module):
x = self.hidden_drop(x) x = self.hidden_drop(x)
x = self.bn2(x) x = self.bn2(x)
x = F.relu(x) x = F.relu(x)
#compute score # compute score
x = th.mm(x, n_feats.transpose(1,0)) x = th.mm(x, n_feats.transpose(1, 0))
#add in bias # add in bias
x += self.bias.expand_as(x) x += self.bias.expand_as(x)
score = th.sigmoid(x) score = th.sigmoid(x)
return score return score
...@@ -3,12 +3,14 @@ ...@@ -3,12 +3,14 @@
# It implements the operation of circular convolution in the ccorr function and an additional in_out_norm function for norm computation. # It implements the operation of circular convolution in the ccorr function and an additional in_out_norm function for norm computation.
import torch as th import torch as th
import dgl import dgl
def com_mult(a, b):
    """Element-wise complex multiplication.

    Both inputs store complex numbers in their last dimension as
    (real, imag) pairs; the result has the same layout and shape.
    """
    a_re, a_im = a[..., 0], a[..., 1]
    b_re, b_im = b[..., 0], b[..., 1]
    # (a_re + i*a_im)(b_re + i*b_im) = (a_re*b_re - a_im*b_im) + i(a_re*b_im + a_im*b_re)
    real_part = a_re * b_re - a_im * b_im
    imag_part = a_re * b_im + a_im * b_re
    return th.stack([real_part, imag_part], dim=-1)
def conj(a): def conj(a):
...@@ -32,26 +34,34 @@ def ccorr(a, b): ...@@ -32,26 +34,34 @@ def ccorr(a, b):
------- -------
Tensor, having the same dimension as the input a. Tensor, having the same dimension as the input a.
""" """
return th.fft.irfftn(th.conj(th.fft.rfftn(a, (-1))) * th.fft.rfftn(b, (-1)), (-1)) return th.fft.irfftn(
th.conj(th.fft.rfftn(a, (-1))) * th.fft.rfftn(b, (-1)), (-1)
)
#identify in/out edges, compute edge norm for each and store in edata
# identify in/out edges, compute edge norm for each and store in edata
def in_out_norm(graph):
    """Compute a D^{-1/2} * D^{-1/2} edge normalization separately for the
    in-edge and out-edge subsets of *graph* and store it in
    ``graph.edata['norm']`` with shape ``(num_edges, 1)``.

    Assumes boolean masks ``'in_edges_mask'`` and ``'out_edges_mask'`` are
    already present in ``graph.edata`` — TODO confirm they are set by the
    data-loading code before this is called.  Edges covered by neither mask
    (presumably self-loops) keep the default norm of 1.
    Returns the same graph object, mutated in place.
    """
    src, dst, eid = graph.edges(form="all")
    # Default norm of 1 for every edge; create directly on the graph's
    # device instead of allocating on CPU and moving.
    graph.edata["norm"] = th.ones(eid.shape[0], device=graph.device)
    in_edges_idx = th.nonzero(
        graph.edata["in_edges_mask"], as_tuple=False
    ).squeeze()
    out_edges_idx = th.nonzero(
        graph.edata["out_edges_mask"], as_tuple=False
    ).squeeze()
    for idx in (in_edges_idx, out_edges_idx):
        u, v = src[idx], dst[idx]
        # Degree of each destination node restricted to this edge subset.
        # Drop the unused return_inverse output of th.unique.
        deg = th.zeros(graph.num_nodes(), device=graph.device)
        n_idx, count = th.unique(v, return_counts=True)
        deg[n_idx] = count.float()
        deg_inv = deg.pow(-0.5)  # D^{-0.5}
        deg_inv[deg_inv == float("inf")] = 0  # isolated nodes: 0**-0.5 -> inf
        graph.edata["norm"][idx] = deg_inv[u] * deg_inv[v]
    graph.edata["norm"] = graph.edata["norm"].unsqueeze(1)
    return graph
This diff is collapsed.
This diff is collapsed.
import numpy as np
import random import random
from torch.nn import functional as F
import numpy as np
import torch import torch
from torch.nn import functional as F
def evaluate(model, graph, feats, labels, idxs): def evaluate(model, graph, feats, labels, idxs):
...@@ -11,7 +12,9 @@ def evaluate(model, graph, feats, labels, idxs): ...@@ -11,7 +12,9 @@ def evaluate(model, graph, feats, labels, idxs):
results = () results = ()
for idx in idxs: for idx in idxs:
loss = F.cross_entropy(logits[idx], labels[idx]) loss = F.cross_entropy(logits[idx], labels[idx])
acc = torch.sum(logits[idx].argmax(dim=1) == labels[idx]).item() / len(idx) acc = torch.sum(
logits[idx].argmax(dim=1) == labels[idx]
).item() / len(idx)
results += (loss, acc) results += (loss, acc)
return results return results
......
This diff is collapsed.
This diff is collapsed.
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import dgl.function as fn from layers import GENConv
from ogb.graphproppred.mol_encoder import AtomEncoder from ogb.graphproppred.mol_encoder import AtomEncoder
import dgl.function as fn
from dgl.nn.pytorch.glob import AvgPooling from dgl.nn.pytorch.glob import AvgPooling
from layers import GENConv
class DeeperGCN(nn.Module): class DeeperGCN(nn.Module):
...@@ -37,17 +37,20 @@ class DeeperGCN(nn.Module): ...@@ -37,17 +37,20 @@ class DeeperGCN(nn.Module):
mlp_layers: int mlp_layers: int
Number of MLP layers in message normalization. Default is 1. Number of MLP layers in message normalization. Default is 1.
""" """
def __init__(self,
def __init__(
self,
node_feat_dim, node_feat_dim,
edge_feat_dim, edge_feat_dim,
hid_dim, hid_dim,
out_dim, out_dim,
num_layers, num_layers,
dropout=0., dropout=0.0,
beta=1.0, beta=1.0,
learn_beta=False, learn_beta=False,
aggr='softmax', aggr="softmax",
mlp_layers=1): mlp_layers=1,
):
super(DeeperGCN, self).__init__() super(DeeperGCN, self).__init__()
self.num_layers = num_layers self.num_layers = num_layers
...@@ -56,12 +59,14 @@ class DeeperGCN(nn.Module): ...@@ -56,12 +59,14 @@ class DeeperGCN(nn.Module):
self.norms = nn.ModuleList() self.norms = nn.ModuleList()
for _ in range(self.num_layers): for _ in range(self.num_layers):
conv = GENConv(in_dim=hid_dim, conv = GENConv(
in_dim=hid_dim,
out_dim=hid_dim, out_dim=hid_dim,
aggregator=aggr, aggregator=aggr,
beta=beta, beta=beta,
learn_beta=learn_beta, learn_beta=learn_beta,
mlp_layers=mlp_layers) mlp_layers=mlp_layers,
)
self.gcns.append(conv) self.gcns.append(conv)
self.norms.append(nn.BatchNorm1d(hid_dim, affine=True)) self.norms.append(nn.BatchNorm1d(hid_dim, affine=True))
......
...@@ -10,11 +10,8 @@ class MLP(nn.Sequential): ...@@ -10,11 +10,8 @@ class MLP(nn.Sequential):
----------- -----------
From equation (5) in "DeeperGCN: All You Need to Train Deeper GCNs <https://arxiv.org/abs/2006.07739>" From equation (5) in "DeeperGCN: All You Need to Train Deeper GCNs <https://arxiv.org/abs/2006.07739>"
""" """
def __init__(self,
channels, def __init__(self, channels, act="relu", dropout=0.0, bias=True):
act='relu',
dropout=0.,
bias=True):
layers = [] layers = []
for i in range(1, len(channels)): for i in range(1, len(channels)):
...@@ -39,9 +36,12 @@ class MessageNorm(nn.Module): ...@@ -39,9 +36,12 @@ class MessageNorm(nn.Module):
learn_scale: bool learn_scale: bool
Whether s is a learnable scaling factor or not. Default is False. Whether s is a learnable scaling factor or not. Default is False.
""" """
def __init__(self, learn_scale=False): def __init__(self, learn_scale=False):
super(MessageNorm, self).__init__() super(MessageNorm, self).__init__()
self.scale = nn.Parameter(torch.FloatTensor([1.0]), requires_grad=learn_scale) self.scale = nn.Parameter(
torch.FloatTensor([1.0]), requires_grad=learn_scale
)
def forward(self, feats, msg, p=2): def forward(self, feats, msg, p=2):
msg = F.normalize(msg, p=2, dim=-1) msg = F.normalize(msg, p=2, dim=-1)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment