black (#4722)

Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>

black (#4722)
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
6a3cb548 · Hongzhi (Steve), Chen · GitHub · c78ddee2 · 6a3cb548
Unverified Commit 6a3cb548 authored Oct 19, 2022 by Hongzhi (Steve), Chen Committed by GitHub Oct 19, 2022
Hide whitespace changes
Inline Side-by-side

Showing with 78 additions and 41 deletions

tutorials/large/L2_large_link_prediction.py tutorials/large/L2_large_link_prediction.py +78 -41

No files found.
--- a/tutorials/large/L2_large_link_prediction.py
+++ b/tutorials/large/L2_large_link_prediction.py
@@ -57,8 +57,8 @@ import torch
 import numpy as np
 from ogb.nodeproppred import DglNodePropPredDataset

-dataset = DglNodePropPredDataset('ogbn-arxiv')
-device = 'cpu'      # change to 'cuda' for GPU
+dataset = DglNodePropPredDataset("ogbn-arxiv")
+device = "cpu"  # change to 'cuda' for GPU

 graph, node_labels = dataset[0]
 # Add reverse edges since ogbn-arxiv is unidirectional.
@@ -66,16 +66,16 @@ graph = dgl.add_reverse_edges(graph)
 print(graph)
 print(node_labels)

-node_features = graph.ndata['feat']
+node_features = graph.ndata["feat"]
 node_labels = node_labels[:, 0]
 num_features = node_features.shape[1]
 num_classes = (node_labels.max() + 1).item()
-print('Number of classes:', num_classes)
+print("Number of classes:", num_classes)

 idx_split = dataset.get_idx_split()
-train_nids = idx_split['train']
-valid_nids = idx_split['valid']
-test_nids = idx_split['test']
+train_nids = idx_split["train"]
+valid_nids = idx_split["valid"]
+test_nids = idx_split["test"]


 ######################################################################
@@ -112,18 +112,19 @@ negative_sampler = dgl.dataloading.negative_sampler.Uniform(5)

 sampler = dgl.dataloading.NeighborSampler([4, 4])
 sampler = dgl.dataloading.as_edge_prediction_sampler(
-    sampler, negative_sampler=negative_sampler)
+    sampler, negative_sampler=negative_sampler
+)
 train_dataloader = dgl.dataloading.DataLoader(
    # The following arguments are specific to DataLoader.
-    graph,                                  # The graph
+    graph,  # The graph
    torch.arange(graph.number_of_edges()),  # The edges to iterate over
-    sampler,                                # The neighbor sampler
-    device=device,                          # Put the MFGs on CPU or GPU
+    sampler,  # The neighbor sampler
+    device=device,  # Put the MFGs on CPU or GPU
    # The following arguments are inherited from PyTorch DataLoader.
-    batch_size=1024,    # Batch size
-    shuffle=True,       # Whether to shuffle the nodes for every epoch
-    drop_last=False,    # Whether to drop the last incomplete batch
-    num_workers=0       # Number of sampler processes
+    batch_size=1024,  # Batch size
+    shuffle=True,  # Whether to shuffle the nodes for every epoch
+    drop_last=False,  # Whether to drop the last incomplete batch
+    num_workers=0,  # Number of sampler processes
 )


@@ -133,9 +134,19 @@ train_dataloader = dgl.dataloading.DataLoader(
 #

 input_nodes, pos_graph, neg_graph, mfgs = next(iter(train_dataloader))
-print('Number of input nodes:', len(input_nodes))
-print('Positive graph # nodes:', pos_graph.number_of_nodes(), '# edges:', pos_graph.number_of_edges())
-print('Negative graph # nodes:', neg_graph.number_of_nodes(), '# edges:', neg_graph.number_of_edges())
+print("Number of input nodes:", len(input_nodes))
+print(
+    "Positive graph # nodes:",
+    pos_graph.number_of_nodes(),
+    "# edges:",
+    pos_graph.number_of_edges(),
+)
+print(
+    "Negative graph # nodes:",
+    neg_graph.number_of_nodes(),
+    "# edges:",
+    neg_graph.number_of_edges(),
+)
 print(mfgs)


@@ -174,21 +185,23 @@ import torch.nn as nn
 import torch.nn.functional as F
 from dgl.nn import SAGEConv

+
 class Model(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(Model, self).__init__()
-        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type='mean')
-        self.conv2 = SAGEConv(h_feats, h_feats, aggregator_type='mean')
+        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type="mean")
+        self.conv2 = SAGEConv(h_feats, h_feats, aggregator_type="mean")
        self.h_feats = h_feats

    def forward(self, mfgs, x):
-        h_dst = x[:mfgs[0].num_dst_nodes()]
+        h_dst = x[: mfgs[0].num_dst_nodes()]
        h = self.conv1(mfgs[0], (x, h_dst))
        h = F.relu(h)
-        h_dst = h[:mfgs[1].num_dst_nodes()]
+        h_dst = h[: mfgs[1].num_dst_nodes()]
        h = self.conv2(mfgs[1], (h, h_dst))
        return h

+
 model = Model(num_features, 128).to(device)


@@ -207,15 +220,16 @@ model = Model(num_features, 128).to(device)

 import dgl.function as fn

+
 class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
-            g.ndata['h'] = h
+            g.ndata["h"] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
-            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
+            g.apply_edges(fn.u_dot_v("h", "h", "score"))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
-            return g.edata['score'][:, 0]
+            return g.edata["score"][:, 0]


 ######################################################################
@@ -244,29 +258,35 @@ class DotPredictor(nn.Module):
 #    guide <guide-minibatch-inference>`.
 #

+
 def inference(model, graph, node_features):
    with torch.no_grad():
        nodes = torch.arange(graph.number_of_nodes())

        sampler = dgl.dataloading.NeighborSampler([4, 4])
        train_dataloader = dgl.dataloading.DataLoader(
-            graph, torch.arange(graph.number_of_nodes()), sampler,
+            graph,
+            torch.arange(graph.number_of_nodes()),
+            sampler,
            batch_size=1024,
            shuffle=False,
            drop_last=False,
            num_workers=4,
-            device=device)
+            device=device,
+        )

        result = []
        for input_nodes, output_nodes, mfgs in train_dataloader:
            # feature copy from CPU to GPU takes place here
-            inputs = mfgs[0].srcdata['feat']
+            inputs = mfgs[0].srcdata["feat"]
            result.append(model(mfgs, inputs))

        return torch.cat(result)

+
 import sklearn.metrics

+
 def evaluate(emb, label, train_nids, valid_nids, test_nids):
    classifier = nn.Linear(emb.shape[1], num_classes).to(device)
    opt = torch.optim.LBFGS(classifier.parameters())
@@ -282,13 +302,13 @@ def evaluate(emb, label, train_nids, valid_nids, test_nids):
        loss.backward()
        return loss

-    prev_loss = float('inf')
+    prev_loss = float("inf")
    for i in range(1000):
        opt.step(closure)
        with torch.no_grad():
            loss = compute_loss().item()
            if np.abs(loss - prev_loss) < 1e-4:
-                print('Converges at iteration', i)
+                print("Converges at iteration", i)
                break
            else:
                prev_loss = loss
@@ -296,8 +316,12 @@ def evaluate(emb, label, train_nids, valid_nids, test_nids):
    with torch.no_grad():
        pred = classifier(emb.to(device)).cpu()
        label = label
-        valid_acc = sklearn.metrics.accuracy_score(label[valid_nids].numpy(), pred[valid_nids].numpy().argmax(1))
-        test_acc = sklearn.metrics.accuracy_score(label[test_nids].numpy(), pred[test_nids].numpy().argmax(1))
+        valid_acc = sklearn.metrics.accuracy_score(
+            label[valid_nids].numpy(), pred[valid_nids].numpy().argmax(1)
+        )
+        test_acc = sklearn.metrics.accuracy_score(
+            label[test_nids].numpy(), pred[test_nids].numpy().argmax(1)
+        )
    return valid_acc, test_acc


@@ -323,32 +347,40 @@ import tqdm
 import sklearn.metrics

 best_accuracy = 0
-best_model_path = 'model.pt'
+best_model_path = "model.pt"
 for epoch in range(1):
    with tqdm.tqdm(train_dataloader) as tq:
        for step, (input_nodes, pos_graph, neg_graph, mfgs) in enumerate(tq):
            # feature copy from CPU to GPU takes place here
-            inputs = mfgs[0].srcdata['feat']
+            inputs = mfgs[0].srcdata["feat"]

            outputs = model(mfgs, inputs)
            pos_score = predictor(pos_graph, outputs)
            neg_score = predictor(neg_graph, outputs)

            score = torch.cat([pos_score, neg_score])
-            label = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
+            label = torch.cat(
+                [torch.ones_like(pos_score), torch.zeros_like(neg_score)]
+            )
            loss = F.binary_cross_entropy_with_logits(score, label)

            opt.zero_grad()
            loss.backward()
            opt.step()

-            tq.set_postfix({'loss': '%.03f' % loss.item()}, refresh=False)
+            tq.set_postfix({"loss": "%.03f" % loss.item()}, refresh=False)

            if (step + 1) % 500 == 0:
                model.eval()
                emb = inference(model, graph, node_features)
-                valid_acc, test_acc = evaluate(emb, node_labels, train_nids, valid_nids, test_nids)
-                print('Epoch {} Validation Accuracy {} Test Accuracy {}'.format(epoch, valid_acc, test_acc))
+                valid_acc, test_acc = evaluate(
+                    emb, node_labels, train_nids, valid_nids, test_nids
+                )
+                print(
+                    "Epoch {} Validation Accuracy {} Test Accuracy {}".format(
+                        epoch, valid_acc, test_acc
+                    )
+                )
                if best_accuracy < valid_acc:
                    best_accuracy = valid_acc
                    torch.save(model.state_dict(), best_model_path)
@@ -383,7 +415,8 @@ for epoch in range(1):
 n_test_pos = 1000
 test_pos_src, test_pos_dst = (
    torch.randint(0, graph.num_nodes(), (n_test_pos,)),
-    torch.randint(0, graph.num_nodes(), (n_test_pos,)))
+    torch.randint(0, graph.num_nodes(), (n_test_pos,)),
+)
 # Negative pairs.  Likewise, you will need to replace them with your
 # own ground truth.
 test_neg_src = test_pos_src
@@ -410,10 +443,14 @@ h_neg_dst = node_reprs[test_neg_dst]
 score_pos = (h_pos_src * h_pos_dst).sum(1)
 score_neg = (h_neg_src * h_neg_dst).sum(1)
 test_preds = torch.cat([score_pos, score_neg]).cpu().numpy()
-test_labels = torch.cat([torch.ones_like(score_pos), torch.zeros_like(score_neg)]).cpu().numpy()
+test_labels = (
+    torch.cat([torch.ones_like(score_pos), torch.zeros_like(score_neg)])
+    .cpu()
+    .numpy()
+)

 auc = sklearn.metrics.roc_auc_score(test_labels, test_preds)
-print('Link Prediction AUC:', auc)
+print("Link Prediction AUC:", auc)


 ######################################################################