Unverified Commit 704bcaf6 authored by Hongzhi (Steve), Chen; committed by GitHub
parent 6bc82161
......@@ -3,6 +3,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.multiprocessing as mp
......@@ -11,8 +13,6 @@ from reading_data import DeepwalkDataset
from torch.utils.data import DataLoader
from utils import shuffle_walks, sum_up_params
-import dgl
class DeepwalkTrainer:
def __init__(self, args):
......
......@@ -3,19 +3,19 @@ import pickle
import random
import time
+import dgl
import numpy as np
import scipy.sparse as sp
import torch
-from torch.utils.data import DataLoader
-from utils import shuffle_walks
-import dgl
from dgl.data.utils import (
_get_dgl_url,
download,
extract_archive,
get_download_dir,
)
+from torch.utils.data import DataLoader
+from utils import shuffle_walks
def ReadTxtNet(file_path="", undirected=True):
......
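The body of ReadTxtNet is elided above. As a hedged sketch (assuming the usual one "src dst [weight]" record per line; the helper name is hypothetical), a reader like it boils down to:

# Hedged sketch of an edge-list reader in the spirit of ReadTxtNet.
def read_txt_net_sketch(file_path, undirected=True):
    net = {}
    with open(file_path) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) < 2:
                continue  # skip blank or malformed lines
            src, dst = int(fields[0]), int(fields[1])
            w = float(fields[2]) if len(fields) > 2 else 1.0
            net.setdefault(src, {})[dst] = w
            if undirected:
                net.setdefault(dst, {})[src] = w  # mirror the edge
    return net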
import argparse
import random
+import dgl
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+from dgl.dataloading import GraphDataLoader
from ogb.graphproppred import Evaluator
from ogb.graphproppred.mol_encoder import AtomEncoder
from preprocessing import prepare_dataset
from torch.utils.data import Dataset
from tqdm import tqdm
-import dgl
-from dgl.dataloading import GraphDataLoader
def aggregate_mean(h, vector_field, h_in):
return torch.mean(h, dim=1)
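For the shape contract of aggregate_mean (assumed: neighbor messages stacked along dim 1), a minimal usage sketch:

import torch

# h stacks each node's neighbor messages along dim 1; the mean collapses
# that axis. vector_field and h_in are simply unused by this aggregator.
h = torch.randn(4, 3, 16)            # (num_nodes, num_neighbors, feat_dim)
out = aggregate_mean(h, None, None)
assert out.shape == (4, 16)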
......@@ -116,7 +116,6 @@ class DGNLayer(nn.Module):
return {"h": h}
def forward(self, g, h, snorm_n):
g.ndata["h"] = h
# pretransformation
......@@ -284,7 +283,6 @@ def evaluate_network(model, device, data_loader):
def train(dataset, params):
trainset, valset, testset = dataset.train, dataset.val, dataset.test
device = params.device
......
......@@ -5,10 +5,10 @@ import graph_tool.topology as gt_topology
import networkx as nx
import numpy as np
import torch
-from ogb.graphproppred import DglGraphPropPredDataset
-from tqdm import tqdm
from dgl.data.utils import load_graphs, save_graphs
+from ogb.graphproppred import DglGraphPropPredDataset
+from tqdm import tqdm
def to_undirected(edge_index):
......@@ -20,7 +20,6 @@ def to_undirected(edge_index):
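The body of to_undirected is elided; a hedged sketch of the standard construction (mirror every edge, then deduplicate), assuming edge_index is a (2, E) LongTensor:

import torch

def to_undirected_sketch(edge_index):
    both = torch.cat([edge_index, edge_index.flip(0)], dim=1)  # append reversed edges
    return torch.unique(both, dim=1)                           # drop duplicate columns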
def induced_edge_automorphism_orbits(edge_list):
##### node automorphism orbits #####
graph = gt.Graph(directed=False)
graph.add_edge_list(edge_list)
......@@ -98,7 +97,6 @@ def induced_edge_automorphism_orbits(edge_list):
def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
##### edge structural identifiers #####
edge_index = edge_index.transpose(1, 0).cpu().numpy()
......@@ -131,7 +129,6 @@ def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
for sub_iso_curr in sub_iso:
mapping = sub_iso_curr.get_array()
for i, edge in enumerate(subgraph_edges):
# for every edge in the graph H, find the edge in the subgraph G_S to which it is mapped
# (by finding where its endpoints are matched).
# Then, increase the count of the matched edge w.r.t. the corresponding orbit
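The counting loop these comments describe reduces to the following hedged sketch (all names hypothetical: mapping[v] is the host node matched to template node v, edge_orbit[i] the orbit of template edge i):

from collections import defaultdict

def count_edge_orbits(sub_isos, subgraph_edges, edge_orbit):
    counts = defaultdict(lambda: defaultdict(int))
    for mapping in sub_isos:
        for i, (u, v) in enumerate(subgraph_edges):
            # Template edge i lands on host edge (mapping[u], mapping[v]);
            # credit that host edge's count for the template edge's orbit.
            counts[(mapping[u], mapping[v])][edge_orbit[i]] += 1
    return counts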
......@@ -149,7 +146,6 @@ def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
def prepare_dataset(name):
# maximum size of cycle graph
k = 8
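With k = 8, the substructure list presumably spans the cycles C3 through C8; a hedged sketch of generating their edge lists:

import networkx as nx

k = 8
# Edge lists of cycle templates C3..C8, ready for the orbit machinery above.
cycle_edge_lists = [list(nx.cycle_graph(n).edges()) for n in range(3, k + 1)]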
......@@ -174,7 +170,6 @@ def prepare_dataset(name):
def generate_dataset(path, name):
### compute the orbits of each substructure in the list, as well as the node automorphism count
subgraph_dicts = []
......@@ -218,7 +213,6 @@ def generate_dataset(path, name):
def _prepare(g, subgraph_dicts):
edge_index = torch.stack(g.edges())
identifiers = None
......
......@@ -3,6 +3,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.multiprocessing as mp
......@@ -11,8 +13,6 @@ from reading_data import LineDataset
from torch.utils.data import DataLoader
from utils import check_args, sum_up_params
-import dgl
class LineTrainer:
def __init__(self, args):
......
......@@ -2,11 +2,11 @@
import argparse
+import dgl
from ogb.linkproppred import DglLinkPropPredDataset
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
def load_from_ogbl_with_name(name):
choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"]
......
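The elided loader body presumably validates the name against the choices and unwraps the OGB dataset; a hedged sketch:

from ogb.linkproppred import DglLinkPropPredDataset

def load_from_ogbl_sketch(name):
    choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"]
    assert name in choices, f"name must be one of {choices}"
    return DglLinkPropPredDataset(name)[0]  # the packaged DGLGraph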
......@@ -3,18 +3,18 @@ import pickle
import random
import time
+import dgl
import numpy as np
import scipy.sparse as sp
import torch
-from torch.utils.data import DataLoader
-import dgl
from dgl.data.utils import (
_get_dgl_url,
download,
extract_archive,
get_download_dir,
)
+from torch.utils.data import DataLoader
def ReadTxtNet(file_path="", undirected=True):
......
import argparse
import math
+import dgl
import torch
import torch.nn.functional as F
+from dgl.dataloading.negative_sampler import GlobalUniform
+from dgl.nn.pytorch import GraphConv, SAGEConv
from ogb.linkproppred import DglLinkPropPredDataset, Evaluator
from torch.nn import Linear
from torch.utils.data import DataLoader
-import dgl
-from dgl.dataloading.negative_sampler import GlobalUniform
-from dgl.nn.pytorch import GraphConv, SAGEConv
class Logger(object):
def __init__(self, runs, info=None):
......
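The Logger body is elided, but the calls below (add_result(run, result) and results["valid"][run]) imply a per-run, per-split result store. A hedged sketch of that contract:

class LoggerSketch:
    def __init__(self, runs, info=None):
        self.info = info
        self.results = {
            "valid": [[] for _ in range(runs)],
            "test": [[] for _ in range(runs)],
        }

    def add_result(self, run, result, split="valid"):
        assert split in self.results
        self.results[split][run].append(result)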
......@@ -275,8 +275,8 @@ if __name__ == "__main__":
type=str,
default="none",
choices=["none", "input", "hidden", "output", "all"],
help="You can set this value from 'none', 'input', 'hidden' or 'all' " \
"to apply NGNN to different GNN layers.",
help="You can set this value from 'none', 'input', 'hidden' or 'all' "
"to apply NGNN to different GNN layers.",
)
parser.add_argument(
"--num_ngnn_layers", type=int, default=1, choices=[1, 2]
......@@ -320,8 +320,8 @@ if __name__ == "__main__":
"--num_workers",
type=int,
default=24,
help="number of workers for dynamic dataloaders; " \
"using a larger value for dynamic dataloading is recommended",
help="number of workers for dynamic dataloaders; "
"using a larger value for dynamic dataloading is recommended",
)
# Testing settings
parser.add_argument(
......@@ -335,8 +335,8 @@ if __name__ == "__main__":
type=int,
nargs="*",
default=[10],
help="hits@K for each eval step; " \
"only available for datasets with hits@xx as the eval metric",
help="hits@K for each eval step; "
"only available for datasets with hits@xx as the eval metric",
)
parser.add_argument(
"--test_topk",
......@@ -352,7 +352,7 @@ if __name__ == "__main__":
args.res_dir = os.path.join(
f'results{"_NoTest" if args.no_test else ""}',
-f'{args.dataset.split("-")[1]}-{args.ngnn_type}+{time.strftime("%m%d%H%M%S")}'
+f'{args.dataset.split("-")[1]}-{args.ngnn_type}+{time.strftime("%m%d%H%M%S")}',
)
print(f"Results will be saved in {args.res_dir}")
if not os.path.exists(args.res_dir):
......@@ -381,7 +381,7 @@ if __name__ == "__main__":
[src[:, None].repeat(1, tgt_neg.size(1)), tgt_neg], dim=-1
) # [Ns, Nt, 2]
-# Reconstruct the graph for ogbl-collab data
+# Reconstruct the graph for ogbl-collab data
# for validation edge augmentation and coalesce.
if args.dataset == "ogbl-collab":
# Float edata for to_simple transformation.
......@@ -503,7 +503,7 @@ if __name__ == "__main__":
if args.dataset.startswith("ogbl-citation"):
# For this dataset, subgraphs extracted around positive edges are
# rather larger than negative edges. Thus we sample from 1000
-# positive and 1000 negative edges to estimate the k (number of
+# positive and 1000 negative edges to estimate the k (number of
# nodes to hold for each graph) used in SortPooling.
# You can certainly set k manually, instead of estimating from
# a percentage of sampled subgraphs.
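A hedged sketch of the percentile-style estimate the comment describes (the 0.6 default, the floor of 10, and the names are hypothetical, not the script's actual flag values):

import numpy as np

def estimate_sortpool_k(sampled_subgraphs, percent=0.6):
    sizes = sorted(g.num_nodes() for g in sampled_subgraphs)
    idx = max(int(np.ceil(percent * len(sizes))) - 1, 0)
    return max(sizes[idx], 10)  # floor k so SortPooling keeps enough nodes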
......@@ -554,25 +554,25 @@ if __name__ == "__main__":
epo_train_etime = datetime.datetime.now()
print_log(
f"[epoch: {epoch}]",
f" <Train> starts: {epo_stime}, " \
f"ends: {epo_train_etime}, " \
f"spent time:{epo_train_etime - epo_stime}"
f" <Train> starts: {epo_stime}, "
f"ends: {epo_train_etime}, "
f"spent time:{epo_train_etime - epo_stime}",
)
if epoch % args.eval_steps == 0:
epo_eval_stime = datetime.datetime.now()
results = test(val_loader, loggers.keys())
epo_eval_etime = datetime.datetime.now()
print_log(
f" <Validation> starts: {epo_eval_stime}, " \
f"ends: {epo_eval_etime}, " \
f" <Validation> starts: {epo_eval_stime}, "
f"ends: {epo_eval_etime}, "
f"spent time:{epo_eval_etime - epo_eval_stime}"
)
for key, valid_res in results.items():
loggers[key].add_result(run, valid_res)
to_print = (
f"Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f"Loss: {loss:.4f}, " \
f"Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Loss: {loss:.4f}, "
f"Valid ({args.val_percent}%) [{key}]: {valid_res:.4f}"
)
print_log(key, to_print)
......@@ -590,17 +590,15 @@ if __name__ == "__main__":
tested = dict()
for eval_metric in loggers.keys():
# Select models according to the eval_metric of the dataset.
-res = torch.tensor(
-loggers[eval_metric].results["valid"][run]
-)
+res = torch.tensor(loggers[eval_metric].results["valid"][run])
if args.no_test:
epoch = torch.argmax(res).item() + 1
val_res = loggers[eval_metric].results["valid"][run][epoch - 1]
loggers[eval_metric].add_result(run, (epoch, val_res), "test")
print_log(
f"No Test; Best Valid:",
f" Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f" Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Valid ({args.val_percent}%) [{eval_metric}]: {val_res:.4f}",
)
continue
......@@ -610,13 +608,13 @@ if __name__ == "__main__":
).tolist() # indices of top k valid results
print_log(
f"Eval Metric: {eval_metric}",
f"Run: {run + 1:02d}, " \
f"Run: {run + 1:02d}, "
f"Top {args.test_topk} Eval Points: {idx_to_test}",
)
for _idx, epoch in enumerate(idx_to_test):
print_log(
f"Test Point[{_idx+1}]: " \
f"Epoch {epoch:02d}, " \
f"Test Point[{_idx+1}]: "
f"Epoch {epoch:02d}, "
f"Test Metric: {dataset.eval_metric}"
)
if epoch not in tested:
......@@ -643,11 +641,11 @@ if __name__ == "__main__":
run, (epoch, val_res, test_res), "test"
)
print_log(
f" Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f"Valid ({args.val_percent}%) [{eval_metric}]: " \
f"{loggers[eval_metric].results['valid'][run][epoch-1]:.4f}, " \
f"Valid (final) [{dataset.eval_metric}]: {val_res:.4f}, " \
f" Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Valid ({args.val_percent}%) [{eval_metric}]: "
f"{loggers[eval_metric].results['valid'][run][epoch-1]:.4f}, "
f"Valid (final) [{dataset.eval_metric}]: {val_res:.4f}, "
f"Test [{dataset.eval_metric}]: {test_res:.4f}"
)
......
......@@ -129,7 +129,7 @@ class Logger(object):
f' Final Test Point[1]: {self.results["test"][run][0][0]}',
f' Final Valid: {self.results["test"][run][0][1]}',
f' Final Test: {self.results["test"][run][0][2]}',
-sep='\n',
+sep="\n",
file=f,
)
else:
......
......@@ -4,9 +4,9 @@ import glob
import numpy as np
import torch
import torch.nn.functional as F
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
from dgl import function as fn
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
device = None
......
......@@ -7,6 +7,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.nn.functional as F
......@@ -16,8 +18,6 @@ from matplotlib.ticker import AutoMinorLocator, MultipleLocator
from models import GAT
from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-import dgl
epsilon = 1 - math.log(2)
device = None
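epsilon = 1 - log 2 is the constant these GAT examples typically feed into a log-softened cross entropy; a hedged sketch of that loss (shifted so zero loss maps to zero):

import math
import torch
import torch.nn.functional as F

epsilon = 1 - math.log(2)

def loge_cross_entropy(logits, labels):
    y = F.cross_entropy(logits, labels, reduction="none")
    y = torch.log(epsilon + y) - math.log(epsilon)  # dampens easy examples
    return torch.mean(y)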
......
+import dgl.nn.pytorch as dglnn
import torch
import torch.nn as nn
-import dgl.nn.pytorch as dglnn
from dgl import function as fn
from dgl.ops import edge_softmax
from dgl.utils import expand_as_pair
......
import argparse
import itertools
+import sys
+import dgl
+import dgl.nn as dglnn
+import psutil
import torch as th
import torch.nn as nn
import torch.nn.functional as F
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from tqdm import tqdm
-import dgl
-import dgl.nn as dglnn
from dgl import AddReverse, Compose, ToSimple
from dgl.nn import HeteroEmbedding
-import psutil
-import sys
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from tqdm import tqdm
v_t = dgl.__version__
def prepare_data(args, device):
dataset = DglNodePropPredDataset(name="ogbn-mag")
split_idx = dataset.get_idx_split()
......@@ -41,7 +42,7 @@ def prepare_data(args, device):
batch_size=1024,
shuffle=True,
num_workers=num_workers,
-device=device
+device=device,
)
return g, labels, dataset.num_classes, split_idx, logger, train_loader
......@@ -271,9 +272,7 @@ def train(
emb = extract_embed(node_embed, input_nodes)
# Add the batch's raw "paper" features
-emb.update(
-{"paper": g.ndata["feat"]["paper"][input_nodes_indexes]}
-)
+emb.update({"paper": g.ndata["feat"]["paper"][input_nodes_indexes]})
emb = {k: e.to(device) for k, e in emb.items()}
lbl = labels[seeds].to(device)
......@@ -322,7 +321,7 @@ def test(g, model, node_embed, y_true, device, split_idx):
batch_size=16384,
shuffle=False,
num_workers=0,
-device=device
+device=device,
)
pbar = tqdm(total=y_true.size(0))
......@@ -375,14 +374,18 @@ def test(g, model, node_embed, y_true, device, split_idx):
return train_acc, valid_acc, test_acc
def is_support_affinity(v_t):
# dgl supports enable_cpu_affinity since 0.9.1
return v_t >= "0.9.1"
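Note that plain string comparison misorders versions such as "0.10.0" < "0.9.1"; a hedged sketch of a numeric comparison (not part of this commit):

import re

def is_support_affinity_robust(version_string):
    # Compare numeric components so "0.10.0" ranks above "0.9.1".
    nums = tuple(int(x) for x in re.findall(r"\d+", version_string)[:3])
    return nums >= (0, 9, 1)  # enable_cpu_affinity landed in DGL 0.9.1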
def main(args):
device = f"cuda:0" if th.cuda.is_available() else "cpu"
-g, labels, num_classes, split_idx, logger, train_loader = prepare_data(args, device)
+g, labels, num_classes, split_idx, logger, train_loader = prepare_data(
+args, device
+)
embed_layer = rel_graph_embed(g, 128).to(device)
model = EntityClassify(g, 128, num_classes).to(device)
......@@ -395,7 +398,6 @@ def main(args):
)
for run in range(args.runs):
try:
embed_layer.reset_parameters()
model.reset_parameters()
......@@ -409,10 +411,17 @@ def main(args):
)
optimizer = th.optim.Adam(all_params, lr=0.01)
-if args.num_workers != 0 and device == "cpu" and is_support_affinity(v_t):
+if (
+args.num_workers != 0
+and device == "cpu"
+and is_support_affinity(v_t)
+):
expected_max = int(psutil.cpu_count(logical=False))
if args.num_workers >= expected_max:
print(f"[ERROR] You specified num_workers are larger than physical cores, please set any number less than {expected_max}", file=sys.stderr)
print(
f"[ERROR] You specified num_workers are larger than physical cores, please set any number less than {expected_max}",
file=sys.stderr,
)
with train_loader.enable_cpu_affinity():
logger = train(
g,
......
......@@ -7,24 +7,24 @@ import random
import time
from collections import OrderedDict
+import dgl
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import GAT
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
+from tqdm import tqdm
-import dgl
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import GAT
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
-from tqdm import tqdm
epsilon = 1 - math.log(2)
......
import argparse
import time
+import dgl
+import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.nn as nn
......@@ -9,9 +12,6 @@ import torch.optim as optim
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
-import dgl.nn.pytorch as dglnn
class GAT(nn.Module):
def __init__(
......
import argparse
import time
+import dgl
+import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.nn as nn
......@@ -9,9 +12,6 @@ import torch.optim as optim
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
-import dgl.nn.pytorch as dglnn
class SAGE(nn.Module):
def __init__(
......
......@@ -7,23 +7,23 @@ import random
import time
from collections import OrderedDict
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import MLP
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
+from tqdm import tqdm
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import MLP
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
-from tqdm import tqdm
epsilon = 1 - math.log(2)
......
......@@ -7,23 +7,23 @@ import random
import sys
import time
+import dgl
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import GAT
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
-import dgl
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import GAT
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
device = None
dataset = "ogbn-proteins"
......
import os
import time
+import dgl.function as fn
import numpy as np
import torch
import torch.nn as nn
......@@ -12,8 +14,6 @@ from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
from utils import load_model, set_random_seed
-import dgl.function as fn
def normalize_edge_weights(graph, device, num_ew_channels):
degs = graph.in_degrees().float()
......
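The body of normalize_edge_weights is elided past the in-degree computation; a hedged sketch of a symmetric D^{-1/2} scaling (the edata key "feat" and per-channel layout are assumptions, not the elided original):

import torch
import dgl.function as fn

def normalize_edge_weights_sketch(graph, device, num_ew_channels):
    degs = graph.in_degrees().float().clamp(min=1).to(device)
    graph.ndata["norm"] = torch.pow(degs, -0.5).unsqueeze(-1)        # D^{-1/2}
    graph.apply_edges(fn.u_mul_v("norm", "norm", "coef"))            # d_u^-1/2 * d_v^-1/2 per edge
    graph.edata["feat"] = graph.edata["feat"] * graph.edata["coef"]  # (E, C) * (E, 1) broadcast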