Unverified Commit 704bcaf6 authored by Hongzhi (Steve), Chen; committed by GitHub
parent 6bc82161
......@@ -3,6 +3,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.multiprocessing as mp
......@@ -11,8 +13,6 @@ from reading_data import DeepwalkDataset
from torch.utils.data import DataLoader
from utils import shuffle_walks, sum_up_params
-import dgl
class DeepwalkTrainer:
def __init__(self, args):
......
......@@ -3,19 +3,19 @@ import pickle
import random
import time
+import dgl
import numpy as np
import scipy.sparse as sp
import torch
-from torch.utils.data import DataLoader
-from utils import shuffle_walks
-import dgl
from dgl.data.utils import (
_get_dgl_url,
download,
extract_archive,
get_download_dir,
)
+from torch.utils.data import DataLoader
+from utils import shuffle_walks
def ReadTxtNet(file_path="", undirected=True):
......
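The body of ReadTxtNet is elided above. As a hedged sketch (assuming the usual one "src dst [weight]" record per line; the helper name is hypothetical), a reader like it boils down to:

# Hedged sketch of an edge-list reader in the spirit of ReadTxtNet.
def read_txt_net_sketch(file_path, undirected=True):
    net = {}
    with open(file_path) as f:
        for line in f:
            fields = line.strip().split()
            if len(fields) < 2:
                continue  # skip blank or malformed lines
            src, dst = int(fields[0]), int(fields[1])
            w = float(fields[2]) if len(fields) > 2 else 1.0
            net.setdefault(src, {})[dst] = w
            if undirected:
                net.setdefault(dst, {})[src] = w  # mirror the edge
    return net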
import argparse
import random
+import dgl
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
+from dgl.dataloading import GraphDataLoader
from ogb.graphproppred import Evaluator
from ogb.graphproppred.mol_encoder import AtomEncoder
from preprocessing import prepare_dataset
from torch.utils.data import Dataset
from tqdm import tqdm
-import dgl
-from dgl.dataloading import GraphDataLoader
def aggregate_mean(h, vector_field, h_in):
return torch.mean(h, dim=1)
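For the shape contract of aggregate_mean (assumed: neighbor messages stacked along dim 1), a minimal usage sketch:

import torch

# h stacks each node's neighbor messages along dim 1; the mean collapses
# that axis. vector_field and h_in are simply unused by this aggregator.
h = torch.randn(4, 3, 16)            # (num_nodes, num_neighbors, feat_dim)
out = aggregate_mean(h, None, None)
assert out.shape == (4, 16)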
......@@ -116,7 +116,6 @@ class DGNLayer(nn.Module):
return {"h": h}
def forward(self, g, h, snorm_n):
g.ndata["h"] = h
# pretransformation
......@@ -284,7 +283,6 @@ def evaluate_network(model, device, data_loader):
def train(dataset, params):
trainset, valset, testset = dataset.train, dataset.val, dataset.test
device = params.device
......
......@@ -5,10 +5,10 @@ import graph_tool.topology as gt_topology
import networkx as nx
import numpy as np
import torch
-from ogb.graphproppred import DglGraphPropPredDataset
-from tqdm import tqdm
from dgl.data.utils import load_graphs, save_graphs
+from ogb.graphproppred import DglGraphPropPredDataset
+from tqdm import tqdm
def to_undirected(edge_index):
......@@ -20,7 +20,6 @@ def to_undirected(edge_index):
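The body of to_undirected is elided; a hedged sketch of the standard construction (mirror every edge, then deduplicate), assuming edge_index is a (2, E) LongTensor:

import torch

def to_undirected_sketch(edge_index):
    both = torch.cat([edge_index, edge_index.flip(0)], dim=1)  # append reversed edges
    return torch.unique(both, dim=1)                           # drop duplicate columns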
def induced_edge_automorphism_orbits(edge_list):
##### node automorphism orbits #####
graph = gt.Graph(directed=False)
graph.add_edge_list(edge_list)
......@@ -98,7 +97,6 @@ def induced_edge_automorphism_orbits(edge_list):
def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
##### edge structural identifiers #####
edge_index = edge_index.transpose(1, 0).cpu().numpy()
......@@ -131,7 +129,6 @@ def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
for sub_iso_curr in sub_iso:
mapping = sub_iso_curr.get_array()
for i, edge in enumerate(subgraph_edges):
# for every edge in the graph H, find the edge in the subgraph G_S to which it is mapped
# (by finding where its endpoints are matched).
# Then, increase the count of the matched edge w.r.t. the corresponding orbit
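The counting loop these comments describe reduces to the following hedged sketch (all names hypothetical: mapping[v] is the host node matched to template node v, edge_orbit[i] the orbit of template edge i):

from collections import defaultdict

def count_edge_orbits(sub_isos, subgraph_edges, edge_orbit):
    counts = defaultdict(lambda: defaultdict(int))
    for mapping in sub_isos:
        for i, (u, v) in enumerate(subgraph_edges):
            # Template edge i lands on host edge (mapping[u], mapping[v]);
            # credit that host edge's count for the template edge's orbit.
            counts[(mapping[u], mapping[v])][edge_orbit[i]] += 1
    return counts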
......@@ -149,7 +146,6 @@ def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
def prepare_dataset(name):
# maximum size of cycle graph
k = 8
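With k = 8, the substructure list presumably spans the cycles C3 through C8; a hedged sketch of generating their edge lists:

import networkx as nx

k = 8
# Edge lists of cycle templates C3..C8, ready for the orbit machinery above.
cycle_edge_lists = [list(nx.cycle_graph(n).edges()) for n in range(3, k + 1)]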
......@@ -174,7 +170,6 @@ def prepare_dataset(name):
def generate_dataset(path, name):
### compute the orbits of each substructure in the list, as well as the node automorphism count
subgraph_dicts = []
......@@ -218,7 +213,6 @@ def generate_dataset(path, name):
def _prepare(g, subgraph_dicts):
edge_index = torch.stack(g.edges())
identifiers = None
......
......@@ -3,6 +3,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.multiprocessing as mp
......@@ -11,8 +13,6 @@ from reading_data import LineDataset
from torch.utils.data import DataLoader
from utils import check_args, sum_up_params
-import dgl
class LineTrainer:
def __init__(self, args):
......
......@@ -2,11 +2,11 @@
import argparse
+import dgl
from ogb.linkproppred import DglLinkPropPredDataset
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
def load_from_ogbl_with_name(name):
choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"]
......
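The elided loader body presumably validates the name against the choices and unwraps the OGB dataset; a hedged sketch:

from ogb.linkproppred import DglLinkPropPredDataset

def load_from_ogbl_sketch(name):
    choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"]
    assert name in choices, f"name must be one of {choices}"
    return DglLinkPropPredDataset(name)[0]  # the packaged DGLGraph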
......@@ -3,18 +3,18 @@ import pickle
import random
import time
+import dgl
import numpy as np
import scipy.sparse as sp
import torch
-from torch.utils.data import DataLoader
-import dgl
from dgl.data.utils import (
_get_dgl_url,
download,
extract_archive,
get_download_dir,
)
+from torch.utils.data import DataLoader
def ReadTxtNet(file_path="", undirected=True):
......
import argparse
import math
+import dgl
import torch
import torch.nn.functional as F
+from dgl.dataloading.negative_sampler import GlobalUniform
+from dgl.nn.pytorch import GraphConv, SAGEConv
from ogb.linkproppred import DglLinkPropPredDataset, Evaluator
from torch.nn import Linear
from torch.utils.data import DataLoader
-import dgl
-from dgl.dataloading.negative_sampler import GlobalUniform
-from dgl.nn.pytorch import GraphConv, SAGEConv
class Logger(object):
def __init__(self, runs, info=None):
......
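The Logger body is elided, but the calls below (add_result(run, result) and results["valid"][run]) imply a per-run, per-split result store. A hedged sketch of that contract:

class LoggerSketch:
    def __init__(self, runs, info=None):
        self.info = info
        self.results = {
            "valid": [[] for _ in range(runs)],
            "test": [[] for _ in range(runs)],
        }

    def add_result(self, run, result, split="valid"):
        assert split in self.results
        self.results[split][run].append(result)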
......@@ -275,8 +275,8 @@ if __name__ == "__main__":
type=str,
default="none",
choices=["none", "input", "hidden", "output", "all"],
help="You can set this value from 'none', 'input', 'hidden' or 'all' " \
"to apply NGNN to different GNN layers.",
help="You can set this value from 'none', 'input', 'hidden' or 'all' "
"to apply NGNN to different GNN layers.",
)
parser.add_argument(
"--num_ngnn_layers", type=int, default=1, choices=[1, 2]
......@@ -320,8 +320,8 @@ if __name__ == "__main__":
"--num_workers",
type=int,
default=24,
help="number of workers for dynamic dataloaders; " \
"using a larger value for dynamic dataloading is recommended",
help="number of workers for dynamic dataloaders; "
"using a larger value for dynamic dataloading is recommended",
)
# Testing settings
parser.add_argument(
......@@ -335,8 +335,8 @@ if __name__ == "__main__":
type=int,
nargs="*",
default=[10],
help="hits@K for each eval step; " \
"only available for datasets with hits@xx as the eval metric",
help="hits@K for each eval step; "
"only available for datasets with hits@xx as the eval metric",
)
parser.add_argument(
"--test_topk",
......@@ -352,7 +352,7 @@ if __name__ == "__main__":
args.res_dir = os.path.join(
f'results{"_NoTest" if args.no_test else ""}',
-f'{args.dataset.split("-")[1]}-{args.ngnn_type}+{time.strftime("%m%d%H%M%S")}'
+f'{args.dataset.split("-")[1]}-{args.ngnn_type}+{time.strftime("%m%d%H%M%S")}',
)
print(f"Results will be saved in {args.res_dir}")
if not os.path.exists(args.res_dir):
......@@ -381,7 +381,7 @@ if __name__ == "__main__":
[src[:, None].repeat(1, tgt_neg.size(1)), tgt_neg], dim=-1
) # [Ns, Nt, 2]
-# Reconstruct the graph for ogbl-collab data
+# Reconstruct the graph for ogbl-collab data
# for validation edge augmentation and coalesce.
if args.dataset == "ogbl-collab":
# Float edata for to_simple transformation.
......@@ -503,7 +503,7 @@ if __name__ == "__main__":
if args.dataset.startswith("ogbl-citation"):
# For this dataset, subgraphs extracted around positive edges are
# rather larger than negative edges. Thus we sample from 1000
-# positive and 1000 negative edges to estimate the k (number of
+# positive and 1000 negative edges to estimate the k (number of
# nodes to hold for each graph) used in SortPooling.
# You can certainly set k manually, instead of estimating from
# a percentage of sampled subgraphs.
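A hedged sketch of the percentile-style estimate the comment describes (the 0.6 default, the floor of 10, and the names are hypothetical, not the script's actual flag values):

import numpy as np

def estimate_sortpool_k(sampled_subgraphs, percent=0.6):
    sizes = sorted(g.num_nodes() for g in sampled_subgraphs)
    idx = max(int(np.ceil(percent * len(sizes))) - 1, 0)
    return max(sizes[idx], 10)  # floor k so SortPooling keeps enough nodes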
......@@ -554,25 +554,25 @@ if __name__ == "__main__":
epo_train_etime = datetime.datetime.now()
print_log(
f"[epoch: {epoch}]",
f" <Train> starts: {epo_stime}, " \
f"ends: {epo_train_etime}, " \
f"spent time:{epo_train_etime - epo_stime}"
f" <Train> starts: {epo_stime}, "
f"ends: {epo_train_etime}, "
f"spent time:{epo_train_etime - epo_stime}",
)
if epoch % args.eval_steps == 0:
epo_eval_stime = datetime.datetime.now()
results = test(val_loader, loggers.keys())
epo_eval_etime = datetime.datetime.now()
print_log(
f" <Validation> starts: {epo_eval_stime}, " \
f"ends: {epo_eval_etime}, " \
f" <Validation> starts: {epo_eval_stime}, "
f"ends: {epo_eval_etime}, "
f"spent time:{epo_eval_etime - epo_eval_stime}"
)
for key, valid_res in results.items():
loggers[key].add_result(run, valid_res)
to_print = (
f"Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f"Loss: {loss:.4f}, " \
f"Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Loss: {loss:.4f}, "
f"Valid ({args.val_percent}%) [{key}]: {valid_res:.4f}"
)
print_log(key, to_print)
......@@ -590,17 +590,15 @@ if __name__ == "__main__":
tested = dict()
for eval_metric in loggers.keys():
# Select models according to the eval_metric of the dataset.
-res = torch.tensor(
-loggers[eval_metric].results["valid"][run]
-)
+res = torch.tensor(loggers[eval_metric].results["valid"][run])
if args.no_test:
epoch = torch.argmax(res).item() + 1
val_res = loggers[eval_metric].results["valid"][run][epoch - 1]
loggers[eval_metric].add_result(run, (epoch, val_res), "test")
print_log(
f"No Test; Best Valid:",
f" Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f" Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Valid ({args.val_percent}%) [{eval_metric}]: {val_res:.4f}",
)
continue
......@@ -610,13 +608,13 @@ if __name__ == "__main__":
).tolist() # indices of top k valid results
print_log(
f"Eval Metric: {eval_metric}",
f"Run: {run + 1:02d}, " \
f"Run: {run + 1:02d}, "
f"Top {args.test_topk} Eval Points: {idx_to_test}",
)
for _idx, epoch in enumerate(idx_to_test):
print_log(
f"Test Point[{_idx+1}]: " \
f"Epoch {epoch:02d}, " \
f"Test Point[{_idx+1}]: "
f"Epoch {epoch:02d}, "
f"Test Metric: {dataset.eval_metric}"
)
if epoch not in tested:
......@@ -643,11 +641,11 @@ if __name__ == "__main__":
run, (epoch, val_res, test_res), "test"
)
print_log(
f" Run: {run + 1:02d}, " \
f"Epoch: {epoch:02d}, " \
f"Valid ({args.val_percent}%) [{eval_metric}]: " \
f"{loggers[eval_metric].results['valid'][run][epoch-1]:.4f}, " \
f"Valid (final) [{dataset.eval_metric}]: {val_res:.4f}, " \
f" Run: {run + 1:02d}, "
f"Epoch: {epoch:02d}, "
f"Valid ({args.val_percent}%) [{eval_metric}]: "
f"{loggers[eval_metric].results['valid'][run][epoch-1]:.4f}, "
f"Valid (final) [{dataset.eval_metric}]: {val_res:.4f}, "
f"Test [{dataset.eval_metric}]: {test_res:.4f}"
)
......
......@@ -129,7 +129,7 @@ class Logger(object):
f' Final Test Point[1]: {self.results["test"][run][0][0]}',
f' Final Valid: {self.results["test"][run][0][1]}',
f' Final Test: {self.results["test"][run][0][2]}',
-sep='\n',
+sep="\n",
file=f,
)
else:
......
......@@ -4,9 +4,9 @@ import glob
import numpy as np
import torch
import torch.nn.functional as F
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
from dgl import function as fn
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
device = None
......
......@@ -7,6 +7,8 @@ import os
import random
import time
+import dgl
import numpy as np
import torch
import torch.nn.functional as F
......@@ -16,8 +18,6 @@ from matplotlib.ticker import AutoMinorLocator, MultipleLocator
from models import GAT
from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-import dgl
epsilon = 1 - math.log(2)
device = None
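epsilon = 1 - log 2 is the constant these GAT examples typically feed into a log-softened cross entropy; a hedged sketch of that loss (shifted so zero loss maps to zero):

import math
import torch
import torch.nn.functional as F

epsilon = 1 - math.log(2)

def loge_cross_entropy(logits, labels):
    y = F.cross_entropy(logits, labels, reduction="none")
    y = torch.log(epsilon + y) - math.log(epsilon)  # dampens easy examples
    return torch.mean(y)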
......
+import dgl.nn.pytorch as dglnn
import torch
import torch.nn as nn
-import dgl.nn.pytorch as dglnn
from dgl import function as fn
from dgl.ops import edge_softmax
from dgl.utils import expand_as_pair
......
import argparse
import itertools
+import sys
+import dgl
+import dgl.nn as dglnn
+import psutil
import torch as th
import torch.nn as nn
import torch.nn.functional as F
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from tqdm import tqdm
-import dgl
-import dgl.nn as dglnn
from dgl import AddReverse, Compose, ToSimple
from dgl.nn import HeteroEmbedding
-import psutil
-import sys
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from tqdm import tqdm
v_t = dgl.__version__
def prepare_data(args, device):
dataset = DglNodePropPredDataset(name="ogbn-mag")
split_idx = dataset.get_idx_split()
......@@ -41,7 +42,7 @@ def prepare_data(args, device):
batch_size=1024,
shuffle=True,
num_workers=num_workers,
-device=device
+device=device,
)
return g, labels, dataset.num_classes, split_idx, logger, train_loader
......@@ -271,9 +272,7 @@ def train(
emb = extract_embed(node_embed, input_nodes)
# Add the batch's raw "paper" features
-emb.update(
-{"paper": g.ndata["feat"]["paper"][input_nodes_indexes]}
-)
+emb.update({"paper": g.ndata["feat"]["paper"][input_nodes_indexes]})
emb = {k: e.to(device) for k, e in emb.items()}
lbl = labels[seeds].to(device)
......@@ -322,7 +321,7 @@ def test(g, model, node_embed, y_true, device, split_idx):
batch_size=16384,
shuffle=False,
num_workers=0,
-device=device
+device=device,
)
pbar = tqdm(total=y_true.size(0))
......@@ -375,14 +374,18 @@ def test(g, model, node_embed, y_true, device, split_idx):
return train_acc, valid_acc, test_acc
def is_support_affinity(v_t):
# dgl supports enable_cpu_affinity since 0.9.1
return v_t >= "0.9.1"
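Note that plain string comparison misorders versions such as "0.10.0" < "0.9.1"; a hedged sketch of a numeric comparison (not part of this commit):

import re

def is_support_affinity_robust(version_string):
    # Compare numeric components so "0.10.0" ranks above "0.9.1".
    nums = tuple(int(x) for x in re.findall(r"\d+", version_string)[:3])
    return nums >= (0, 9, 1)  # enable_cpu_affinity landed in DGL 0.9.1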
def main(args):
device = f"cuda:0" if th.cuda.is_available() else "cpu"
-g, labels, num_classes, split_idx, logger, train_loader = prepare_data(args, device)
+g, labels, num_classes, split_idx, logger, train_loader = prepare_data(
+args, device
+)
embed_layer = rel_graph_embed(g, 128).to(device)
model = EntityClassify(g, 128, num_classes).to(device)
......@@ -395,7 +398,6 @@ def main(args):
)
for run in range(args.runs):
try:
embed_layer.reset_parameters()
model.reset_parameters()
......@@ -409,10 +411,17 @@ def main(args):
)
optimizer = th.optim.Adam(all_params, lr=0.01)
-if args.num_workers != 0 and device == "cpu" and is_support_affinity(v_t):
+if (
+args.num_workers != 0
+and device == "cpu"
+and is_support_affinity(v_t)
+):
expected_max = int(psutil.cpu_count(logical=False))
if args.num_workers >= expected_max:
print(f"[ERROR] You specified num_workers are larger than physical cores, please set any number less than {expected_max}", file=sys.stderr)
print(
f"[ERROR] You specified num_workers are larger than physical cores, please set any number less than {expected_max}",
file=sys.stderr,
)
with train_loader.enable_cpu_affinity():
logger = train(
g,
......
......@@ -7,24 +7,24 @@ import random
import time
from collections import OrderedDict
+import dgl
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import GAT
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
+from tqdm import tqdm
-import dgl
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import GAT
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
-from tqdm import tqdm
epsilon = 1 - math.log(2)
......
import argparse
import time
+import dgl
+import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.nn as nn
......@@ -9,9 +12,6 @@ import torch.optim as optim
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
-import dgl.nn.pytorch as dglnn
class GAT(nn.Module):
def __init__(
......
import argparse
import time
+import dgl
+import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.nn as nn
......@@ -9,9 +12,6 @@ import torch.optim as optim
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset
-import dgl
-import dgl.nn.pytorch as dglnn
class SAGE(nn.Module):
def __init__(
......
......@@ -7,23 +7,23 @@ import random
import time
from collections import OrderedDict
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import MLP
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
+from tqdm import tqdm
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import MLP
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
-from tqdm import tqdm
epsilon = 1 - math.log(2)
......
......@@ -7,23 +7,23 @@ import random
import sys
import time
+import dgl
+import dgl.function as fn
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
+from matplotlib.ticker import AutoMinorLocator, MultipleLocator
+from models import GAT
+from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
+from torch import nn
-import dgl
-import dgl.function as fn
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
MultiLayerNeighborSampler,
)
-from matplotlib.ticker import AutoMinorLocator, MultipleLocator
-from models import GAT
-from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
-from torch import nn
device = None
dataset = "ogbn-proteins"
......
import os
import time
+import dgl.function as fn
import numpy as np
import torch
import torch.nn as nn
......@@ -12,8 +14,6 @@ from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.tensorboard import SummaryWriter
from utils import load_model, set_random_seed
-import dgl.function as fn
def normalize_edge_weights(graph, device, num_ew_channels):
degs = graph.in_degrees().float()
......
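The body of normalize_edge_weights is elided past the in-degree computation; a hedged sketch of a symmetric D^{-1/2} scaling (the edata key "feat" and per-channel layout are assumptions, not the elided original):

import torch
import dgl.function as fn

def normalize_edge_weights_sketch(graph, device, num_ew_channels):
    degs = graph.in_degrees().float().clamp(min=1).to(device)
    graph.ndata["norm"] = torch.pow(degs, -0.5).unsqueeze(-1)        # D^{-1/2}
    graph.apply_edges(fn.u_mul_v("norm", "norm", "coef"))            # d_u^-1/2 * d_v^-1/2 per edge
    graph.edata["feat"] = graph.edata["feat"] * graph.edata["coef"]  # (E, C) * (E, 1) broadcast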