Unverified Commit 704bcaf6 authored by Hongzhi (Steve), Chen, committed by GitHub
parent 6bc82161
@@ -3,7 +3,7 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import f1_score


 if __name__ == "__main__":
     venue_count = 133
     author_count = 246678
     experiment_times = 1
@@ -25,7 +25,7 @@ if __name__ == "__main__":
         file.readline()
         print("read line by line")
         for line in file:
-            embed = line.strip().split(' ')
+            embed = line.strip().split(" ")
             if embed[0] in check_venue:
                 venue_embed_dict[embed[0]] = []
                 for i in range(1, len(embed), 1):
@@ -34,7 +34,7 @@ if __name__ == "__main__":
                 author_embed_dict[embed[0]] = []
                 for j in range(1, len(embed), 1):
                     author_embed_dict[embed[0]].append(float(embed[j]))
-    #get venue embeddings
+    # get venue embeddings
     print("reading finished")
     venues = list(venue_embed_dict.keys())
     authors = list(author_embed_dict.keys())
@@ -68,11 +68,11 @@ if __name__ == "__main__":
     # split data into training and testing
     print("splitting")
     venue_split = int(venue_count * percent)
-    venue_training = venue_embedding[:venue_split,:]
-    venue_testing = venue_embedding[venue_split:,:]
+    venue_training = venue_embedding[:venue_split, :]
+    venue_testing = venue_embedding[venue_split:, :]
     author_split = int(author_count * percent)
-    author_training = author_embedding[:author_split,:]
-    author_testing = author_embedding[author_split:,:]
+    author_training = author_embedding[:author_split, :]
+    author_testing = author_embedding[author_split:, :]
     # split label into training and testing
     venue_label = []
     venue_true = []
@@ -94,15 +94,27 @@ if __name__ == "__main__":
     author_true = np.array(author_true)
     file.close()
     print("beging predicting")
-    clf_venue = LogisticRegression(random_state=0, solver="lbfgs", multi_class="multinomial").fit(venue_training,venue_label)
+    clf_venue = LogisticRegression(
+        random_state=0, solver="lbfgs", multi_class="multinomial"
+    ).fit(venue_training, venue_label)
     y_pred_venue = clf_venue.predict(venue_testing)
-    clf_author = LogisticRegression(random_state=0, solver="lbfgs", multi_class="multinomial").fit(author_training,author_label)
+    clf_author = LogisticRegression(
+        random_state=0, solver="lbfgs", multi_class="multinomial"
+    ).fit(author_training, author_label)
     y_pred_author = clf_author.predict(author_testing)
-    macro_average_venue += f1_score(venue_true, y_pred_venue, average="macro")
-    micro_average_venue += f1_score(venue_true, y_pred_venue, average="micro")
-    macro_average_author += f1_score(author_true, y_pred_author, average="macro")
-    micro_average_author += f1_score(author_true, y_pred_author, average="micro")
-    print(macro_average_venue/float(experiment_times))
-    print(micro_average_venue/float(experiment_times))
+    macro_average_venue += f1_score(
+        venue_true, y_pred_venue, average="macro"
+    )
+    micro_average_venue += f1_score(
+        venue_true, y_pred_venue, average="micro"
+    )
+    macro_average_author += f1_score(
+        author_true, y_pred_author, average="macro"
+    )
+    micro_average_author += f1_score(
+        author_true, y_pred_author, average="micro"
+    )
+    print(macro_average_venue / float(experiment_times))
+    print(micro_average_venue / float(experiment_times))
     print(macro_average_author / float(experiment_times))
     print(micro_average_author / float(experiment_times))
@@ -4,15 +4,15 @@ import argparse
 import copy
 import random

+import dgl
+import dgl.function as fn
 import numpy as np
 import torch
 import torch.nn as nn
 import torch.optim as optim
-from tqdm import trange
-
-import dgl
-import dgl.function as fn
 from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
+from tqdm import trange


 class MixHopConv(nn.Module):
@@ -83,7 +83,6 @@ class MixHopConv(nn.Module):
         max_j = max(self.p) + 1
         outputs = []
-
         for j in range(max_j):
             if j in self.p:
                 output = self.weights[str(j)](feats)
                 outputs.append(output)
......
@@ -104,7 +104,6 @@ def HEM(W, levels, rid=None):
     print("Heavy Edge Matching coarsening with Xavier version")

     for _ in range(levels):
-
         # CHOOSE THE WEIGHTS FOR THE PAIRING
         # weights = ones(N,1) # metis weights
         weights = degree  # graclus weights
@@ -186,7 +185,6 @@ def HEM_one_level(rr, cc, vv, rid, weights):
                 if marked[nid]:
                     tval = 0.0
                 else:
-
                     # First approach
                     if 2 == 1:
                         tval = vv[rs + jj] * (
@@ -230,7 +228,6 @@ def compute_perm(parents):
     indices.append(list(range(M_last)))

     for parent in parents[::-1]:
-
         # Fake nodes go after real ones.
         pool_singeltons = len(parent)
......
 import argparse
 import time

+import dgl
 import networkx as nx
 import numpy as np
 import torch
@@ -8,14 +10,12 @@ import torch.nn as nn
 import torch.nn.functional as F
 from coarsening import coarsen
 from coordinate import get_coordinates, z2polar
-from grid_graph import grid_graph
-from torch.utils.data import DataLoader
-from torchvision import datasets, transforms
-
-import dgl
 from dgl.data import load_data, register_data_args
 from dgl.nn.pytorch.conv import ChebConv, GMMConv
 from dgl.nn.pytorch.glob import MaxPooling
+from grid_graph import grid_graph
+from torch.utils.data import DataLoader
+from torchvision import datasets, transforms

 argparser = argparse.ArgumentParser("MNIST")
 argparser.add_argument(
......
 import argparse

+import dgl
+import dgl.nn as dglnn
 import torch
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
+from dgl.data import AsGraphPredDataset
+from dgl.dataloading import GraphDataLoader
 from ogb.graphproppred import DglGraphPropPredDataset, Evaluator
 from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
 from tqdm import tqdm
-
-import dgl
-import dgl.nn as dglnn
-from dgl.data import AsGraphPredDataset
-from dgl.dataloading import GraphDataLoader


 class MLP(nn.Module):
     def __init__(self, in_feats):
......
 import argparse
 import os

+import dgl.nn as dglnn
 import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
@@ -8,10 +10,6 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torchmetrics.functional as MF
 import tqdm
-from ogb.nodeproppred import DglNodePropPredDataset
-from torch.nn.parallel import DistributedDataParallel
-
-import dgl.nn as dglnn
 from dgl.data import AsNodePredDataset
 from dgl.dataloading import (
     DataLoader,
@@ -19,6 +17,8 @@ from dgl.dataloading import (
     NeighborSampler,
 )
 from dgl.multiprocessing import shared_tensor
+from ogb.nodeproppred import DglNodePropPredDataset
+from torch.nn.parallel import DistributedDataParallel


 class SAGE(nn.Module):
......
@@ -3,13 +3,13 @@ import os
 import re
 from collections import Counter

+import dgl
 import networkx as nx
 import numpy as np
 import torch as th
-from scipy.linalg import fractional_matrix_power, inv
-
-import dgl
 from dgl.data import DGLDataset
+from scipy.linalg import fractional_matrix_power, inv

 """ Compute Personalized Page Ranking"""
@@ -137,7 +137,6 @@ def process(dataset):
-
 def load(dataset):
     basedir = os.path.dirname(os.path.abspath(__file__))
     datadir = os.path.join(basedir, "data", dataset)
......
 import argparse
 import warnings

+import dgl
 import torch as th
 from dataset import load
-
-import dgl
 from dgl.dataloading import GraphDataLoader

 warnings.filterwarnings("ignore")
@@ -66,7 +66,6 @@ def collate(samples):
-
 if __name__ == "__main__":
     # Step 1: Prepare data =================================================================== #
     dataset = load(args.dataname)
......
 import torch as th
 import torch.nn as nn
-from utils import local_global_loss_
 from dgl.nn.pytorch import GraphConv
 from dgl.nn.pytorch.glob import SumPooling
+from utils import local_global_loss_


 class MLP(nn.Module):
......
@@ -61,7 +61,6 @@ def get_negative_expectation(q_samples, average=True):
-
 def local_global_loss_(l_enc, g_enc, graph_id):
     num_graphs = g_enc.shape[0]
     num_nodes = l_enc.shape[0]
@@ -71,7 +70,6 @@ def local_global_loss_(l_enc, g_enc, graph_id):
     neg_mask = th.ones((num_nodes, num_graphs)).to(device)
-
     for nodeidx, graphidx in enumerate(graph_id):
         pos_mask[nodeidx][graphidx] = 1.0
         neg_mask[nodeidx][graphidx] = 0.0
......
""" Code adapted from https://github.com/kavehhassani/mvgrl """ """ Code adapted from https://github.com/kavehhassani/mvgrl """
import dgl
import networkx as nx import networkx as nx
import numpy as np import numpy as np
import scipy.sparse as sp import scipy.sparse as sp
import torch as th import torch as th
from scipy.linalg import fractional_matrix_power, inv
from sklearn.preprocessing import MinMaxScaler
import dgl
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
from dgl.nn import APPNPConv from dgl.nn import APPNPConv
from scipy.linalg import fractional_matrix_power, inv
from sklearn.preprocessing import MinMaxScaler
def preprocess_features(features): def preprocess_features(features):
......
@@ -8,7 +8,7 @@ import torch.nn as nn
 warnings.filterwarnings("ignore")

 from dataset import process_dataset
-from model import MVGRL, LogReg
+from model import LogReg, MVGRL

 parser = argparse.ArgumentParser(description="mvgrl")
......
@@ -2,16 +2,16 @@ import argparse
 import random
 import warnings

+import dgl
 import numpy as np
 import torch as th
 import torch.nn as nn
-
-import dgl

 warnings.filterwarnings("ignore")

 from dataset import process_dataset, process_dataset_appnp
-from model import MVGRL, LogReg
+from model import LogReg, MVGRL

 parser = argparse.ArgumentParser(description="mvgrl")
......
 import time

+from dgl.sampling import node2vec_random_walk
 from model import Node2vecModel
 from utils import load_graph, parse_arguments
-
-from dgl.sampling import node2vec_random_walk


 def time_randomwalk(graph, args):
     """
@@ -49,7 +49,6 @@ def train_node2vec(graph, eval_set, args):
-
 if __name__ == "__main__":
     args = parse_arguments()
     graph, eval_set = load_graph(args.dataset)
......
 import torch
 import torch.nn as nn
-from sklearn.linear_model import LogisticRegression
-from torch.utils.data import DataLoader
-
 from dgl.sampling import node2vec_random_walk
+from sklearn.linear_model import LogisticRegression
+from torch.utils.data import DataLoader


 class Node2vec(nn.Module):
@@ -255,7 +255,6 @@ class Node2vecModel(object):
         eval_steps=-1,
         device="cpu",
     ):
-
         self.model = Node2vec(
             g,
             embedding_dim,
......
@@ -23,7 +23,6 @@ def load_graph(name):
         eval_set = [(nodes_train, y_train), (nodes_val, y_val)]
     elif name.startswith("ogbn"):
         dataset = DglNodePropPredDataset(name)
-
         graph, y = dataset[0]
         split_nodes = dataset.get_idx_split()
......
@@ -2,6 +2,9 @@ import argparse
 import time
 from functools import partial

+import dgl
+import dgl.nn.pytorch as dglnn
 import numpy as np
 import torch as th
 import torch.nn as nn
@@ -12,9 +15,6 @@ from ogb.nodeproppred import DglNodePropPredDataset
 from sampler import ClusterIter, subgraph_collate_fn
 from torch.utils.data import DataLoader
-
-import dgl
-import dgl.nn.pytorch as dglnn


 class GAT(nn.Module):
     def __init__(
......
 from time import time

-import numpy as np
 import dgl
+import numpy as np
 from dgl import backend as F
 from dgl.transforms import metis_partition
......
@@ -3,6 +3,10 @@ import time
 import traceback
 from functools import partial

+import dgl
+import dgl.function as fn
+import dgl.nn.pytorch as dglnn
 import numpy as np
 import torch as th
 import torch.multiprocessing as mp
@@ -10,15 +14,11 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import tqdm
+from dgl.data import RedditDataset
 from ogb.nodeproppred import DglNodePropPredDataset
 from sampler import ClusterIter, subgraph_collate_fn
 from torch.utils.data import DataLoader
-
-import dgl
-import dgl.function as fn
-import dgl.nn.pytorch as dglnn
-from dgl.data import RedditDataset


 #### Neighbor sampler
......
 from time import time

-import numpy as np
 import dgl
+import numpy as np
 from dgl import backend as F
 from dgl.transforms import metis_partition
......