Unverified Commit 23d09057 authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4642)



* [Misc] Black auto fix.

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent a9f2acf3
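The diff that follows is the output of exactly this kind of formatting pass: Black re-wraps statements that overflow the configured line length, and the "sort" step reorders imports into sorted, grouped blocks. A minimal sketch of how such a pass can be reproduced is shown below; it is illustrative only and assumes Black >= 20.8b0 (which provides black.Mode and black.format_str), isort >= 5 (which provides isort.code), a 79-column limit, and a made-up input snippet. None of these are taken from the commit itself.

# Illustrative sketch only; not code from this commit. Assumes black >= 20.8b0
# (black.Mode, black.format_str) and isort >= 5 (isort.code) are installed.
import black
import isort

UNFORMATTED = (
    "import torch\n"
    "import os\n"
    "import math\n"
    "\n"
    "\n"
    "def sample(heads):\n"
    "    seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True)\n"
    "    return seeds, head_invmap\n"
)

# Sort the imports first (what "* sort" in the commit message appears to refer
# to), then let Black re-wrap any statement longer than the chosen line length.
# The 79-column limit is an assumption; it matches the wrapping seen in the
# diff below.
sorted_src = isort.code(UNFORMATTED)
formatted = black.format_str(sorted_src, mode=black.Mode(line_length=79))
print(formatted)
# End of illustrative sketch.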
from collections import defaultdict
import math
import os
import sys
import time
from collections import defaultdict
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
from numpy import random
from torch.nn.parameter import Parameter
from tqdm.auto import tqdm
from utils import *
import dgl
import dgl.function as fn
from utils import *
def get_graph(network_data, vocab):
""" Build graph, treat all nodes as the same type
"""Build graph, treat all nodes as the same type
Parameters
----------
......@@ -57,7 +57,9 @@ class NeighborSampler(object):
def sample(self, pairs):
heads, tails, types = zip(*pairs)
seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True)
seeds, head_invmap = torch.unique(
torch.LongTensor(heads), return_inverse=True
)
blocks = []
for fanout in reversed(self.num_fanouts):
sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
......@@ -90,7 +92,9 @@ class DGLGATNE(nn.Module):
self.edge_type_count = edge_type_count
self.dim_a = dim_a
self.node_embeddings = Parameter(torch.FloatTensor(num_nodes, embedding_size))
self.node_embeddings = Parameter(
torch.FloatTensor(num_nodes, embedding_size)
)
self.node_type_embeddings = Parameter(
torch.FloatTensor(num_nodes, edge_type_count, embedding_u_size)
)
......@@ -100,16 +104,24 @@ class DGLGATNE(nn.Module):
self.trans_weights_s1 = Parameter(
torch.FloatTensor(edge_type_count, embedding_u_size, dim_a)
)
self.trans_weights_s2 = Parameter(torch.FloatTensor(edge_type_count, dim_a, 1))
self.trans_weights_s2 = Parameter(
torch.FloatTensor(edge_type_count, dim_a, 1)
)
self.reset_parameters()
def reset_parameters(self):
self.node_embeddings.data.uniform_(-1.0, 1.0)
self.node_type_embeddings.data.uniform_(-1.0, 1.0)
self.trans_weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s1.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s2.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s1.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s2.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
# embs: [batch_size, embedding_size]
def forward(self, block):
......@@ -122,10 +134,16 @@ class DGLGATNE(nn.Module):
with block.local_scope():
for i in range(self.edge_type_count):
edge_type = self.edge_types[i]
block.srcdata[edge_type] = self.node_type_embeddings[input_nodes, i]
block.dstdata[edge_type] = self.node_type_embeddings[output_nodes, i]
block.srcdata[edge_type] = self.node_type_embeddings[
input_nodes, i
]
block.dstdata[edge_type] = self.node_type_embeddings[
output_nodes, i
]
block.update_all(
fn.copy_u(edge_type, "m"), fn.sum("m", edge_type), etype=edge_type
fn.copy_u(edge_type, "m"),
fn.sum("m", edge_type),
etype=edge_type,
)
node_type_embed.append(block.dstdata[edge_type])
......@@ -152,7 +170,9 @@ class DGLGATNE(nn.Module):
attention = (
F.softmax(
torch.matmul(
torch.tanh(torch.matmul(tmp_node_type_embed, trans_w_s1)),
torch.tanh(
torch.matmul(tmp_node_type_embed, trans_w_s1)
),
trans_w_s2,
)
.squeeze(2)
......@@ -173,7 +193,9 @@ class DGLGATNE(nn.Module):
)
last_node_embed = F.normalize(node_embed, dim=2)
return last_node_embed # [batch_size, edge_type_count, embedding_size]
return (
last_node_embed # [batch_size, edge_type_count, embedding_size]
)
class NSLoss(nn.Module):
......@@ -187,7 +209,8 @@ class NSLoss(nn.Module):
self.sample_weights = F.normalize(
torch.Tensor(
[
(math.log(k + 2) - math.log(k + 1)) / math.log(num_nodes + 1)
(math.log(k + 2) - math.log(k + 1))
/ math.log(num_nodes + 1)
for k in range(num_nodes)
]
),
......@@ -257,14 +280,20 @@ def train_model(network_data):
pin_memory=True,
)
model = DGLGATNE(
num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a
num_nodes,
embedding_size,
embedding_u_size,
edge_types,
edge_type_count,
dim_a,
)
nsloss = NSLoss(num_nodes, num_sampled, embedding_size)
model.to(device)
nsloss.to(device)
optimizer = torch.optim.Adam(
[{"params": model.parameters()}, {"params": nsloss.parameters()}], lr=1e-3
[{"params": model.parameters()}, {"params": nsloss.parameters()}],
lr=1e-3,
)
best_score = 0
......@@ -286,7 +315,10 @@ def train_model(network_data):
block_types = block_types.to(device)
embs = model(block[0].to(device))[head_invmap]
embs = embs.gather(
1, block_types.view(-1, 1, 1).expand(embs.shape[0], 1, embs.shape[2])
1,
block_types.view(-1, 1, 1).expand(
embs.shape[0], 1, embs.shape[2]
),
)[:, 0]
loss = nsloss(
block[0].dstdata[dgl.NID][head_invmap].to(device),
......@@ -307,7 +339,9 @@ def train_model(network_data):
model.eval()
# {'1': {}, '2': {}}
final_model = dict(zip(edge_types, [dict() for _ in range(edge_type_count)]))
final_model = dict(
zip(edge_types, [dict() for _ in range(edge_type_count)])
)
for i in range(num_nodes):
train_inputs = (
torch.tensor([i for _ in range(edge_type_count)])
......@@ -315,7 +349,9 @@ def train_model(network_data):
.to(device)
) # [i, i]
train_types = (
torch.tensor(list(range(edge_type_count))).unsqueeze(1).to(device)
torch.tensor(list(range(edge_type_count)))
.unsqueeze(1)
.to(device)
) # [0, 1]
pairs = torch.cat(
(train_inputs, train_inputs, train_types), dim=1
......@@ -343,7 +379,9 @@ def train_model(network_data):
valid_aucs, valid_f1s, valid_prs = [], [], []
test_aucs, test_f1s, test_prs = [], [], []
for i in range(edge_type_count):
if args.eval_type == "all" or edge_types[i] in args.eval_type.split(","):
if args.eval_type == "all" or edge_types[i] in args.eval_type.split(
","
):
tmp_auc, tmp_f1, tmp_pr = evaluate(
final_model[edge_types[i]],
valid_true_data_by_edge[edge_types[i]],
......
from collections import defaultdict
import math
import os
import sys
import time
from collections import defaultdict
import numpy as np
import torch
......@@ -11,14 +11,14 @@ import torch.nn.functional as F
import tqdm
from numpy import random
from torch.nn.parameter import Parameter
from utils import *
import dgl
import dgl.function as fn
from utils import *
def get_graph(network_data, vocab):
""" Build graph, treat all nodes as the same type
"""Build graph, treat all nodes as the same type
Parameters
----------
......@@ -58,7 +58,9 @@ class NeighborSampler(object):
def sample(self, pairs):
pairs = np.stack(pairs)
heads, tails, types = pairs[:, 0], pairs[:, 1], pairs[:, 2]
seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True)
seeds, head_invmap = torch.unique(
torch.LongTensor(heads), return_inverse=True
)
blocks = []
for fanout in reversed(self.num_fanouts):
sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
......@@ -91,7 +93,9 @@ class DGLGATNE(nn.Module):
self.edge_type_count = edge_type_count
self.dim_a = dim_a
self.node_embeddings = nn.Embedding(num_nodes, embedding_size, sparse=True)
self.node_embeddings = nn.Embedding(
num_nodes, embedding_size, sparse=True
)
self.node_type_embeddings = nn.Embedding(
num_nodes * edge_type_count, embedding_u_size, sparse=True
)
......@@ -101,16 +105,24 @@ class DGLGATNE(nn.Module):
self.trans_weights_s1 = Parameter(
torch.FloatTensor(edge_type_count, embedding_u_size, dim_a)
)
self.trans_weights_s2 = Parameter(torch.FloatTensor(edge_type_count, dim_a, 1))
self.trans_weights_s2 = Parameter(
torch.FloatTensor(edge_type_count, dim_a, 1)
)
self.reset_parameters()
def reset_parameters(self):
self.node_embeddings.weight.data.uniform_(-1.0, 1.0)
self.node_type_embeddings.weight.data.uniform_(-1.0, 1.0)
self.trans_weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s1.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s2.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s1.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s2.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
# embs: [batch_size, embedding_size]
def forward(self, block):
......@@ -129,7 +141,9 @@ class DGLGATNE(nn.Module):
output_nodes * self.edge_type_count + i
)
block.update_all(
fn.copy_u(edge_type, "m"), fn.sum("m", edge_type), etype=edge_type
fn.copy_u(edge_type, "m"),
fn.sum("m", edge_type),
etype=edge_type,
)
node_type_embed.append(block.dstdata[edge_type])
......@@ -156,7 +170,9 @@ class DGLGATNE(nn.Module):
attention = (
F.softmax(
torch.matmul(
torch.tanh(torch.matmul(tmp_node_type_embed, trans_w_s1)),
torch.tanh(
torch.matmul(tmp_node_type_embed, trans_w_s1)
),
trans_w_s2,
)
.squeeze(2)
......@@ -177,7 +193,9 @@ class DGLGATNE(nn.Module):
)
last_node_embed = F.normalize(node_embed, dim=2)
return last_node_embed # [batch_size, edge_type_count, embedding_size]
return (
last_node_embed # [batch_size, edge_type_count, embedding_size]
)
class NSLoss(nn.Module):
......@@ -191,7 +209,8 @@ class NSLoss(nn.Module):
self.sample_weights = F.normalize(
torch.Tensor(
[
(math.log(k + 2) - math.log(k + 1)) / math.log(num_nodes + 1)
(math.log(k + 2) - math.log(k + 1))
/ math.log(num_nodes + 1)
for k in range(num_nodes)
]
),
......@@ -201,7 +220,9 @@ class NSLoss(nn.Module):
self.reset_parameters()
def reset_parameters(self):
self.weights.weight.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.weights.weight.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
def forward(self, input, embs, label):
n = input.shape[0]
......@@ -266,7 +287,12 @@ def train_model(network_data):
)
model = DGLGATNE(
num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a,
num_nodes,
embedding_size,
embedding_u_size,
edge_types,
edge_type_count,
dim_a,
)
nsloss = NSLoss(num_nodes, num_sampled, embedding_size)
......@@ -274,21 +300,23 @@ def train_model(network_data):
model.to(device)
nsloss.to(device)
embeddings_params = list(map(id, model.node_embeddings.parameters())) + list(
map(id, model.node_type_embeddings.parameters())
)
embeddings_params = list(
map(id, model.node_embeddings.parameters())
) + list(map(id, model.node_type_embeddings.parameters()))
weights_params = list(map(id, nsloss.weights.parameters()))
optimizer = torch.optim.Adam(
[
{
"params": filter(
lambda p: id(p) not in embeddings_params, model.parameters(),
lambda p: id(p) not in embeddings_params,
model.parameters(),
)
},
{
"params": filter(
lambda p: id(p) not in weights_params, nsloss.parameters(),
lambda p: id(p) not in weights_params,
nsloss.parameters(),
)
},
],
......@@ -325,7 +353,10 @@ def train_model(network_data):
block_types = block_types.to(device)
embs = model(block[0].to(device))[head_invmap]
embs = embs.gather(
1, block_types.view(-1, 1, 1).expand(embs.shape[0], 1, embs.shape[2]),
1,
block_types.view(-1, 1, 1).expand(
embs.shape[0], 1, embs.shape[2]
),
)[:, 0]
loss = nsloss(
block[0].dstdata[dgl.NID][head_invmap].to(device),
......@@ -347,7 +378,9 @@ def train_model(network_data):
model.eval()
# {'1': {}, '2': {}}
final_model = dict(zip(edge_types, [dict() for _ in range(edge_type_count)]))
final_model = dict(
zip(edge_types, [dict() for _ in range(edge_type_count)])
)
for i in range(num_nodes):
train_inputs = (
torch.tensor([i for _ in range(edge_type_count)])
......@@ -355,7 +388,9 @@ def train_model(network_data):
.to(device)
) # [i, i]
train_types = (
torch.tensor(list(range(edge_type_count))).unsqueeze(1).to(device)
torch.tensor(list(range(edge_type_count)))
.unsqueeze(1)
.to(device)
) # [0, 1]
pairs = torch.cat(
(train_inputs, train_inputs, train_types), dim=1
......@@ -383,7 +418,9 @@ def train_model(network_data):
valid_aucs, valid_f1s, valid_prs = [], [], []
test_aucs, test_f1s, test_prs = [], [], []
for i in range(edge_type_count):
if args.eval_type == "all" or edge_types[i] in args.eval_type.split(","):
if args.eval_type == "all" or edge_types[i] in args.eval_type.split(
","
):
tmp_auc, tmp_f1, tmp_pr = evaluate(
final_model[edge_types[i]],
valid_true_data_by_edge[edge_types[i]],
......
from collections import defaultdict
import datetime
import math
import os
import sys
import time
import datetime
from collections import defaultdict
import numpy as np
import torch
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parallel import DistributedDataParallel
from tqdm.auto import tqdm
from numpy import random
from torch.nn.parallel import DistributedDataParallel
from torch.nn.parameter import Parameter
from tqdm.auto import tqdm
from utils import *
import dgl
import dgl.function as fn
import torch.multiprocessing as mp
from utils import *
def setup_seed(seed):
......@@ -29,7 +29,7 @@ def setup_seed(seed):
def get_graph(network_data, vocab):
""" Build graph, treat all nodes as the same type
"""Build graph, treat all nodes as the same type
Parameters
----------
......@@ -69,7 +69,9 @@ class NeighborSampler(object):
def sample(self, pairs):
pairs = np.stack(pairs)
heads, tails, types = pairs[:, 0], pairs[:, 1], pairs[:, 2]
seeds, head_invmap = torch.unique(torch.LongTensor(heads), return_inverse=True)
seeds, head_invmap = torch.unique(
torch.LongTensor(heads), return_inverse=True
)
blocks = []
for fanout in reversed(self.num_fanouts):
sampled_graph = dgl.sampling.sample_neighbors(self.g, seeds, fanout)
......@@ -102,7 +104,9 @@ class DGLGATNE(nn.Module):
self.edge_type_count = edge_type_count
self.dim_a = dim_a
self.node_embeddings = nn.Embedding(num_nodes, embedding_size, sparse=True)
self.node_embeddings = nn.Embedding(
num_nodes, embedding_size, sparse=True
)
self.node_type_embeddings = nn.Embedding(
num_nodes * edge_type_count, embedding_u_size, sparse=True
)
......@@ -112,16 +116,24 @@ class DGLGATNE(nn.Module):
self.trans_weights_s1 = Parameter(
torch.FloatTensor(edge_type_count, embedding_u_size, dim_a)
)
self.trans_weights_s2 = Parameter(torch.FloatTensor(edge_type_count, dim_a, 1))
self.trans_weights_s2 = Parameter(
torch.FloatTensor(edge_type_count, dim_a, 1)
)
self.reset_parameters()
def reset_parameters(self):
self.node_embeddings.weight.data.uniform_(-1.0, 1.0)
self.node_type_embeddings.weight.data.uniform_(-1.0, 1.0)
self.trans_weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s1.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights_s2.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.trans_weights.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s1.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
self.trans_weights_s2.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
# embs: [batch_size, embedding_size]
def forward(self, block):
......@@ -140,7 +152,9 @@ class DGLGATNE(nn.Module):
output_nodes * self.edge_type_count + i
)
block.update_all(
fn.copy_u(edge_type, "m"), fn.sum("m", edge_type), etype=edge_type
fn.copy_u(edge_type, "m"),
fn.sum("m", edge_type),
etype=edge_type,
)
node_type_embed.append(block.dstdata[edge_type])
......@@ -167,7 +181,9 @@ class DGLGATNE(nn.Module):
attention = (
F.softmax(
torch.matmul(
torch.tanh(torch.matmul(tmp_node_type_embed, trans_w_s1)),
torch.tanh(
torch.matmul(tmp_node_type_embed, trans_w_s1)
),
trans_w_s2,
)
.squeeze(2)
......@@ -188,7 +204,9 @@ class DGLGATNE(nn.Module):
)
last_node_embed = F.normalize(node_embed, dim=2)
return last_node_embed # [batch_size, edge_type_count, embedding_size]
return (
last_node_embed # [batch_size, edge_type_count, embedding_size]
)
class NSLoss(nn.Module):
......@@ -202,7 +220,8 @@ class NSLoss(nn.Module):
self.sample_weights = F.normalize(
torch.Tensor(
[
(math.log(k + 2) - math.log(k + 1)) / math.log(num_nodes + 1)
(math.log(k + 2) - math.log(k + 1))
/ math.log(num_nodes + 1)
for k in range(num_nodes)
]
),
......@@ -212,7 +231,9 @@ class NSLoss(nn.Module):
self.reset_parameters()
def reset_parameters(self):
self.weights.weight.data.normal_(std=1.0 / math.sqrt(self.embedding_size))
self.weights.weight.data.normal_(
std=1.0 / math.sqrt(self.embedding_size)
)
def forward(self, input, embs, label):
n = input.shape[0]
......@@ -267,7 +288,12 @@ def run(proc_id, n_gpus, args, devices, data):
neighbor_sampler = NeighborSampler(g, [neighbor_samples])
if n_gpus > 1:
train_sampler = torch.utils.data.distributed.DistributedSampler(
train_pairs, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False)
train_pairs,
num_replicas=world_size,
rank=proc_id,
shuffle=True,
drop_last=False,
)
train_dataloader = torch.utils.data.DataLoader(
train_pairs,
batch_size=batch_size,
......@@ -288,7 +314,12 @@ def run(proc_id, n_gpus, args, devices, data):
)
model = DGLGATNE(
num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a,
num_nodes,
embedding_size,
embedding_u_size,
edge_types,
edge_type_count,
dim_a,
)
nsloss = NSLoss(num_nodes, num_sampled, embedding_size)
......@@ -306,21 +337,23 @@ def run(proc_id, n_gpus, args, devices, data):
else:
mmodel = model
embeddings_params = list(map(id, mmodel.node_embeddings.parameters())) + list(
map(id, mmodel.node_type_embeddings.parameters())
)
embeddings_params = list(
map(id, mmodel.node_embeddings.parameters())
) + list(map(id, mmodel.node_type_embeddings.parameters()))
weights_params = list(map(id, nsloss.weights.parameters()))
optimizer = torch.optim.Adam(
[
{
"params": filter(
lambda p: id(p) not in embeddings_params, model.parameters(),
lambda p: id(p) not in embeddings_params,
model.parameters(),
)
},
{
"params": filter(
lambda p: id(p) not in weights_params, nsloss.parameters(),
lambda p: id(p) not in weights_params,
nsloss.parameters(),
)
},
],
......@@ -363,7 +396,10 @@ def run(proc_id, n_gpus, args, devices, data):
block_types = block_types.to(dev_id)
embs = model(block[0].to(dev_id))[head_invmap]
embs = embs.gather(
1, block_types.view(-1, 1, 1).expand(embs.shape[0], 1, embs.shape[2]),
1,
block_types.view(-1, 1, 1).expand(
embs.shape[0], 1, embs.shape[2]
),
)[:, 0]
loss = nsloss(
block[0].dstdata[dgl.NID][head_invmap].to(dev_id),
......@@ -399,7 +435,9 @@ def run(proc_id, n_gpus, args, devices, data):
.to(dev_id)
) # [i, i]
train_types = (
torch.tensor(list(range(edge_type_count))).unsqueeze(1).to(dev_id)
torch.tensor(list(range(edge_type_count)))
.unsqueeze(1)
.to(dev_id)
) # [0, 1]
pairs = torch.cat(
(train_inputs, train_inputs, train_types), dim=1
......@@ -427,9 +465,9 @@ def run(proc_id, n_gpus, args, devices, data):
valid_aucs, valid_f1s, valid_prs = [], [], []
test_aucs, test_f1s, test_prs = [], [], []
for i in range(edge_type_count):
if args.eval_type == "all" or edge_types[i] in args.eval_type.split(
","
):
if args.eval_type == "all" or edge_types[
i
] in args.eval_type.split(","):
tmp_auc, tmp_f1, tmp_pr = evaluate(
final_model[edge_types[i]],
valid_true_data_by_edge[edge_types[i]],
......
import argparse
import multiprocessing
import time
from collections import defaultdict
from functools import partial, reduce, wraps
import networkx as nx
import numpy as np
import torch
from gensim.models.keyedvectors import Vocab
from six import iteritems
from sklearn.metrics import auc, f1_score, precision_recall_curve, roc_auc_score
import torch
import time
import multiprocessing
from functools import partial, reduce, wraps
from sklearn.metrics import (auc, f1_score, precision_recall_curve,
roc_auc_score)
def parse_args():
......@@ -25,7 +25,10 @@ def parse_args():
)
parser.add_argument(
"--epoch", type=int, default=100, help="Number of epoch. Default is 100."
"--epoch",
type=int,
default=100,
help="Number of epoch. Default is 100.",
)
parser.add_argument(
......@@ -36,7 +39,10 @@ def parse_args():
)
parser.add_argument(
"--eval-type", type=str, default="all", help="The edge type(s) for evaluation."
"--eval-type",
type=str,
default="all",
help="The edge type(s) for evaluation.",
)
parser.add_argument(
......@@ -103,15 +109,24 @@ def parse_args():
)
parser.add_argument(
"--patience", type=int, default=5, help="Early stopping patience. Default is 5."
"--patience",
type=int,
default=5,
help="Early stopping patience. Default is 5.",
)
parser.add_argument(
"--gpu", type=str, default=None, help="Comma separated list of GPU device IDs."
"--gpu",
type=str,
default=None,
help="Comma separated list of GPU device IDs.",
)
parser.add_argument(
"--workers", type=int, default=4, help="Number of workers.",
"--workers",
type=int,
default=4,
help="Number of workers.",
)
return parser.parse_args()
......@@ -205,7 +220,9 @@ def generate_pairs(all_walks, window_size, num_workers):
walks_list = [walks]
tmp_result = pool.map(
partial(
generate_pairs_parallel, skip_window=skip_window, layer_id=layer_id
generate_pairs_parallel,
skip_window=skip_window,
layer_id=layer_id,
),
walks_list,
)
......@@ -285,4 +302,8 @@ def evaluate(model, true_edges, false_edges, num_workers):
y_true = np.array(true_list)
y_scores = np.array(prediction_list)
ps, rs, _ = precision_recall_curve(y_true, y_scores)
return roc_auc_score(y_true, y_scores), f1_score(y_true, y_pred), auc(rs, ps)
return (
roc_auc_score(y_true, y_scores),
f1_score(y_true, y_pred),
auc(rs, ps),
)
import torch
from BGNN import BGNNPredictor
import pandas as pd
import numpy as np
import json
import os
from dgl.data.utils import load_graphs
from dgl.nn.pytorch import GATConv as GATConvDGL, GraphConv, ChebConv as ChebConvDGL, \
AGNNConv as AGNNConvDGL, APPNPConv
from torch.nn import Dropout, ELU, Sequential, Linear, ReLU
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from BGNN import BGNNPredictor
from category_encoders import CatBoostEncoder
from sklearn import preprocessing
from torch.nn import ELU, Dropout, Linear, ReLU, Sequential
from dgl.data.utils import load_graphs
from dgl.nn.pytorch import AGNNConv as AGNNConvDGL
from dgl.nn.pytorch import APPNPConv
from dgl.nn.pytorch import ChebConv as ChebConvDGL
from dgl.nn.pytorch import GATConv as GATConvDGL
from dgl.nn.pytorch import GraphConv
class GNNModelDGL(torch.nn.Module):
def __init__(self, in_dim, hidden_dim, out_dim,
dropout=0., name='gat', residual=True, use_mlp=False, join_with_mlp=False):
def __init__(
self,
in_dim,
hidden_dim,
out_dim,
dropout=0.0,
name="gat",
residual=True,
use_mlp=False,
join_with_mlp=False,
):
super(GNNModelDGL, self).__init__()
self.name = name
self.use_mlp = use_mlp
self.join_with_mlp = join_with_mlp
self.normalize_input_columns = True
if name == 'gat':
self.l1 = GATConvDGL(in_dim, hidden_dim//8, 8, feat_drop=dropout, attn_drop=dropout, residual=False,
activation=F.elu)
self.l2 = GATConvDGL(hidden_dim, out_dim, 1, feat_drop=dropout, attn_drop=dropout, residual=residual, activation=None)
elif name == 'gcn':
if name == "gat":
self.l1 = GATConvDGL(
in_dim,
hidden_dim // 8,
8,
feat_drop=dropout,
attn_drop=dropout,
residual=False,
activation=F.elu,
)
self.l2 = GATConvDGL(
hidden_dim,
out_dim,
1,
feat_drop=dropout,
attn_drop=dropout,
residual=residual,
activation=None,
)
elif name == "gcn":
self.l1 = GraphConv(in_dim, hidden_dim, activation=F.elu)
self.l2 = GraphConv(hidden_dim, out_dim, activation=F.elu)
self.drop = Dropout(p=dropout)
elif name == 'cheb':
self.l1 = ChebConvDGL(in_dim, hidden_dim, k = 3)
self.l2 = ChebConvDGL(hidden_dim, out_dim, k = 3)
elif name == "cheb":
self.l1 = ChebConvDGL(in_dim, hidden_dim, k=3)
self.l2 = ChebConvDGL(hidden_dim, out_dim, k=3)
self.drop = Dropout(p=dropout)
elif name == 'agnn':
self.lin1 = Sequential(Dropout(p=dropout), Linear(in_dim, hidden_dim), ELU())
elif name == "agnn":
self.lin1 = Sequential(
Dropout(p=dropout), Linear(in_dim, hidden_dim), ELU()
)
self.l1 = AGNNConvDGL(learn_beta=False)
self.l2 = AGNNConvDGL(learn_beta=True)
self.lin2 = Sequential(Dropout(p=dropout), Linear(hidden_dim, out_dim), ELU())
elif name == 'appnp':
self.lin1 = Sequential(Dropout(p=dropout), Linear(in_dim, hidden_dim),
ReLU(), Dropout(p=dropout), Linear(hidden_dim, out_dim))
self.l1 = APPNPConv(k=10, alpha=0.1, edge_drop=0.)
self.lin2 = Sequential(
Dropout(p=dropout), Linear(hidden_dim, out_dim), ELU()
)
elif name == "appnp":
self.lin1 = Sequential(
Dropout(p=dropout),
Linear(in_dim, hidden_dim),
ReLU(),
Dropout(p=dropout),
Linear(hidden_dim, out_dim),
)
self.l1 = APPNPConv(k=10, alpha=0.1, edge_drop=0.0)
def forward(self, graph, features):
h = features
......@@ -50,36 +88,37 @@ class GNNModelDGL(torch.nn.Module):
h = torch.cat((h, self.mlp(features)), 1)
else:
h = self.mlp(features)
if self.name == 'gat':
if self.name == "gat":
h = self.l1(graph, h).flatten(1)
logits = self.l2(graph, h).mean(1)
elif self.name in ['appnp']:
elif self.name in ["appnp"]:
h = self.lin1(h)
logits = self.l1(graph, h)
elif self.name == 'agnn':
elif self.name == "agnn":
h = self.lin1(h)
h = self.l1(graph, h)
h = self.l2(graph, h)
logits = self.lin2(h)
elif self.name == 'che3b':
elif self.name == "che3b":
lambda_max = dgl.laplacian_lambda_max(graph)
h = self.drop(h)
h = self.l1(graph, h, lambda_max)
logits = self.l2(graph, h, lambda_max)
elif self.name == 'gcn':
elif self.name == "gcn":
h = self.drop(h)
h = self.l1(graph, h)
logits = self.l2(graph, h)
return logits
def read_input(input_folder):
X = pd.read_csv(f'{input_folder}/X.csv')
y = pd.read_csv(f'{input_folder}/y.csv')
X = pd.read_csv(f"{input_folder}/X.csv")
y = pd.read_csv(f"{input_folder}/y.csv")
categorical_columns = []
if os.path.exists(f'{input_folder}/cat_features.txt'):
with open(f'{input_folder}/cat_features.txt') as f:
if os.path.exists(f"{input_folder}/cat_features.txt"):
with open(f"{input_folder}/cat_features.txt") as f:
for line in f:
if line.strip():
categorical_columns.append(line.strip())
......@@ -92,14 +131,15 @@ def read_input(input_folder):
for col in list(columns[cat_features]):
X[col] = X[col].astype(str)
gs, _ = load_graphs(f'{input_folder}/graph.dgl')
gs, _ = load_graphs(f"{input_folder}/graph.dgl")
graph = gs[0]
with open(f'{input_folder}/masks.json') as f:
with open(f"{input_folder}/masks.json") as f:
masks = json.load(f)
return graph, X, y, cat_features, masks
def normalize_features(X, train_mask, val_mask, test_mask):
min_max_scaler = preprocessing.MinMaxScaler()
A = X.to_numpy(copy=True)
......@@ -107,72 +147,106 @@ def normalize_features(X, train_mask, val_mask, test_mask):
A[val_mask + test_mask] = min_max_scaler.transform(A[val_mask + test_mask])
return pd.DataFrame(A, columns=X.columns).astype(float)
def replace_na(X, train_mask):
if X.isna().any().any():
return X.fillna(X.iloc[train_mask].min() - 1)
return X
def encode_cat_features(X, y, cat_features, train_mask, val_mask, test_mask):
enc = CatBoostEncoder()
A = X.to_numpy(copy=True)
b = y.to_numpy(copy=True)
A[np.ix_(train_mask, cat_features)] = enc.fit_transform(A[np.ix_(train_mask, cat_features)], b[train_mask])
A[np.ix_(val_mask + test_mask, cat_features)] = enc.transform(A[np.ix_(val_mask + test_mask, cat_features)])
A[np.ix_(train_mask, cat_features)] = enc.fit_transform(
A[np.ix_(train_mask, cat_features)], b[train_mask]
)
A[np.ix_(val_mask + test_mask, cat_features)] = enc.transform(
A[np.ix_(val_mask + test_mask, cat_features)]
)
A = A.astype(float)
return pd.DataFrame(A, columns=X.columns)
if __name__ == '__main__':
if __name__ == "__main__":
# datasets can be found here: https://www.dropbox.com/s/verx1evkykzli88/datasets.zip
# Read dataset
input_folder = 'datasets/avazu'
input_folder = "datasets/avazu"
graph, X, y, cat_features, masks = read_input(input_folder)
train_mask, val_mask, test_mask = masks['0']['train'], masks['0']['val'], masks['0']['test']
train_mask, val_mask, test_mask = (
masks["0"]["train"],
masks["0"]["val"],
masks["0"]["test"],
)
encoded_X = X.copy()
normalizeFeatures = False
replaceNa = True
if len(cat_features):
encoded_X = encode_cat_features(encoded_X, y, cat_features, train_mask, val_mask, test_mask)
encoded_X = encode_cat_features(
encoded_X, y, cat_features, train_mask, val_mask, test_mask
)
if normalizeFeatures:
encoded_X = normalize_features(encoded_X, train_mask, val_mask, test_mask)
encoded_X = normalize_features(
encoded_X, train_mask, val_mask, test_mask
)
if replaceNa:
encoded_X = replace_na(encoded_X, train_mask)
# specify parameters
task = 'regression'
task = "regression"
hidden_dim = 128
trees_per_epoch = 5 # 5-10 are good values to try
backprop_per_epoch = 5 # 5-10 are good values to try
lr = 0.1 # 0.01-0.1 are good values to try
append_gbdt_pred = False # this can be important for performance (try True and False)
trees_per_epoch = 5 # 5-10 are good values to try
backprop_per_epoch = 5 # 5-10 are good values to try
lr = 0.1 # 0.01-0.1 are good values to try
append_gbdt_pred = (
False # this can be important for performance (try True and False)
)
train_input_features = False
gbdt_depth = 6
gbdt_lr = 0.1
out_dim = y.shape[1] if task == 'regression' else len(set(y.iloc[test_mask, 0]))
out_dim = (
y.shape[1] if task == "regression" else len(set(y.iloc[test_mask, 0]))
)
in_dim = out_dim + X.shape[1] if append_gbdt_pred else out_dim
# specify GNN model
gnn_model = GNNModelDGL(in_dim, hidden_dim, out_dim)
# initialize BGNN model
bgnn = BGNNPredictor(gnn_model, task=task,
loss_fn=None,
trees_per_epoch=trees_per_epoch,
backprop_per_epoch=backprop_per_epoch,
lr=lr,
append_gbdt_pred=append_gbdt_pred,
train_input_features=train_input_features,
gbdt_depth=gbdt_depth,
gbdt_lr=gbdt_lr)
bgnn = BGNNPredictor(
gnn_model,
task=task,
loss_fn=None,
trees_per_epoch=trees_per_epoch,
backprop_per_epoch=backprop_per_epoch,
lr=lr,
append_gbdt_pred=append_gbdt_pred,
train_input_features=train_input_features,
gbdt_depth=gbdt_depth,
gbdt_lr=gbdt_lr,
)
# train
metrics = bgnn.fit(graph, encoded_X, y, train_mask, val_mask, test_mask,
original_X = X, cat_features=cat_features,
num_epochs=100, patience=10, metric_name='loss')
bgnn.plot_interactive(metrics, legend=['train', 'valid', 'test'], title='Avazu', metric_name='loss')
metrics = bgnn.fit(
graph,
encoded_X,
y,
train_mask,
val_mask,
test_mask,
original_X=X,
cat_features=cat_features,
num_epochs=100,
patience=10,
metric_name="loss",
)
bgnn.plot_interactive(
metrics,
legend=["train", "valid", "test"],
title="Avazu",
metric_name="loss",
)
import torch
import numpy as np
import torch
from sklearn import metrics
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, ShuffleSplit, train_test_split
from sklearn.model_selection import (GridSearchCV, ShuffleSplit,
train_test_split)
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import OneHotEncoder, normalize
def fit_logistic_regression(X, y, data_random_seed=1, repeat=1):
# transform targets to one-hot vector
one_hot_encoder = OneHotEncoder(categories='auto', sparse=False)
one_hot_encoder = OneHotEncoder(categories="auto", sparse=False)
y = one_hot_encoder.fit_transform(y.reshape(-1, 1)).astype(np.bool)
# normalize x
X = normalize(X, norm='l2')
X = normalize(X, norm="l2")
# set random state, this will ensure the dataset will be split exactly the same throughout training
rng = np.random.RandomState(data_random_seed)
......@@ -22,37 +23,51 @@ def fit_logistic_regression(X, y, data_random_seed=1, repeat=1):
accuracies = []
for _ in range(repeat):
# different random split after each repeat
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=rng)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.8, random_state=rng
)
# grid search with one-vs-rest classifiers
logreg = LogisticRegression(solver='liblinear')
logreg = LogisticRegression(solver="liblinear")
c = 2.0 ** np.arange(-10, 11)
cv = ShuffleSplit(n_splits=5, test_size=0.5)
clf = GridSearchCV(estimator=OneVsRestClassifier(logreg), param_grid=dict(estimator__C=c),
n_jobs=5, cv=cv, verbose=0)
clf = GridSearchCV(
estimator=OneVsRestClassifier(logreg),
param_grid=dict(estimator__C=c),
n_jobs=5,
cv=cv,
verbose=0,
)
clf.fit(X_train, y_train)
y_pred = clf.predict_proba(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_pred = one_hot_encoder.transform(y_pred.reshape(-1, 1)).astype(np.bool)
y_pred = one_hot_encoder.transform(y_pred.reshape(-1, 1)).astype(
np.bool
)
test_acc = metrics.accuracy_score(y_test, y_pred)
accuracies.append(test_acc)
return accuracies
def fit_logistic_regression_preset_splits(X, y, train_mask, val_mask, test_mask):
def fit_logistic_regression_preset_splits(
X, y, train_mask, val_mask, test_mask
):
# transform targets to one-hot vector
one_hot_encoder = OneHotEncoder(categories='auto', sparse=False)
one_hot_encoder = OneHotEncoder(categories="auto", sparse=False)
y = one_hot_encoder.fit_transform(y.reshape(-1, 1)).astype(np.bool)
# normalize x
X = normalize(X, norm='l2')
X = normalize(X, norm="l2")
accuracies = []
for split_id in range(train_mask.shape[1]):
# get train/val/test masks
tmp_train_mask, tmp_val_mask = train_mask[:, split_id], val_mask[:, split_id]
tmp_train_mask, tmp_val_mask = (
train_mask[:, split_id],
val_mask[:, split_id],
)
# make custom cv
X_train, y_train = X[tmp_train_mask], y[tmp_train_mask]
......@@ -62,29 +77,37 @@ def fit_logistic_regression_preset_splits(X, y, train_mask, val_mask, test_mask)
# grid search with one-vs-rest classifiers
best_test_acc, best_acc = 0, 0
for c in 2.0 ** np.arange(-10, 11):
clf = OneVsRestClassifier(LogisticRegression(solver='liblinear', C=c))
clf = OneVsRestClassifier(
LogisticRegression(solver="liblinear", C=c)
)
clf.fit(X_train, y_train)
y_pred = clf.predict_proba(X_val)
y_pred = np.argmax(y_pred, axis=1)
y_pred = one_hot_encoder.transform(y_pred.reshape(-1, 1)).astype(np.bool)
y_pred = one_hot_encoder.transform(y_pred.reshape(-1, 1)).astype(
np.bool
)
val_acc = metrics.accuracy_score(y_val, y_pred)
if val_acc > best_acc:
best_acc = val_acc
y_pred = clf.predict_proba(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_pred = one_hot_encoder.transform(y_pred.reshape(-1, 1)).astype(np.bool)
y_pred = one_hot_encoder.transform(
y_pred.reshape(-1, 1)
).astype(np.bool)
best_test_acc = metrics.accuracy_score(y_test, y_pred)
accuracies.append(best_test_acc)
return accuracies
def fit_ppi_linear(num_classes, train_data, val_data, test_data, device, repeat=1):
def fit_ppi_linear(
num_classes, train_data, val_data, test_data, device, repeat=1
):
r"""
Trains a linear layer on top of the representations. This function is specific to the PPI dataset,
which has multiple labels.
"""
Trains a linear layer on top of the representations. This function is specific to the PPI dataset,
which has multiple labels.
"""
def train(classifier, train_data, optimizer):
classifier.train()
......@@ -111,13 +134,19 @@ def fit_ppi_linear(num_classes, train_data, val_data, test_data, device, repeat=
pred_logits = classifier(x.to(device))
pred_class = (pred_logits > 0).float().cpu().numpy()
return metrics.f1_score(label, pred_class, average='micro') if pred_class.sum() > 0 else 0
return (
metrics.f1_score(label, pred_class, average="micro")
if pred_class.sum() > 0
else 0
)
num_feats = train_data[0].size(1)
criterion = torch.nn.BCEWithLogitsLoss()
# normalization
mean, std = train_data[0].mean(0, keepdim=True), train_data[0].std(0, unbiased=False, keepdim=True)
mean, std = train_data[0].mean(0, keepdim=True), train_data[0].std(
0, unbiased=False, keepdim=True
)
train_data[0] = (train_data[0] - mean) / std
val_data[0] = (val_data[0] - mean) / std
test_data[0] = (test_data[0] - mean) / std
......@@ -129,7 +158,11 @@ def fit_ppi_linear(num_classes, train_data, val_data, test_data, device, repeat=
tmp_test_f1 = 0
for weight_decay in 2.0 ** np.arange(-10, 11, 2):
classifier = torch.nn.Linear(num_feats, num_classes).to(device)
optimizer = torch.optim.AdamW(params=classifier.parameters(), lr=0.01, weight_decay=weight_decay)
optimizer = torch.optim.AdamW(
params=classifier.parameters(),
lr=0.01,
weight_decay=weight_decay,
)
train(classifier, train_data, optimizer)
val_f1 = test(classifier, val_data)
......
import os
import dgl
import copy
import torch
import os
import warnings
import numpy as np
from tqdm import tqdm
from torch.optim import AdamW
import torch
from eval_function import (fit_logistic_regression,
fit_logistic_regression_preset_splits,
fit_ppi_linear)
from model import (BGRL, GCN, GraphSAGE_GCN, MLP_Predictor,
compute_representations)
from torch.nn.functional import cosine_similarity
from utils import get_graph_drop_transform, CosineDecayScheduler, get_dataset
from model import GCN, GraphSAGE_GCN, MLP_Predictor, BGRL, compute_representations
from eval_function import fit_logistic_regression, fit_logistic_regression_preset_splits, fit_ppi_linear
from torch.optim import AdamW
from tqdm import tqdm
from utils import CosineDecayScheduler, get_dataset, get_graph_drop_transform
import dgl
import warnings
warnings.filterwarnings("ignore")
def train(step, model, optimizer, lr_scheduler, mm_scheduler, transform_1, transform_2, data, args):
def train(
step,
model,
optimizer,
lr_scheduler,
mm_scheduler,
transform_1,
transform_2,
data,
args,
):
model.train()
# update learning rate
lr = lr_scheduler.get(step)
for param_group in optimizer.param_groups:
param_group['lr'] = lr
param_group["lr"] = lr
# update momentum
mm = 1 - mm_scheduler.get(step)
......@@ -31,13 +45,17 @@ def train(step, model, optimizer, lr_scheduler, mm_scheduler, transform_1, trans
x1, x2 = transform_1(data), transform_2(data)
if args.dataset != 'ppi':
if args.dataset != "ppi":
x1, x2 = dgl.add_self_loop(x1), dgl.add_self_loop(x2)
q1, y2 = model(x1, x2)
q2, y1 = model(x2, x1)
loss = 2 - cosine_similarity(q1, y2.detach(), dim=-1).mean() - cosine_similarity(q2, y1.detach(), dim=-1).mean()
loss = (
2
- cosine_similarity(q1, y2.detach(), dim=-1).mean()
- cosine_similarity(q2, y1.detach(), dim=-1).mean()
)
loss.backward()
# update online network
......@@ -53,113 +71,187 @@ def eval(model, dataset, device, args, train_data, val_data, test_data):
tmp_encoder = copy.deepcopy(model.online_encoder).eval()
val_scores = None
if args.dataset == 'ppi':
if args.dataset == "ppi":
train_data = compute_representations(tmp_encoder, train_data, device)
val_data = compute_representations(tmp_encoder, val_data, device)
test_data = compute_representations(tmp_encoder, test_data, device)
num_classes = train_data[1].shape[1]
val_scores, test_scores = fit_ppi_linear(num_classes, train_data, val_data, test_data, device,
args.num_eval_splits)
elif args.dataset != 'wiki_cs':
representations, labels = compute_representations(tmp_encoder, dataset, device)
test_scores = fit_logistic_regression(representations.cpu().numpy(), labels.cpu().numpy(),
data_random_seed=args.data_seed, repeat=args.num_eval_splits)
val_scores, test_scores = fit_ppi_linear(
num_classes,
train_data,
val_data,
test_data,
device,
args.num_eval_splits,
)
elif args.dataset != "wiki_cs":
representations, labels = compute_representations(
tmp_encoder, dataset, device
)
test_scores = fit_logistic_regression(
representations.cpu().numpy(),
labels.cpu().numpy(),
data_random_seed=args.data_seed,
repeat=args.num_eval_splits,
)
else:
g = dataset[0]
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
representations, labels = compute_representations(tmp_encoder, dataset, device)
test_scores = fit_logistic_regression_preset_splits(representations.cpu().numpy(), labels.cpu().numpy(),
train_mask, val_mask, test_mask)
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
representations, labels = compute_representations(
tmp_encoder, dataset, device
)
test_scores = fit_logistic_regression_preset_splits(
representations.cpu().numpy(),
labels.cpu().numpy(),
train_mask,
val_mask,
test_mask,
)
return val_scores, test_scores
def main(args):
# use CUDA_VISIBLE_DEVICES to select gpu
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)
device = (
torch.device("cuda")
if torch.cuda.is_available()
else torch.device("cpu")
)
print("Using device:", device)
dataset, train_data, val_data, test_data = get_dataset(args.dataset)
g = dataset[0]
g = g.to(device)
input_size, representation_size = g.ndata['feat'].size(1), args.graph_encoder_layer[-1]
input_size, representation_size = (
g.ndata["feat"].size(1),
args.graph_encoder_layer[-1],
)
# prepare transforms
transform_1 = get_graph_drop_transform(drop_edge_p=args.drop_edge_p[0], feat_mask_p=args.feat_mask_p[0])
transform_2 = get_graph_drop_transform(drop_edge_p=args.drop_edge_p[1], feat_mask_p=args.feat_mask_p[1])
transform_1 = get_graph_drop_transform(
drop_edge_p=args.drop_edge_p[0], feat_mask_p=args.feat_mask_p[0]
)
transform_2 = get_graph_drop_transform(
drop_edge_p=args.drop_edge_p[1], feat_mask_p=args.feat_mask_p[1]
)
# scheduler
lr_scheduler = CosineDecayScheduler(args.lr, args.lr_warmup_epochs, args.epochs)
lr_scheduler = CosineDecayScheduler(
args.lr, args.lr_warmup_epochs, args.epochs
)
mm_scheduler = CosineDecayScheduler(1 - args.mm, 0, args.epochs)
# build networks
if args.dataset == 'ppi':
if args.dataset == "ppi":
encoder = GraphSAGE_GCN([input_size] + args.graph_encoder_layer)
else:
encoder = GCN([input_size] + args.graph_encoder_layer)
predictor = MLP_Predictor(representation_size, representation_size, hidden_size=args.predictor_hidden_size)
predictor = MLP_Predictor(
representation_size,
representation_size,
hidden_size=args.predictor_hidden_size,
)
model = BGRL(encoder, predictor).to(device)
# optimizer
optimizer = AdamW(model.trainable_parameters(), lr=args.lr, weight_decay=args.weight_decay)
optimizer = AdamW(
model.trainable_parameters(), lr=args.lr, weight_decay=args.weight_decay
)
# train
for epoch in tqdm(range(1, args.epochs + 1), desc=' - (Training) '):
train(epoch - 1, model, optimizer, lr_scheduler, mm_scheduler, transform_1, transform_2, g, args)
for epoch in tqdm(range(1, args.epochs + 1), desc=" - (Training) "):
train(
epoch - 1,
model,
optimizer,
lr_scheduler,
mm_scheduler,
transform_1,
transform_2,
g,
args,
)
if epoch % args.eval_epochs == 0:
val_scores, test_scores = eval(model, dataset, device, args, train_data, val_data, test_data)
if args.dataset == 'ppi':
print('Epoch: {:04d} | Best Val F1: {:.4f} | Test F1: {:.4f}'.format(epoch, np.mean(val_scores),
np.mean(test_scores)))
val_scores, test_scores = eval(
model, dataset, device, args, train_data, val_data, test_data
)
if args.dataset == "ppi":
print(
"Epoch: {:04d} | Best Val F1: {:.4f} | Test F1: {:.4f}".format(
epoch, np.mean(val_scores), np.mean(test_scores)
)
)
else:
print('Epoch: {:04d} | Test Accuracy: {:.4f}'.format(epoch, np.mean(test_scores)))
print(
"Epoch: {:04d} | Test Accuracy: {:.4f}".format(
epoch, np.mean(test_scores)
)
)
# save encoder weights
if not os.path.isdir(args.weights_dir):
os.mkdir(args.weights_dir)
torch.save({'model': model.online_encoder.state_dict()},
os.path.join(args.weights_dir, 'bgrl-{}.pt'.format(args.dataset)))
torch.save(
{"model": model.online_encoder.state_dict()},
os.path.join(args.weights_dir, "bgrl-{}.pt".format(args.dataset)),
)
if __name__ == '__main__':
if __name__ == "__main__":
from argparse import ArgumentParser
parser = ArgumentParser()
# Dataset options.
parser.add_argument('--dataset', type=str, default='amazon_photos', choices=['coauthor_cs', 'coauthor_physics',
'amazon_photos', 'amazon_computers',
'wiki_cs', 'ppi'])
parser.add_argument(
"--dataset",
type=str,
default="amazon_photos",
choices=[
"coauthor_cs",
"coauthor_physics",
"amazon_photos",
"amazon_computers",
"wiki_cs",
"ppi",
],
)
# Model options.
parser.add_argument('--graph_encoder_layer', type=int, nargs='+', default=[256, 128])
parser.add_argument('--predictor_hidden_size', type=int, default=512)
parser.add_argument(
"--graph_encoder_layer", type=int, nargs="+", default=[256, 128]
)
parser.add_argument("--predictor_hidden_size", type=int, default=512)
# Training options.
parser.add_argument('--epochs', type=int, default=10000)
parser.add_argument('--lr', type=float, default=1e-5)
parser.add_argument('--weight_decay', type=float, default=1e-5)
parser.add_argument('--mm', type=float, default=0.99)
parser.add_argument('--lr_warmup_epochs', type=int, default=1000)
parser.add_argument('--weights_dir', type=str, default='../weights')
parser.add_argument("--epochs", type=int, default=10000)
parser.add_argument("--lr", type=float, default=1e-5)
parser.add_argument("--weight_decay", type=float, default=1e-5)
parser.add_argument("--mm", type=float, default=0.99)
parser.add_argument("--lr_warmup_epochs", type=int, default=1000)
parser.add_argument("--weights_dir", type=str, default="../weights")
# Augmentations options.
parser.add_argument('--drop_edge_p', type=float, nargs='+', default=[0., 0.])
parser.add_argument('--feat_mask_p', type=float, nargs='+', default=[0., 0.])
parser.add_argument(
"--drop_edge_p", type=float, nargs="+", default=[0.0, 0.0]
)
parser.add_argument(
"--feat_mask_p", type=float, nargs="+", default=[0.0, 0.0]
)
# Evaluation options.
parser.add_argument('--eval_epochs', type=int, default=250)
parser.add_argument('--num_eval_splits', type=int, default=20)
parser.add_argument('--data_seed', type=int, default=1)
parser.add_argument("--eval_epochs", type=int, default=250)
parser.add_argument("--num_eval_splits", type=int, default=20)
parser.add_argument("--data_seed", type=int, default=1)
# Experiment options.
parser.add_argument('--num_experiments', type=int, default=20)
parser.add_argument("--num_experiments", type=int, default=20)
args = parser.parse_args()
main(args)
import dgl
import copy
import torch
from torch import nn
from torch.nn.init import ones_, zeros_
from torch.nn import BatchNorm1d, Parameter
from torch.nn.init import ones_, zeros_
import dgl
from dgl.nn.pytorch.conv import GraphConv, SAGEConv
......@@ -17,8 +19,8 @@ class LayerNorm(nn.Module):
self.weight = Parameter(torch.Tensor(in_channels))
self.bias = Parameter(torch.Tensor(in_channels))
else:
self.register_parameter('weight', None)
self.register_parameter('bias', None)
self.register_parameter("weight", None)
self.register_parameter("bias", None)
self.reset_parameters()
......@@ -34,13 +36,27 @@ class LayerNorm(nn.Module):
else:
batch_size = int(batch.max()) + 1
batch_idx = [batch == i for i in range(batch_size)]
norm = torch.tensor([i.sum() for i in batch_idx], dtype=x.dtype).clamp_(min=1).to(device)
norm = (
torch.tensor([i.sum() for i in batch_idx], dtype=x.dtype)
.clamp_(min=1)
.to(device)
)
norm = norm.mul_(x.size(-1)).view(-1, 1)
tmp_list = [x[i] for i in batch_idx]
mean = torch.concat([i.sum(0).unsqueeze(0) for i in tmp_list], dim=0).sum(dim=-1, keepdim=True).to(device)
mean = (
torch.concat([i.sum(0).unsqueeze(0) for i in tmp_list], dim=0)
.sum(dim=-1, keepdim=True)
.to(device)
)
mean = mean / norm
x = x - mean.index_select(0, batch.long())
var = torch.concat([(i * i).sum(0).unsqueeze(0) for i in tmp_list], dim=0).sum(dim=-1, keepdim=True).to(device)
var = (
torch.concat(
[(i * i).sum(0).unsqueeze(0) for i in tmp_list], dim=0
)
.sum(dim=-1, keepdim=True)
.to(device)
)
var = var / norm
out = x / (var + self.eps).sqrt().index_select(0, batch.long())
......@@ -50,7 +66,7 @@ class LayerNorm(nn.Module):
return out
def __repr__(self):
return f'{self.__class__.__name__}({self.in_channels})'
return f"{self.__class__.__name__}({self.in_channels})"
class MLP_Predictor(nn.Module):
......@@ -60,13 +76,14 @@ class MLP_Predictor(nn.Module):
output_size (int): Size of output features.
hidden_size (int, optional): Size of hidden layer. (default: :obj:`4096`).
"""
def __init__(self, input_size, output_size, hidden_size=512):
super().__init__()
self.net = nn.Sequential(
nn.Linear(input_size, hidden_size, bias=True),
nn.PReLU(1),
nn.Linear(hidden_size, output_size, bias=True)
nn.Linear(hidden_size, output_size, bias=True),
)
self.reset_parameters()
......@@ -91,7 +108,7 @@ class GCN(nn.Module):
self.layers.append(nn.PReLU())
def forward(self, g):
x = g.ndata['feat']
x = g.ndata["feat"]
for layer in self.layers:
if isinstance(layer, GraphConv):
x = layer(g, x)
......@@ -101,7 +118,7 @@ class GCN(nn.Module):
def reset_parameters(self):
for layer in self.layers:
if hasattr(layer, 'reset_parameters'):
if hasattr(layer, "reset_parameters"):
layer.reset_parameters()
......@@ -111,33 +128,41 @@ class GraphSAGE_GCN(nn.Module):
input_size, hidden_size, embedding_size = layer_sizes
self.convs = nn.ModuleList([
SAGEConv(input_size, hidden_size, 'mean'),
SAGEConv(hidden_size, hidden_size, 'mean'),
SAGEConv(hidden_size, embedding_size, 'mean')
])
self.skip_lins = nn.ModuleList([
nn.Linear(input_size, hidden_size, bias=False),
nn.Linear(input_size, hidden_size, bias=False),
])
self.layer_norms = nn.ModuleList([
LayerNorm(hidden_size),
LayerNorm(hidden_size),
LayerNorm(embedding_size),
])
self.activations = nn.ModuleList([
nn.PReLU(),
nn.PReLU(),
nn.PReLU(),
])
self.convs = nn.ModuleList(
[
SAGEConv(input_size, hidden_size, "mean"),
SAGEConv(hidden_size, hidden_size, "mean"),
SAGEConv(hidden_size, embedding_size, "mean"),
]
)
self.skip_lins = nn.ModuleList(
[
nn.Linear(input_size, hidden_size, bias=False),
nn.Linear(input_size, hidden_size, bias=False),
]
)
self.layer_norms = nn.ModuleList(
[
LayerNorm(hidden_size),
LayerNorm(hidden_size),
LayerNorm(embedding_size),
]
)
self.activations = nn.ModuleList(
[
nn.PReLU(),
nn.PReLU(),
nn.PReLU(),
]
)
def forward(self, g):
x = g.ndata['feat']
if 'batch' in g.ndata.keys():
batch = g.ndata['batch']
x = g.ndata["feat"]
if "batch" in g.ndata.keys():
batch = g.ndata["batch"]
else:
batch = None
......@@ -176,6 +201,7 @@ class BGRL(nn.Module):
`encoder` must have a `reset_parameters` method, as the weights of the target network will be initialized
differently from the online network.
"""
def __init__(self, encoder, predictor):
super(BGRL, self).__init__()
# online network
......@@ -194,7 +220,9 @@ class BGRL(nn.Module):
def trainable_parameters(self):
r"""Returns the parameters that will be updated via an optimizer."""
return list(self.online_encoder.parameters()) + list(self.predictor.parameters())
return list(self.online_encoder.parameters()) + list(
self.predictor.parameters()
)
@torch.no_grad()
def update_target_network(self, mm):
......@@ -202,8 +230,10 @@ class BGRL(nn.Module):
Args:
mm (float): Momentum used in moving average update.
"""
for param_q, param_k in zip(self.online_encoder.parameters(), self.target_encoder.parameters()):
param_k.data.mul_(mm).add_(param_q.data, alpha=1. - mm)
for param_q, param_k in zip(
self.online_encoder.parameters(), self.target_encoder.parameters()
):
param_k.data.mul_(mm).add_(param_q.data, alpha=1.0 - mm)
def forward(self, online_x, target_x):
# forward online network
......@@ -233,16 +263,15 @@ def compute_representations(net, dataset, device):
g = g.to(device)
with torch.no_grad():
reps.append(net(g))
labels.append(g.ndata['label'])
labels.append(g.ndata["label"])
else:
for g in dataset:
# forward
g = g.to(device)
with torch.no_grad():
reps.append(net(g))
labels.append(g.ndata['label'])
labels.append(g.ndata["label"])
reps = torch.cat(reps, dim=0)
labels = torch.cat(labels, dim=0)
return [reps, labels]
import copy
import torch
import numpy as np
import torch
from dgl.data import (AmazonCoBuyComputerDataset, AmazonCoBuyPhotoDataset,
CoauthorCSDataset, CoauthorPhysicsDataset, PPIDataset,
WikiCSDataset)
from dgl.dataloading import GraphDataLoader
from dgl.transforms import Compose, DropEdge, FeatMask, RowFeatNormalizer
from dgl.data import CoauthorCSDataset, CoauthorPhysicsDataset, AmazonCoBuyPhotoDataset, AmazonCoBuyComputerDataset, PPIDataset, WikiCSDataset
class CosineDecayScheduler:
......@@ -16,10 +20,24 @@ class CosineDecayScheduler:
if step < self.warmup_steps:
return self.max_val * step / self.warmup_steps
elif self.warmup_steps <= step <= self.total_steps:
return self.max_val * (1 + np.cos((step - self.warmup_steps) * np.pi /
(self.total_steps - self.warmup_steps))) / 2
return (
self.max_val
* (
1
+ np.cos(
(step - self.warmup_steps)
* np.pi
/ (self.total_steps - self.warmup_steps)
)
)
/ 2
)
else:
raise ValueError('Step ({}) > total number of steps ({}).'.format(step, self.total_steps))
raise ValueError(
"Step ({}) > total number of steps ({}).".format(
step, self.total_steps
)
)
def get_graph_drop_transform(drop_edge_p, feat_mask_p):
......@@ -29,12 +47,12 @@ def get_graph_drop_transform(drop_edge_p, feat_mask_p):
transforms.append(copy.deepcopy)
# drop edges
if drop_edge_p > 0.:
if drop_edge_p > 0.0:
transforms.append(DropEdge(drop_edge_p))
# drop features
if feat_mask_p > 0.:
transforms.append(FeatMask(feat_mask_p, node_feat_names=['feat']))
if feat_mask_p > 0.0:
transforms.append(FeatMask(feat_mask_p, node_feat_names=["feat"]))
return Compose(transforms)
......@@ -42,41 +60,41 @@ def get_graph_drop_transform(drop_edge_p, feat_mask_p):
def get_wiki_cs(transform=RowFeatNormalizer(subtract_min=True)):
dataset = WikiCSDataset(transform=transform)
g = dataset[0]
std, mean = torch.std_mean(g.ndata['feat'], dim=0, unbiased=False)
g.ndata['feat'] = (g.ndata['feat'] - mean) / std
std, mean = torch.std_mean(g.ndata["feat"], dim=0, unbiased=False)
g.ndata["feat"] = (g.ndata["feat"] - mean) / std
return [g]
def get_ppi():
train_dataset = PPIDataset(mode='train')
val_dataset = PPIDataset(mode='valid')
test_dataset = PPIDataset(mode='test')
train_dataset = PPIDataset(mode="train")
val_dataset = PPIDataset(mode="valid")
test_dataset = PPIDataset(mode="test")
train_val_dataset = [i for i in train_dataset] + [i for i in val_dataset]
for idx, data in enumerate(train_val_dataset):
data.ndata['batch'] = torch.zeros(data.number_of_nodes()) + idx
data.ndata['batch'] = data.ndata['batch'].long()
data.ndata["batch"] = torch.zeros(data.number_of_nodes()) + idx
data.ndata["batch"] = data.ndata["batch"].long()
g = list(GraphDataLoader(train_val_dataset, batch_size=22, shuffle=True))
return g, PPIDataset(mode='train'), PPIDataset(mode='valid'), test_dataset
return g, PPIDataset(mode="train"), PPIDataset(mode="valid"), test_dataset
def get_dataset(name, transform=RowFeatNormalizer(subtract_min=True)):
dgl_dataset_dict = {
'coauthor_cs': CoauthorCSDataset,
'coauthor_physics': CoauthorPhysicsDataset,
'amazon_computers': AmazonCoBuyComputerDataset,
'amazon_photos': AmazonCoBuyPhotoDataset,
'wiki_cs': get_wiki_cs,
'ppi': get_ppi
"coauthor_cs": CoauthorCSDataset,
"coauthor_physics": CoauthorPhysicsDataset,
"amazon_computers": AmazonCoBuyComputerDataset,
"amazon_photos": AmazonCoBuyPhotoDataset,
"wiki_cs": get_wiki_cs,
"ppi": get_ppi,
}
dataset_class = dgl_dataset_dict[name]
train_data, val_data, test_data = None, None, None
if name != 'ppi':
if name != "ppi":
dataset = dataset_class(transform=transform)
else:
dataset, train_data, val_data, test_data = dataset_class()
return dataset, train_data, val_data, test_data
\ No newline at end of file
return dataset, train_data, val_data, test_data
import dgl
import torch
from DGLRoutingLayer import DGLRoutingLayer
from torch import nn
from torch.nn import functional as F
import dgl.function as fn
from DGLRoutingLayer import DGLRoutingLayer
import dgl
import dgl.function as fn
class DGLDigitCapsuleLayer(nn.Module):
def __init__(self, in_nodes_dim=8, in_nodes=1152, out_nodes=10, out_nodes_dim=16, device='cpu'):
def __init__(
self,
in_nodes_dim=8,
in_nodes=1152,
out_nodes=10,
out_nodes_dim=16,
device="cpu",
):
super(DGLDigitCapsuleLayer, self).__init__()
self.device = device
self.in_nodes_dim, self.out_nodes_dim = in_nodes_dim, out_nodes_dim
self.in_nodes, self.out_nodes = in_nodes, out_nodes
self.weight = nn.Parameter(torch.randn(in_nodes, out_nodes, out_nodes_dim, in_nodes_dim))
self.weight = nn.Parameter(
torch.randn(in_nodes, out_nodes, out_nodes_dim, in_nodes_dim)
)
def forward(self, x):
self.batch_size = x.size(0)
u_hat = self.compute_uhat(x)
routing = DGLRoutingLayer(self.in_nodes, self.out_nodes, self.out_nodes_dim, batch_size=self.batch_size,
device=self.device)
routing = DGLRoutingLayer(
self.in_nodes,
self.out_nodes,
self.out_nodes_dim,
batch_size=self.batch_size,
device=self.device,
)
routing(u_hat, routing_num=3)
out_nodes_feature = routing.g.nodes[routing.out_indx].data['v']
out_nodes_feature = routing.g.nodes[routing.out_indx].data["v"]
# shape transformation is for further classification
return out_nodes_feature.transpose(0, 1).unsqueeze(1).unsqueeze(4).squeeze(1)
return (
out_nodes_feature.transpose(0, 1)
.unsqueeze(1)
.unsqueeze(4)
.squeeze(1)
)
def compute_uhat(self, x):
# x is the input vextor with shape [batch_size, in_nodes_dim, in_nodes]
......
import torch.nn as nn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import dgl
class DGLRoutingLayer(nn.Module):
def __init__(self, in_nodes, out_nodes, f_size, batch_size=0, device='cpu'):
def __init__(self, in_nodes, out_nodes, f_size, batch_size=0, device="cpu"):
super(DGLRoutingLayer, self).__init__()
self.batch_size = batch_size
self.g = init_graph(in_nodes, out_nodes, f_size, device=device)
......@@ -16,49 +17,59 @@ class DGLRoutingLayer(nn.Module):
self.device = device
def forward(self, u_hat, routing_num=1):
self.g.edata['u_hat'] = u_hat
self.g.edata["u_hat"] = u_hat
batch_size = self.batch_size
# step 2 (line 5)
def cap_message(edges):
if batch_size:
return {'m': edges.data['c'].unsqueeze(1) * edges.data['u_hat']}
return {"m": edges.data["c"].unsqueeze(1) * edges.data["u_hat"]}
else:
return {'m': edges.data['c'] * edges.data['u_hat']}
return {"m": edges.data["c"] * edges.data["u_hat"]}
def cap_reduce(nodes):
return {'s': th.sum(nodes.mailbox['m'], dim=1)}
return {"s": th.sum(nodes.mailbox["m"], dim=1)}
for r in range(routing_num):
# step 1 (line 4): normalize over out edges
edges_b = self.g.edata['b'].view(self.in_nodes, self.out_nodes)
self.g.edata['c'] = F.softmax(edges_b, dim=1).view(-1, 1)
edges_b = self.g.edata["b"].view(self.in_nodes, self.out_nodes)
self.g.edata["c"] = F.softmax(edges_b, dim=1).view(-1, 1)
# Execute step 1 & 2
self.g.update_all(message_func=cap_message, reduce_func=cap_reduce)
# step 3 (line 6)
if self.batch_size:
self.g.nodes[self.out_indx].data['v'] = squash(self.g.nodes[self.out_indx].data['s'], dim=2)
self.g.nodes[self.out_indx].data["v"] = squash(
self.g.nodes[self.out_indx].data["s"], dim=2
)
else:
self.g.nodes[self.out_indx].data['v'] = squash(self.g.nodes[self.out_indx].data['s'], dim=1)
self.g.nodes[self.out_indx].data["v"] = squash(
self.g.nodes[self.out_indx].data["s"], dim=1
)
# step 4 (line 7)
v = th.cat([self.g.nodes[self.out_indx].data['v']] * self.in_nodes, dim=0)
v = th.cat(
[self.g.nodes[self.out_indx].data["v"]] * self.in_nodes, dim=0
)
if self.batch_size:
self.g.edata['b'] = self.g.edata['b'] + (self.g.edata['u_hat'] * v).mean(dim=1).sum(dim=1, keepdim=True)
self.g.edata["b"] = self.g.edata["b"] + (
self.g.edata["u_hat"] * v
).mean(dim=1).sum(dim=1, keepdim=True)
else:
self.g.edata['b'] = self.g.edata['b'] + (self.g.edata['u_hat'] * v).sum(dim=1, keepdim=True)
self.g.edata["b"] = self.g.edata["b"] + (
self.g.edata["u_hat"] * v
).sum(dim=1, keepdim=True)
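# For reference, one iteration of the loop above matches the dynamic-routing
# update of Sabour et al. (2017), with b/c/u_hat/s/v stored as graph data:
#   c_ij = softmax_j(b_ij)                 (step 1, edata["c"])
#   s_j  = sum_i c_ij * u_hat_{j|i}        (step 2, reduced into ndata["s"])
#   v_j  = squash(s_j)                     (step 3, ndata["v"])
#   b_ij = b_ij + <u_hat_{j|i}, v_j>       (step 4, agreement update on edata["b"])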
def squash(s, dim=1):
sq = th.sum(s ** 2, dim=dim, keepdim=True)
sq = th.sum(s**2, dim=dim, keepdim=True)
s_norm = th.sqrt(sq)
s = (sq / (1.0 + sq)) * (s / s_norm)
return s
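# squash() is the capsule nonlinearity
#   v = (||s||^2 / (1 + ||s||^2)) * (s / ||s||),
# which keeps the direction of s while mapping its norm into [0, 1).
# A quick, hypothetical sanity check (not in the original file):
#   v = squash(th.randn(4, 16), dim=1)
#   assert (v.norm(dim=1) < 1).all()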
def init_graph(in_nodes, out_nodes, f_size, device='cpu'):
def init_graph(in_nodes, out_nodes, f_size, device="cpu"):
g = dgl.DGLGraph()
g.set_n_initializer(dgl.frame.zero_initializer)
all_nodes = in_nodes + out_nodes
......@@ -70,5 +81,5 @@ def init_graph(in_nodes, out_nodes, f_size, device='cpu'):
g.add_edges(u, out_indx)
g = g.to(device)
g.edata['b'] = th.zeros(in_nodes * out_nodes, 1).to(device)
g.edata["b"] = th.zeros(in_nodes * out_nodes, 1).to(device)
return g
import argparse
import torch
import torch.optim as optim
from torchvision import datasets, transforms
from model import Net
from torchvision import datasets, transforms
def train(args, model, device, train_loader, optimizer, epoch):
......@@ -16,9 +16,15 @@ def train(args, model, device, train_loader, optimizer, epoch):
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
print(
"Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
epoch,
batch_idx * len(data),
len(train_loader.dataset),
100.0 * batch_idx / len(train_loader),
loss.item(),
)
)
def test(args, model, device, test_loader):
......@@ -29,33 +35,76 @@ def test(args, model, device, test_loader):
for data, target in test_loader:
data, target = data.to(device), target.to(device)
output = model(data)
test_loss += model.margin_loss(output, target).item() # sum up batch loss
pred = output.norm(dim=2).squeeze().max(1, keepdim=True)[1] # get the index of the max log-probability
test_loss += model.margin_loss(
output, target
).item() # sum up batch loss
pred = (
output.norm(dim=2).squeeze().max(1, keepdim=True)[1]
)  # get the index of the capsule with the largest norm
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
print(
"\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
test_loss,
correct,
len(test_loader.dataset),
100.0 * correct / len(test_loader.dataset),
)
)
def main():
# Training settings
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=512, metavar='N',
help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=512, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=10, metavar='N',
help='number of epochs to train (default: 10)')
parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
help='learning rate (default: 0.01)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
parser.add_argument(
"--batch-size",
type=int,
default=512,
metavar="N",
help="input batch size for training (default: 64)",
)
parser.add_argument(
"--test-batch-size",
type=int,
default=512,
metavar="N",
help="input batch size for testing (default: 1000)",
)
parser.add_argument(
"--epochs",
type=int,
default=10,
metavar="N",
help="number of epochs to train (default: 10)",
)
parser.add_argument(
"--lr",
type=float,
default=0.01,
metavar="LR",
help="learning rate (default: 0.01)",
)
parser.add_argument(
"--no-cuda",
action="store_true",
default=False,
help="disables CUDA training",
)
parser.add_argument(
"--seed",
type=int,
default=1,
metavar="S",
help="random seed (default: 1)",
)
parser.add_argument(
"--log-interval",
type=int,
default=10,
metavar="N",
help="how many batches to wait before logging training status",
)
args = parser.parse_args()
use_cuda = not args.no_cuda and torch.cuda.is_available()
......@@ -63,20 +112,38 @@ def main():
device = torch.device("cuda" if use_cuda else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
kwargs = {"num_workers": 1, "pin_memory": True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.batch_size, shuffle=True, **kwargs)
datasets.MNIST(
"../data",
train=True,
download=True,
transform=transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,)),
]
),
),
batch_size=args.batch_size,
shuffle=True,
**kwargs
)
test_loader = torch.utils.data.DataLoader(
datasets.MNIST('../data', train=False, transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,))
])),
batch_size=args.test_batch_size, shuffle=True, **kwargs)
datasets.MNIST(
"../data",
train=False,
transform=transforms.Compose(
[
transforms.ToTensor(),
transforms.Normalize((0.1307,), (0.3081,)),
]
),
),
batch_size=args.test_batch_size,
shuffle=True,
**kwargs
)
model = Net(device=device).to(device)
optimizer = optim.Adam(model.parameters(), lr=args.lr)
......@@ -86,5 +153,5 @@ def main():
test(args, model, device, test_loader)
if __name__ == '__main__':
if __name__ == "__main__":
main()
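# Hypothetical invocation (assuming this script is saved as main.py alongside
# model.py, from which it imports Net):
#   python main.py --batch-size 512 --epochs 10 --lr 0.01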
import torch
from torch import nn
from DGLDigitCapsule import DGLDigitCapsuleLayer
from DGLRoutingLayer import squash
from torch import nn
class Net(nn.Module):
def __init__(self, device='cpu'):
def __init__(self, device="cpu"):
super(Net, self).__init__()
self.device = device
self.conv1 = nn.Sequential(nn.Conv2d(in_channels=1,
out_channels=256,
kernel_size=9,
stride=1), nn.ReLU(inplace=True))
self.conv1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=256, kernel_size=9, stride=1),
nn.ReLU(inplace=True),
)
self.primary = PrimaryCapsuleLayer(device=device)
self.digits = DGLDigitCapsuleLayer(device=device)
......@@ -29,7 +28,7 @@ class Net(nn.Module):
for i in range(batch_s):
one_hot_vec[i, target[i]] = 1.0
batch_size = input.size(0)
v_c = torch.sqrt((input ** 2).sum(dim=2, keepdim=True))
v_c = torch.sqrt((input**2).sum(dim=2, keepdim=True))
zero = torch.zeros(1).to(self.device)
m_plus = 0.9
m_minus = 0.1
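# m_plus and m_minus are the margins of the CapsNet margin loss; the elided code
# below presumably computes, per class k with one-hot target T_k and capsule
# length ||v_k|| = v_c,
#   L_k = T_k * max(0, m_plus - ||v_k||)^2
#         + lambda * (1 - T_k) * max(0, ||v_k|| - m_minus)^2,
# with lambda = 0.5 in the original CapsNet paper.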
......@@ -43,15 +42,14 @@ class Net(nn.Module):
class PrimaryCapsuleLayer(nn.Module):
def __init__(self, in_channel=256, num_unit=8, device='cpu'):
def __init__(self, in_channel=256, num_unit=8, device="cpu"):
super(PrimaryCapsuleLayer, self).__init__()
self.in_channel = in_channel
self.num_unit = num_unit
self.device = device
self.conv_units = nn.ModuleList([
nn.Conv2d(self.in_channel, 32, 9, 2) for _ in range(self.num_unit)
])
self.conv_units = nn.ModuleList(
[nn.Conv2d(self.in_channel, 32, 9, 2) for _ in range(self.num_unit)]
)
def forward(self, x):
unit = [self.conv_units[i](x) for i, l in enumerate(self.conv_units)]
......
import dgl
import torch as th
import torch.nn as nn
from torch.nn import functional as F
from DGLRoutingLayer import DGLRoutingLayer
from torch.nn import functional as F
import dgl
g = dgl.DGLGraph()
g.graph_data = {}
in_nodes = 20
out_nodes = 10
g.graph_data['in_nodes']=in_nodes
g.graph_data['out_nodes']=out_nodes
g.graph_data["in_nodes"] = in_nodes
g.graph_data["out_nodes"] = out_nodes
all_nodes = in_nodes + out_nodes
g.add_nodes(all_nodes)
in_indx = list(range(in_nodes))
out_indx = list(range(in_nodes, in_nodes + out_nodes))
g.graph_data['in_indx']=in_indx
g.graph_data['out_indx']=out_indx
g.graph_data["in_indx"] = in_indx
g.graph_data["out_indx"] = out_indx
# add edges using edge broadcasting
for u in out_indx:
......@@ -28,17 +27,16 @@ for u in out_indx:
# init states
f_size = 4
g.ndata['v'] = th.zeros(all_nodes, f_size)
g.edata['u_hat'] = th.randn(in_nodes * out_nodes, f_size)
g.edata['b'] = th.randn(in_nodes * out_nodes, 1)
g.ndata["v"] = th.zeros(all_nodes, f_size)
g.edata["u_hat"] = th.randn(in_nodes * out_nodes, f_size)
g.edata["b"] = th.randn(in_nodes * out_nodes, 1)
routing_layer = DGLRoutingLayer(g)
entropy_list=[]
entropy_list = []
for i in range(15):
routing_layer()
dist_matrix = g.edata['c'].view(in_nodes, out_nodes)
dist_matrix = g.edata["c"].view(in_nodes, out_nodes)
entropy = (-dist_matrix * th.log(dist_matrix)).sum(dim=0)
entropy_list.append(entropy.data.numpy())
std = dist_matrix.std(dim=0)
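# dist_matrix holds the coupling coefficients c_ij (a softmax over output
# capsules for each input capsule). The entropy term sums -c_ij * log(c_ij)
# over the inputs of every output capsule; as routing iterations sharpen the
# coefficients it tends toward zero, which is presumably why entropy_list and
# the per-column std are collected here (e.g. for plotting routing convergence).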
import dgl
import argparse
import torch as th
from model import CAREGNN
import torch.optim as optim
from torch.nn.functional import softmax
from model import CAREGNN
from sklearn.metrics import recall_score, roc_auc_score
from torch.nn.functional import softmax
from utils import EarlyStopping
import dgl
def main(args):
# Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
......@@ -18,45 +19,51 @@ def main(args):
# check cuda
if args.gpu >= 0 and th.cuda.is_available():
device = 'cuda:{}'.format(args.gpu)
device = "cuda:{}".format(args.gpu)
else:
device = 'cpu'
device = "cpu"
# retrieve labels of ground truth
labels = graph.ndata['label'].to(device)
labels = graph.ndata["label"].to(device)
# Extract node features
feat = graph.ndata['feature'].to(device)
feat = graph.ndata["feature"].to(device)
# retrieve masks for train/validation/test
train_mask = graph.ndata['train_mask']
val_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
train_mask = graph.ndata["train_mask"]
val_mask = graph.ndata["val_mask"]
test_mask = graph.ndata["test_mask"]
train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)
# Reinforcement learning module only for positive training nodes
rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)
rl_idx = th.nonzero(
train_mask.to(device) & labels.bool(), as_tuple=False
).squeeze(1)
graph = graph.to(device)
# Step 2: Create model =================================================================== #
model = CAREGNN(in_dim=feat.shape[-1],
num_classes=num_classes,
hid_dim=args.hid_dim,
num_layers=args.num_layers,
activation=th.tanh,
step_size=args.step_size,
edges=graph.canonical_etypes)
model = CAREGNN(
in_dim=feat.shape[-1],
num_classes=num_classes,
hid_dim=args.hid_dim,
num_layers=args.num_layers,
activation=th.tanh,
step_size=args.step_size,
edges=graph.canonical_etypes,
)
model = model.to(device)
# Step 3: Create training components ===================================================== #
_, cnt = th.unique(labels, return_counts=True)
loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
optimizer = optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay
)
if args.early_stop:
stopper = EarlyStopping(patience=100)
......@@ -67,17 +74,30 @@ def main(args):
logits_gnn, logits_sim = model(graph, feat)
# compute loss
tr_loss = loss_fn(logits_gnn[train_idx], labels[train_idx]) + \
args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])
tr_recall = recall_score(labels[train_idx].cpu(), logits_gnn.data[train_idx].argmax(dim=1).cpu())
tr_auc = roc_auc_score(labels[train_idx].cpu(), softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu())
tr_loss = loss_fn(
logits_gnn[train_idx], labels[train_idx]
) + args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])
tr_recall = recall_score(
labels[train_idx].cpu(),
logits_gnn.data[train_idx].argmax(dim=1).cpu(),
)
tr_auc = roc_auc_score(
labels[train_idx].cpu(),
softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu(),
)
# validation
val_loss = loss_fn(logits_gnn[val_idx], labels[val_idx]) + \
args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
val_recall = recall_score(labels[val_idx].cpu(), logits_gnn.data[val_idx].argmax(dim=1).cpu())
val_auc = roc_auc_score(labels[val_idx].cpu(), softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu())
val_loss = loss_fn(
logits_gnn[val_idx], labels[val_idx]
) + args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
val_recall = recall_score(
labels[val_idx].cpu(), logits_gnn.data[val_idx].argmax(dim=1).cpu()
)
val_auc = roc_auc_score(
labels[val_idx].cpu(),
softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu(),
)
# backward
optimizer.zero_grad()
......@@ -85,8 +105,17 @@ def main(args):
optimizer.step()
# Print out performance
print("Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}"
.format(epoch, tr_recall, tr_auc, tr_loss.item(), val_recall, val_auc, val_loss.item()))
print(
"Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
epoch,
tr_recall,
tr_auc,
tr_loss.item(),
val_recall,
val_auc,
val_loss.item(),
)
)
# Adjust p value with reinforcement learning module
model.RLModule(graph, epoch, rl_idx)
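# RLModule presumably nudges each relation's neighbor-filtering threshold p up
# or down by step_size, depending on whether the average formula-2 distance over
# the kept neighbors of the positive training nodes (rl_idx) decreased since the
# previous epoch; see the formula 5 / formula 6 comments in the model code below.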
......@@ -98,32 +127,80 @@ def main(args):
# Test after all epoch
model.eval()
if args.early_stop:
model.load_state_dict(th.load('es_checkpoint.pt'))
model.load_state_dict(th.load("es_checkpoint.pt"))
# forward
logits_gnn, logits_sim = model.forward(graph, feat)
# compute loss
test_loss = loss_fn(logits_gnn[test_idx], labels[test_idx]) + \
args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
test_recall = recall_score(labels[test_idx].cpu(), logits_gnn[test_idx].argmax(dim=1).cpu())
test_auc = roc_auc_score(labels[test_idx].cpu(), softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu())
print("Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(test_recall, test_auc, test_loss.item()))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN-based Anti-Spam Model')
parser.add_argument("--dataset", type=str, default="amazon", help="DGL dataset for this model (yelp, or amazon)")
parser.add_argument("--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU.")
parser.add_argument("--hid_dim", type=int, default=64, help="Hidden layer dimension")
parser.add_argument("--num_layers", type=int, default=1, help="Number of layers")
parser.add_argument("--max_epoch", type=int, default=30, help="The max number of epochs. Default: 30")
parser.add_argument("--lr", type=float, default=0.01, help="Learning rate. Default: 0.01")
parser.add_argument("--weight_decay", type=float, default=0.001, help="Weight decay. Default: 0.001")
parser.add_argument("--step_size", type=float, default=0.02, help="RL action step size (lambda 2). Default: 0.02")
parser.add_argument("--sim_weight", type=float, default=2, help="Similarity loss weight (lambda 1). Default: 2")
parser.add_argument('--early-stop', action='store_true', default=False, help="indicates whether to use early stop")
test_loss = loss_fn(
logits_gnn[test_idx], labels[test_idx]
) + args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])
test_recall = recall_score(
labels[test_idx].cpu(), logits_gnn[test_idx].argmax(dim=1).cpu()
)
test_auc = roc_auc_score(
labels[test_idx].cpu(),
softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu(),
)
print(
"Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
test_recall, test_auc, test_loss.item()
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN-based Anti-Spam Model")
parser.add_argument(
"--dataset",
type=str,
default="amazon",
help="DGL dataset for this model (yelp, or amazon)",
)
parser.add_argument(
"--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
)
parser.add_argument(
"--hid_dim", type=int, default=64, help="Hidden layer dimension"
)
parser.add_argument(
"--num_layers", type=int, default=1, help="Number of layers"
)
parser.add_argument(
"--max_epoch",
type=int,
default=30,
help="The max number of epochs. Default: 30",
)
parser.add_argument(
"--lr", type=float, default=0.01, help="Learning rate. Default: 0.01"
)
parser.add_argument(
"--weight_decay",
type=float,
default=0.001,
help="Weight decay. Default: 0.001",
)
parser.add_argument(
"--step_size",
type=float,
default=0.02,
help="RL action step size (lambda 2). Default: 0.02",
)
parser.add_argument(
"--sim_weight",
type=float,
default=2,
help="Similarity loss weight (lambda 1). Default: 2",
)
parser.add_argument(
"--early-stop",
action="store_true",
default=False,
help="indicates whether to use early stop",
)
args = parser.parse_args()
print(args)
......
import dgl
import argparse
import torch as th
import torch.optim as optim
from model_sampling import CAREGNN, CARESampler, _l1_dist
from sklearn.metrics import recall_score, roc_auc_score
from torch.nn.functional import softmax
from sklearn.metrics import roc_auc_score, recall_score
from utils import EarlyStopping
from model_sampling import CAREGNN, CARESampler, _l1_dist
import dgl
def evaluate(model, loss_fn, dataloader, device='cpu'):
def evaluate(model, loss_fn, dataloader, device="cpu"):
loss = 0
auc = 0
recall = 0
num_blocks = 0
for input_nodes, output_nodes, blocks in dataloader:
blocks = [b.to(device) for b in blocks]
feature = blocks[0].srcdata['feature']
label = blocks[-1].dstdata['label']
feature = blocks[0].srcdata["feature"]
label = blocks[-1].dstdata["label"]
logits_gnn, logits_sim = model(blocks, feature)
# compute loss
loss += loss_fn(logits_gnn, label).item() + args.sim_weight * loss_fn(logits_sim, label).item()
recall += recall_score(label.cpu(), logits_gnn.argmax(dim=1).detach().cpu())
auc += roc_auc_score(label.cpu(), softmax(logits_gnn, dim=1)[:, 1].detach().cpu())
loss += (
loss_fn(logits_gnn, label).item()
+ args.sim_weight * loss_fn(logits_sim, label).item()
)
recall += recall_score(
label.cpu(), logits_gnn.argmax(dim=1).detach().cpu()
)
auc += roc_auc_score(
label.cpu(), softmax(logits_gnn, dim=1)[:, 1].detach().cpu()
)
num_blocks += 1
return recall / num_blocks, auc / num_blocks, loss / num_blocks
......@@ -38,47 +46,53 @@ def main(args):
# check cuda
if args.gpu >= 0 and th.cuda.is_available():
device = 'cuda:{}'.format(args.gpu)
device = "cuda:{}".format(args.gpu)
args.num_workers = 0
else:
device = 'cpu'
device = "cpu"
# retrieve labels of ground truth
labels = graph.ndata['label'].to(device)
labels = graph.ndata["label"].to(device)
# Extract node features
feat = graph.ndata['feature'].to(device)
feat = graph.ndata["feature"].to(device)
layers_feat = feat.expand(args.num_layers, -1, -1)
# retrieve masks for train/validation/test
train_mask = graph.ndata['train_mask']
val_mask = graph.ndata['val_mask']
test_mask = graph.ndata['test_mask']
train_mask = graph.ndata["train_mask"]
val_mask = graph.ndata["val_mask"]
test_mask = graph.ndata["test_mask"]
train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)
# Reinforcement learning module only for positive training nodes
rl_idx = th.nonzero(train_mask.to(device) & labels.bool(), as_tuple=False).squeeze(1)
rl_idx = th.nonzero(
train_mask.to(device) & labels.bool(), as_tuple=False
).squeeze(1)
graph = graph.to(device)
# Step 2: Create model =================================================================== #
model = CAREGNN(in_dim=feat.shape[-1],
num_classes=num_classes,
hid_dim=args.hid_dim,
num_layers=args.num_layers,
activation=th.tanh,
step_size=args.step_size,
edges=graph.canonical_etypes)
model = CAREGNN(
in_dim=feat.shape[-1],
num_classes=num_classes,
hid_dim=args.hid_dim,
num_layers=args.num_layers,
activation=th.tanh,
step_size=args.step_size,
edges=graph.canonical_etypes,
)
model = model.to(device)
# Step 3: Create training components ===================================================== #
_, cnt = th.unique(labels, return_counts=True)
loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
optimizer = optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay
)
if args.early_stop:
stopper = EarlyStopping(patience=100)
......@@ -89,13 +103,13 @@ def main(args):
p = []
for i in range(args.num_layers):
dist = {}
graph.ndata['nd'] = th.tanh(model.layers[i].MLP(layers_feat[i]))
graph.ndata["nd"] = th.tanh(model.layers[i].MLP(layers_feat[i]))
for etype in graph.canonical_etypes:
graph.apply_edges(_l1_dist, etype=etype)
dist[etype] = graph.edges[etype].data.pop('ed').detach().cpu()
dist[etype] = graph.edges[etype].data.pop("ed").detach().cpu()
dists.append(dist)
p.append(model.layers[i].p)
graph.ndata.pop('nd')
graph.ndata.pop("nd")
sampler = CARESampler(p, dists, args.num_layers)
# train
......@@ -105,20 +119,33 @@ def main(args):
tr_auc = 0
tr_blk = 0
train_dataloader = dgl.dataloading.DataLoader(
graph, train_idx, sampler, batch_size=args.batch_size,
shuffle=True, drop_last=False, num_workers=args.num_workers)
graph,
train_idx,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
for input_nodes, output_nodes, blocks in train_dataloader:
blocks = [b.to(device) for b in blocks]
train_feature = blocks[0].srcdata['feature']
train_label = blocks[-1].dstdata['label']
train_feature = blocks[0].srcdata["feature"]
train_label = blocks[-1].dstdata["label"]
logits_gnn, logits_sim = model(blocks, train_feature)
# compute loss
blk_loss = loss_fn(logits_gnn, train_label) + args.sim_weight * loss_fn(logits_sim, train_label)
blk_loss = loss_fn(
logits_gnn, train_label
) + args.sim_weight * loss_fn(logits_sim, train_label)
tr_loss += blk_loss.item()
tr_recall += recall_score(train_label.cpu(), logits_gnn.argmax(dim=1).detach().cpu())
tr_auc += roc_auc_score(train_label.cpu(), softmax(logits_gnn, dim=1)[:, 1].detach().cpu())
tr_recall += recall_score(
train_label.cpu(), logits_gnn.argmax(dim=1).detach().cpu()
)
tr_auc += roc_auc_score(
train_label.cpu(),
softmax(logits_gnn, dim=1)[:, 1].detach().cpu(),
)
tr_blk += 1
# backward
......@@ -132,15 +159,32 @@ def main(args):
# validation
model.eval()
val_dataloader = dgl.dataloading.DataLoader(
graph, val_idx, sampler, batch_size=args.batch_size,
shuffle=True, drop_last=False, num_workers=args.num_workers)
val_recall, val_auc, val_loss = evaluate(model, loss_fn, val_dataloader, device)
graph,
val_idx,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
val_recall, val_auc, val_loss = evaluate(
model, loss_fn, val_dataloader, device
)
# Print out performance
print("In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
"Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".
format(epoch, tr_recall / tr_blk, tr_auc / tr_blk, tr_loss / tr_blk, val_recall, val_auc, val_loss))
print(
"In epoch {}, Train Recall: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
"Valid Recall: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
epoch,
tr_recall / tr_blk,
tr_auc / tr_blk,
tr_loss / tr_blk,
val_recall,
val_auc,
val_loss,
)
)
if args.early_stop:
if stopper.step(val_auc, model):
......@@ -149,30 +193,84 @@ def main(args):
# Test with mini batch after all epoch
model.eval()
if args.early_stop:
model.load_state_dict(th.load('es_checkpoint.pt'))
model.load_state_dict(th.load("es_checkpoint.pt"))
test_dataloader = dgl.dataloading.DataLoader(
graph, test_idx, sampler, batch_size=args.batch_size,
shuffle=True, drop_last=False, num_workers=args.num_workers)
test_recall, test_auc, test_loss = evaluate(model, loss_fn, test_dataloader, device)
print("Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(test_recall, test_auc, test_loss))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN-based Anti-Spam Model')
parser.add_argument("--dataset", type=str, default="amazon", help="DGL dataset for this model (yelp, or amazon)")
parser.add_argument("--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU.")
parser.add_argument("--hid_dim", type=int, default=64, help="Hidden layer dimension")
parser.add_argument("--num_layers", type=int, default=1, help="Number of layers")
parser.add_argument("--batch_size", type=int, default=256, help="Size of mini-batch")
parser.add_argument("--max_epoch", type=int, default=30, help="The max number of epochs. Default: 30")
parser.add_argument("--lr", type=float, default=0.01, help="Learning rate. Default: 0.01")
parser.add_argument("--weight_decay", type=float, default=0.001, help="Weight decay. Default: 0.001")
parser.add_argument("--step_size", type=float, default=0.02, help="RL action step size (lambda 2). Default: 0.02")
parser.add_argument("--sim_weight", type=float, default=2, help="Similarity loss weight (lambda 1). Default: 0.001")
parser.add_argument("--num_workers", type=int, default=4, help="Number of node dataloader")
parser.add_argument('--early-stop', action='store_true', default=False, help="indicates whether to use early stop")
graph,
test_idx,
sampler,
batch_size=args.batch_size,
shuffle=True,
drop_last=False,
num_workers=args.num_workers,
)
test_recall, test_auc, test_loss = evaluate(
model, loss_fn, test_dataloader, device
)
print(
"Test Recall: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
test_recall, test_auc, test_loss
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="GCN-based Anti-Spam Model")
parser.add_argument(
"--dataset",
type=str,
default="amazon",
help="DGL dataset for this model (yelp, or amazon)",
)
parser.add_argument(
"--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
)
parser.add_argument(
"--hid_dim", type=int, default=64, help="Hidden layer dimension"
)
parser.add_argument(
"--num_layers", type=int, default=1, help="Number of layers"
)
parser.add_argument(
"--batch_size", type=int, default=256, help="Size of mini-batch"
)
parser.add_argument(
"--max_epoch",
type=int,
default=30,
help="The max number of epochs. Default: 30",
)
parser.add_argument(
"--lr", type=float, default=0.01, help="Learning rate. Default: 0.01"
)
parser.add_argument(
"--weight_decay",
type=float,
default=0.001,
help="Weight decay. Default: 0.001",
)
parser.add_argument(
"--step_size",
type=float,
default=0.02,
help="RL action step size (lambda 2). Default: 0.02",
)
parser.add_argument(
"--sim_weight",
type=float,
default=2,
help="Similarity loss weight (lambda 1). Default: 0.001",
)
parser.add_argument(
"--num_workers", type=int, default=4, help="Number of node dataloader"
)
parser.add_argument(
"--early-stop",
action="store_true",
default=False,
help="indicates whether to use early stop",
)
args = parser.parse_args()
th.manual_seed(717)
......
import torch as th
import numpy as np
import torch as th
import torch.nn as nn
import dgl.function as fn
class CAREConv(nn.Module):
"""One layer of CARE-GNN."""
def __init__(self, in_dim, out_dim, num_classes, edges, activation=None, step_size=0.02):
def __init__(
self,
in_dim,
out_dim,
num_classes,
edges,
activation=None,
step_size=0.02,
):
super(CAREConv, self).__init__()
self.activation = activation
......@@ -33,20 +42,27 @@ class CAREConv(nn.Module):
def _calc_distance(self, edges):
# formula 2
d = th.norm(th.tanh(self.MLP(edges.src['h'])) - th.tanh(self.MLP(edges.dst['h'])), 1, 1)
return {'d': d}
d = th.norm(
th.tanh(self.MLP(edges.src["h"]))
- th.tanh(self.MLP(edges.dst["h"])),
1,
1,
)
return {"d": d}
def _top_p_sampling(self, g, p):
# this implementation is inefficient
# optimizing it requires dgl.sampling.select_top_p, requested in issue #3100
dist = g.edata['d']
dist = g.edata["d"]
neigh_list = []
for node in g.nodes():
edges = g.in_edges(node, form='eid')
edges = g.in_edges(node, form="eid")
num_neigh = th.ceil(g.in_degrees(node) * p).int().item()
neigh_dist = dist[edges]
if neigh_dist.shape[0] > num_neigh:
neigh_index = np.argpartition(neigh_dist.cpu().detach(), num_neigh)[:num_neigh]
neigh_index = np.argpartition(
neigh_dist.cpu().detach(), num_neigh
)[:num_neigh]
else:
neigh_index = np.arange(num_neigh)
neigh_list.append(edges[neigh_index])
......@@ -54,22 +70,29 @@ class CAREConv(nn.Module):
def forward(self, g, feat):
with g.local_scope():
g.ndata['h'] = feat
g.ndata["h"] = feat
hr = {}
for i, etype in enumerate(g.canonical_etypes):
g.apply_edges(self._calc_distance, etype=etype)
self.dist[etype] = g.edges[etype].data['d']
self.dist[etype] = g.edges[etype].data["d"]
sampled_edges = self._top_p_sampling(g[etype], self.p[etype])
# formula 8
g.send_and_recv(sampled_edges, fn.copy_u('h', 'm'), fn.mean('m', 'h_%s' % etype[1]), etype=etype)
hr[etype] = g.ndata['h_%s' % etype[1]]
g.send_and_recv(
sampled_edges,
fn.copy_u("h", "m"),
fn.mean("m", "h_%s" % etype[1]),
etype=etype,
)
hr[etype] = g.ndata["h_%s" % etype[1]]
if self.activation is not None:
hr[etype] = self.activation(hr[etype])
# formula 9 using mean as inter-relation aggregator
p_tensor = th.Tensor(list(self.p.values())).view(-1, 1, 1).to(g.device)
p_tensor = (
th.Tensor(list(self.p.values())).view(-1, 1, 1).to(g.device)
)
h_homo = th.sum(th.stack(list(hr.values())) * p_tensor, dim=0)
h_homo += feat
if self.activation is not None:
......@@ -79,14 +102,16 @@ class CAREConv(nn.Module):
class CAREGNN(nn.Module):
def __init__(self,
in_dim,
num_classes,
hid_dim=64,
edges=None,
num_layers=2,
activation=None,
step_size=0.02):
def __init__(
self,
in_dim,
num_classes,
hid_dim=64,
edges=None,
num_layers=2,
activation=None,
step_size=0.02,
):
super(CAREGNN, self).__init__()
self.in_dim = in_dim
self.hid_dim = hid_dim
......@@ -100,38 +125,54 @@ class CAREGNN(nn.Module):
if self.num_layers == 1:
# Single layer
self.layers.append(CAREConv(self.in_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.in_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
else:
# Input layer
self.layers.append(CAREConv(self.in_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.in_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
# Hidden layers with n - 2 layers
for i in range(self.num_layers - 2):
self.layers.append(CAREConv(self.hid_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.hid_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
# Output layer
self.layers.append(CAREConv(self.hid_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.hid_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
def forward(self, graph, feat):
# For full graph training, directly use the graph
......@@ -149,7 +190,7 @@ class CAREGNN(nn.Module):
for etype in self.edges:
if not layer.cvg[etype]:
# formula 5
eid = graph.in_edges(idx, form='eid', etype=etype)
eid = graph.in_edges(idx, form="eid", etype=etype)
avg_dist = th.mean(layer.dist[etype][eid])
# formula 6
......
import dgl
import torch as th
import numpy as np
import torch as th
import torch.nn as nn
import dgl
import dgl.function as fn
def _l1_dist(edges):
# formula 2
ed = th.norm(edges.src['nd'] - edges.dst['nd'], 1, 1)
return {'ed': ed}
ed = th.norm(edges.src["nd"] - edges.dst["nd"], 1, 1)
return {"ed": ed}
class CARESampler(dgl.dataloading.BlockSampler):
......@@ -25,11 +26,20 @@ class CARESampler(dgl.dataloading.BlockSampler):
edge_mask = th.zeros(g.number_of_edges(etype))
# extract each node from dict because of single node type
for node in seed_nodes:
edges = g.in_edges(node, form='eid', etype=etype)
num_neigh = th.ceil(g.in_degrees(node, etype=etype) * self.p[block_id][etype]).int().item()
edges = g.in_edges(node, form="eid", etype=etype)
num_neigh = (
th.ceil(
g.in_degrees(node, etype=etype)
* self.p[block_id][etype]
)
.int()
.item()
)
neigh_dist = self.dists[block_id][etype][edges]
if neigh_dist.shape[0] > num_neigh:
neigh_index = np.argpartition(neigh_dist, num_neigh)[:num_neigh]
neigh_index = np.argpartition(neigh_dist, num_neigh)[
:num_neigh
]
else:
neigh_index = np.arange(num_neigh)
edge_mask[edges[neigh_index]] = 1
......@@ -57,7 +67,15 @@ class CARESampler(dgl.dataloading.BlockSampler):
class CAREConv(nn.Module):
"""One layer of CARE-GNN."""
def __init__(self, in_dim, out_dim, num_classes, edges, activation=None, step_size=0.02):
def __init__(
self,
in_dim,
out_dim,
num_classes,
edges,
activation=None,
step_size=0.02,
):
super(CAREConv, self).__init__()
self.activation = activation
......@@ -82,20 +100,22 @@ class CAREConv(nn.Module):
self.cvg[etype] = False
def forward(self, g, feat):
g.srcdata['h'] = feat
g.srcdata["h"] = feat
# formula 8
hr = {}
for etype in g.canonical_etypes:
g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'hr'), etype=etype)
hr[etype] = g.dstdata['hr']
g.update_all(fn.copy_u("h", "m"), fn.mean("m", "hr"), etype=etype)
hr[etype] = g.dstdata["hr"]
if self.activation is not None:
hr[etype] = self.activation(hr[etype])
# formula 9 using mean as inter-relation aggregator
p_tensor = th.Tensor(list(self.p.values())).view(-1, 1, 1).to(feat.device)
p_tensor = (
th.Tensor(list(self.p.values())).view(-1, 1, 1).to(feat.device)
)
h_homo = th.sum(th.stack(list(hr.values())) * p_tensor, dim=0)
h_homo += feat[:g.number_of_dst_nodes()]
h_homo += feat[: g.number_of_dst_nodes()]
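# In a DGL block the first number_of_dst_nodes() source nodes are the
# destination nodes themselves, so this slice adds each destination node's own
# feature, mirroring `h_homo += feat` in the full-graph CAREConv above.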
if self.activation is not None:
h_homo = self.activation(h_homo)
......@@ -103,14 +123,16 @@ class CAREConv(nn.Module):
class CAREGNN(nn.Module):
def __init__(self,
in_dim,
num_classes,
hid_dim=64,
edges=None,
num_layers=2,
activation=None,
step_size=0.02):
def __init__(
self,
in_dim,
num_classes,
hid_dim=64,
edges=None,
num_layers=2,
activation=None,
step_size=0.02,
):
super(CAREGNN, self).__init__()
self.in_dim = in_dim
self.hid_dim = hid_dim
......@@ -124,42 +146,58 @@ class CAREGNN(nn.Module):
if self.num_layers == 1:
# Single layer
self.layers.append(CAREConv(self.in_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.in_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
else:
# Input layer
self.layers.append(CAREConv(self.in_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.in_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
# Hidden layers with n - 2 layers
for i in range(self.num_layers - 2):
self.layers.append(CAREConv(self.hid_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.hid_dim,
self.hid_dim,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
# Output layer
self.layers.append(CAREConv(self.hid_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size))
self.layers.append(
CAREConv(
self.hid_dim,
self.num_classes,
self.num_classes,
self.edges,
activation=self.activation,
step_size=self.step_size,
)
)
def forward(self, blocks, feat):
# formula 4
sim = th.tanh(self.layers[0].MLP(blocks[-1].dstdata['feature'].float()))
sim = th.tanh(self.layers[0].MLP(blocks[-1].dstdata["feature"].float()))
# Forward of n layers of CARE-GNN
for block, layer in zip(blocks, self.layers):
......@@ -171,7 +209,7 @@ class CAREGNN(nn.Module):
for etype in self.edges:
if not layer.cvg[etype]:
# formula 5
eid = graph.in_edges(idx, form='eid', etype=etype)
eid = graph.in_edges(idx, form="eid", etype=etype)
avg_dist = th.mean(dists[i][etype][eid])
# formula 6
......
......@@ -18,7 +18,9 @@ class EarlyStopping:
self.save_checkpoint(model)
elif score < self.best_score:
self.counter += 1
print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
print(
f"EarlyStopping counter: {self.counter} out of {self.patience}"
)
if self.counter >= self.patience:
self.early_stop = True
else:
......@@ -28,5 +30,5 @@ class EarlyStopping:
return self.early_stop
def save_checkpoint(self, model):
'''Saves model when validation loss decrease.'''
torch.save(model.state_dict(), 'es_checkpoint.pt')
"""Saves model when validation loss decrease."""
torch.save(model.state_dict(), "es_checkpoint.pt")
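# How the training scripts above use this helper (a summary of existing usage,
# not new behavior):
#   stopper = EarlyStopping(patience=100)
#   if stopper.step(val_auc, model):   # True once no improvement for `patience` epochs
#       break                          # stop training early
#   model.load_state_dict(th.load("es_checkpoint.pt"))  # restore best checkpoint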