Unverified commit 0b9df9d7, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Black auto fix. (#4652)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent f19f05ce
import dgl
import rdkit.Chem as Chem
from .chemutils import get_clique_mol, tree_decomp, get_mol, get_smiles, \
set_atommap, enum_assemble_nx, decode_stereo
import numpy as np
import rdkit.Chem as Chem
import dgl
from .chemutils import (
decode_stereo,
enum_assemble_nx,
get_clique_mol,
get_mol,
get_smiles,
set_atommap,
tree_decomp,
)
class DGLMolTree(object):
def __init__(self, smiles):
......@@ -28,21 +38,23 @@ class DGLMolTree(object):
cmol = get_clique_mol(self.mol, c)
csmiles = get_smiles(cmol)
self.nodes_dict[i] = dict(
smiles=csmiles,
mol=get_mol(csmiles),
clique=c,
)
smiles=csmiles,
mol=get_mol(csmiles),
clique=c,
)
if min(c) == 0:
root = i
# The clique with atom ID 0 becomes root
if root > 0:
for attr in self.nodes_dict[0]:
self.nodes_dict[0][attr], self.nodes_dict[root][attr] = \
self.nodes_dict[root][attr], self.nodes_dict[0][attr]
self.nodes_dict[0][attr], self.nodes_dict[root][attr] = (
self.nodes_dict[root][attr],
self.nodes_dict[0][attr],
)
src = np.zeros((len(edges) * 2,), dtype='int')
dst = np.zeros((len(edges) * 2,), dtype='int')
src = np.zeros((len(edges) * 2,), dtype="int")
dst = np.zeros((len(edges) * 2,), dtype="int")
for i, (_x, _y) in enumerate(edges):
x = 0 if _x == root else root if _x == 0 else _x
y = 0 if _y == root else root if _y == 0 else _y
......@@ -53,10 +65,12 @@ class DGLMolTree(object):
self.graph = dgl.graph((src, dst), num_nodes=len(cliques))
for i in self.nodes_dict:
self.nodes_dict[i]['nid'] = i + 1
if self.graph.out_degrees(i) > 1: # Leaf node mol is not marked
set_atommap(self.nodes_dict[i]['mol'], self.nodes_dict[i]['nid'])
self.nodes_dict[i]['is_leaf'] = (self.graph.out_degrees(i) == 1)
self.nodes_dict[i]["nid"] = i + 1
if self.graph.out_degrees(i) > 1: # Leaf node mol is not marked
set_atommap(
self.nodes_dict[i]["mol"], self.nodes_dict[i]["nid"]
)
self.nodes_dict[i]["is_leaf"] = self.graph.out_degrees(i) == 1
def treesize(self):
return self.graph.number_of_nodes()
......@@ -65,49 +79,65 @@ class DGLMolTree(object):
node = self.nodes_dict[i]
clique = []
clique.extend(node['clique'])
if not node['is_leaf']:
for cidx in node['clique']:
original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node['nid'])
clique.extend(node["clique"])
if not node["is_leaf"]:
for cidx in node["clique"]:
original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(node["nid"])
for j in self.graph.successors(i).numpy():
nei_node = self.nodes_dict[j]
clique.extend(nei_node['clique'])
if nei_node['is_leaf']: # Leaf node, no need to mark
clique.extend(nei_node["clique"])
if nei_node["is_leaf"]: # Leaf node, no need to mark
continue
for cidx in nei_node['clique']:
for cidx in nei_node["clique"]:
# allow a singleton node to override the atom mapping
if cidx not in node['clique'] or len(nei_node['clique']) == 1:
if cidx not in node["clique"] or len(nei_node["clique"]) == 1:
atom = original_mol.GetAtomWithIdx(cidx)
atom.SetAtomMapNum(nei_node['nid'])
atom.SetAtomMapNum(nei_node["nid"])
clique = list(set(clique))
label_mol = get_clique_mol(original_mol, clique)
node['label'] = Chem.MolToSmiles(Chem.MolFromSmiles(get_smiles(label_mol)))
node['label_mol'] = get_mol(node['label'])
node["label"] = Chem.MolToSmiles(
Chem.MolFromSmiles(get_smiles(label_mol))
)
node["label_mol"] = get_mol(node["label"])
for cidx in clique:
original_mol.GetAtomWithIdx(cidx).SetAtomMapNum(0)
return node['label']
return node["label"]
def _assemble_node(self, i):
neighbors = [self.nodes_dict[j] for j in self.graph.successors(i).numpy()
if self.nodes_dict[j]['mol'].GetNumAtoms() > 1]
neighbors = sorted(neighbors, key=lambda x: x['mol'].GetNumAtoms(), reverse=True)
singletons = [self.nodes_dict[j] for j in self.graph.successors(i).numpy()
if self.nodes_dict[j]['mol'].GetNumAtoms() == 1]
neighbors = [
self.nodes_dict[j]
for j in self.graph.successors(i).numpy()
if self.nodes_dict[j]["mol"].GetNumAtoms() > 1
]
neighbors = sorted(
neighbors, key=lambda x: x["mol"].GetNumAtoms(), reverse=True
)
singletons = [
self.nodes_dict[j]
for j in self.graph.successors(i).numpy()
if self.nodes_dict[j]["mol"].GetNumAtoms() == 1
]
neighbors = singletons + neighbors
cands = enum_assemble_nx(self.nodes_dict[i], neighbors)
if len(cands) > 0:
self.nodes_dict[i]['cands'], self.nodes_dict[i]['cand_mols'], _ = list(zip(*cands))
self.nodes_dict[i]['cands'] = list(self.nodes_dict[i]['cands'])
self.nodes_dict[i]['cand_mols'] = list(self.nodes_dict[i]['cand_mols'])
(
self.nodes_dict[i]["cands"],
self.nodes_dict[i]["cand_mols"],
_,
) = list(zip(*cands))
self.nodes_dict[i]["cands"] = list(self.nodes_dict[i]["cands"])
self.nodes_dict[i]["cand_mols"] = list(
self.nodes_dict[i]["cand_mols"]
)
else:
self.nodes_dict[i]['cands'] = []
self.nodes_dict[i]['cand_mols'] = []
self.nodes_dict[i]["cands"] = []
self.nodes_dict[i]["cand_mols"] = []
def recover(self):
for i in self.nodes_dict:
......
import rdkit.Chem as Chem
import torch
import torch.nn as nn
import rdkit.Chem as Chem
import torch.nn.functional as F
from .chemutils import get_mol
import dgl
from dgl import mean_nodes, line_graph
import dgl.function as DGLF
from dgl import line_graph, mean_nodes
from .chemutils import get_mol
ELEM_LIST = ['C', 'N', 'O', 'S', 'F', 'Si', 'P', 'Cl', 'Br', 'Mg', 'Na', 'Ca',
'Fe', 'Al', 'I', 'B', 'K', 'Se', 'Zn', 'H', 'Cu', 'Mn', 'unknown']
ELEM_LIST = [
"C",
"N",
"O",
"S",
"F",
"Si",
"P",
"Cl",
"Br",
"Mg",
"Na",
"Ca",
"Fe",
"Al",
"I",
"B",
"K",
"Se",
"Zn",
"H",
"Cu",
"Mn",
"unknown",
]
ATOM_FDIM = len(ELEM_LIST) + 6 + 5 + 4 + 1
BOND_FDIM = 5 + 6
MAX_NB = 6
def onek_encoding_unk(x, allowable_set):
if x not in allowable_set:
x = allowable_set[-1]
return [x == s for s in allowable_set]
def atom_features(atom):
return (torch.Tensor(onek_encoding_unk(atom.GetSymbol(), ELEM_LIST)
+ onek_encoding_unk(atom.GetDegree(), [0,1,2,3,4,5])
+ onek_encoding_unk(atom.GetFormalCharge(), [-1,-2,1,2,0])
+ onek_encoding_unk(int(atom.GetChiralTag()), [0,1,2,3])
+ [atom.GetIsAromatic()]))
return torch.Tensor(
onek_encoding_unk(atom.GetSymbol(), ELEM_LIST)
+ onek_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
+ onek_encoding_unk(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
+ onek_encoding_unk(int(atom.GetChiralTag()), [0, 1, 2, 3])
+ [atom.GetIsAromatic()]
)
def bond_features(bond):
bt = bond.GetBondType()
stereo = int(bond.GetStereo())
fbond = [bt == Chem.rdchem.BondType.SINGLE, bt == Chem.rdchem.BondType.DOUBLE, bt == Chem.rdchem.BondType.TRIPLE, bt == Chem.rdchem.BondType.AROMATIC, bond.IsInRing()]
fstereo = onek_encoding_unk(stereo, [0,1,2,3,4,5])
return (torch.Tensor(fbond + fstereo))
fbond = [
bt == Chem.rdchem.BondType.SINGLE,
bt == Chem.rdchem.BondType.DOUBLE,
bt == Chem.rdchem.BondType.TRIPLE,
bt == Chem.rdchem.BondType.AROMATIC,
bond.IsInRing(),
]
fstereo = onek_encoding_unk(stereo, [0, 1, 2, 3, 4, 5])
return torch.Tensor(fbond + fstereo)
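# Hedged usage sketch (added for illustration, not part of the original file):
# with the imports above, the featurizers can be exercised on one molecule.
# "CCO" (ethanol) is an arbitrary example SMILES; onek_encoding_unk maps any
# out-of-vocabulary value to the last slot of its allowable set ("unknown" in
# ELEM_LIST).
def _example_featurize(smiles="CCO"):
    mol = Chem.MolFromSmiles(smiles)
    atom_feats = torch.stack([atom_features(a) for a in mol.GetAtoms()])
    bond_feats = torch.stack([bond_features(b) for b in mol.GetBonds()])
    # For ethanol: 3 heavy atoms x ATOM_FDIM (39), 2 bonds x BOND_FDIM (11).
    return atom_feats, bond_feats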
def mol2dgl_single(smiles):
n_edges = 0
......@@ -61,8 +98,11 @@ def mol2dgl_single(smiles):
bond_x.append(features)
graph = dgl.graph((bond_src, bond_dst), num_nodes=n_atoms)
n_edges += n_bonds
return graph, torch.stack(atom_x), \
torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0)
return (
graph,
torch.stack(atom_x),
torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0),
)
class LoopyBPUpdate(nn.Module):
......@@ -73,10 +113,10 @@ class LoopyBPUpdate(nn.Module):
self.W_h = nn.Linear(hidden_size, hidden_size, bias=False)
def forward(self, nodes):
msg_input = nodes.data['msg_input']
msg_delta = self.W_h(nodes.data['accum_msg'])
msg_input = nodes.data["msg_input"]
msg_delta = self.W_h(nodes.data["accum_msg"])
msg = F.relu(msg_input + msg_delta)
return {'msg': msg}
return {"msg": msg}
class GatherUpdate(nn.Module):
......@@ -87,9 +127,9 @@ class GatherUpdate(nn.Module):
self.W_o = nn.Linear(ATOM_FDIM + hidden_size, hidden_size)
def forward(self, nodes):
m = nodes.data['m']
m = nodes.data["m"]
return {
'h': F.relu(self.W_o(torch.cat([nodes.data['x'], m], 1))),
"h": F.relu(self.W_o(torch.cat([nodes.data["x"], m], 1))),
}
......@@ -121,7 +161,7 @@ class DGLMPN(nn.Module):
mol_graph = self.run(mol_graph, mol_line_graph)
# TODO: replace with unbatch or readout
g_repr = mean_nodes(mol_graph, 'h')
g_repr = mean_nodes(mol_graph, "h")
self.n_samples_total += n_samples
self.n_nodes_total += n_nodes
......@@ -134,32 +174,38 @@ class DGLMPN(nn.Module):
n_nodes = mol_graph.number_of_nodes()
mol_graph.apply_edges(
func=lambda edges: {'src_x': edges.src['x']},
func=lambda edges: {"src_x": edges.src["x"]},
)
mol_line_graph.ndata.update(mol_graph.edata)
e_repr = mol_line_graph.ndata
bond_features = e_repr['x']
source_features = e_repr['src_x']
bond_features = e_repr["x"]
source_features = e_repr["src_x"]
features = torch.cat([source_features, bond_features], 1)
msg_input = self.W_i(features)
mol_line_graph.ndata.update({
'msg_input': msg_input,
'msg': F.relu(msg_input),
'accum_msg': torch.zeros_like(msg_input),
})
mol_graph.ndata.update({
'm': bond_features.new(n_nodes, self.hidden_size).zero_(),
'h': bond_features.new(n_nodes, self.hidden_size).zero_(),
})
mol_line_graph.ndata.update(
{
"msg_input": msg_input,
"msg": F.relu(msg_input),
"accum_msg": torch.zeros_like(msg_input),
}
)
mol_graph.ndata.update(
{
"m": bond_features.new(n_nodes, self.hidden_size).zero_(),
"h": bond_features.new(n_nodes, self.hidden_size).zero_(),
}
)
for i in range(self.depth - 1):
mol_line_graph.update_all(DGLF.copy_u('msg', 'msg'), DGLF.sum('msg', 'accum_msg'))
mol_line_graph.update_all(
DGLF.copy_u("msg", "msg"), DGLF.sum("msg", "accum_msg")
)
mol_line_graph.apply_nodes(self.loopy_bp_updater)
mol_graph.edata.update(mol_line_graph.ndata)
mol_graph.update_all(DGLF.copy_e('msg', 'msg'), DGLF.sum('msg', 'm'))
mol_graph.update_all(DGLF.copy_e("msg", "msg"), DGLF.sum("msg", "m"))
mol_graph.apply_nodes(self.gather_updater)
return mol_graph
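# Note added for clarity, summarizing run() above: atom features 'x' are
# copied onto edges as 'src_x'; the edge data becomes node data of the line
# graph; W_i projects [src_x, bond 'x'] into 'msg_input'; then for depth - 1
# rounds each line-graph node sums incoming 'msg' into 'accum_msg' and is
# refreshed by LoopyBPUpdate. Finally the bond messages are summed back onto
# atoms as 'm', GatherUpdate produces atom states 'h', and forward() reads the
# molecule vector out with mean_nodes(mol_graph, 'h').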
import os
import torch
import torch.nn as nn
import os
import dgl
def cuda(x):
if torch.cuda.is_available() and not os.getenv('NOCUDA', None):
return x.to(torch.device('cuda')) # works for both DGLGraph and tensor
if torch.cuda.is_available() and not os.getenv("NOCUDA", None):
return x.to(torch.device("cuda")) # works for both DGLGraph and tensor
else:
return x
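# Note added for clarity: both torch.Tensor and dgl.DGLGraph expose .to(), so
# this one helper moves either object to the GPU; setting the NOCUDA
# environment variable forces CPU even when CUDA is available.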
......@@ -22,27 +24,28 @@ class GRUUpdate(nn.Module):
self.W_h = nn.Linear(2 * hidden_size, hidden_size)
def update_zm(self, node):
src_x = node.data['src_x']
s = node.data['s']
rm = node.data['accum_rm']
src_x = node.data["src_x"]
s = node.data["s"]
rm = node.data["accum_rm"]
z = torch.sigmoid(self.W_z(torch.cat([src_x, s], 1)))
m = torch.tanh(self.W_h(torch.cat([src_x, rm], 1)))
m = (1 - z) * s + z * m
return {'m': m, 'z': z}
return {"m": m, "z": z}
def update_r(self, node, zm=None):
dst_x = node.data['dst_x']
m = node.data['m'] if zm is None else zm['m']
dst_x = node.data["dst_x"]
m = node.data["m"] if zm is None else zm["m"]
r_1 = self.W_r(dst_x)
r_2 = self.U_r(m)
r = torch.sigmoid(r_1 + r_2)
return {'r': r, 'rm': r * m}
return {"r": r, "rm": r * m}
def forward(self, node):
dic = self.update_zm(node)
dic.update(self.update_r(node, zm=dic))
return dic
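# Note added for clarity: taken together, update_zm and update_r implement the
# GRU-style message update of the junction-tree network. With s = the sum of
# incoming messages and accum_rm = the sum of reset-gated incoming messages:
#   z  = sigmoid(W_z [src_x ; s])
#   m~ = tanh(W_h [src_x ; accum_rm])
#   m  = (1 - z) * s + z * m~
#   r  = sigmoid(W_r dst_x + U_r m),   rm = r * m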
def tocpu(g):
src, dst = g.edges()
src = src.cpu()
......
import math
import random
import sys
from collections import deque
from optparse import OptionParser
import rdkit
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.utils.data import DataLoader
import math, random, sys
from optparse import OptionParser
from collections import deque
import rdkit
import tqdm
from jtnn import *
from torch.utils.data import DataLoader
torch.multiprocessing.set_sharing_strategy("file_system")
torch.multiprocessing.set_sharing_strategy('file_system')
def worker_init_fn(id_):
lg = rdkit.RDLogger.logger()
lg = rdkit.RDLogger.logger()
lg.setLevel(rdkit.RDLogger.CRITICAL)
worker_init_fn(None)
parser = OptionParser()
parser.add_option("-t", "--train", dest="train", default='train', help='Training file name')
parser.add_option("-v", "--vocab", dest="vocab", default='vocab', help='Vocab file name')
parser.add_option(
"-t", "--train", dest="train", default="train", help="Training file name"
)
parser.add_option(
"-v", "--vocab", dest="vocab", default="vocab", help="Vocab file name"
)
parser.add_option("-s", "--save_dir", dest="save_path")
parser.add_option("-m", "--model", dest="model_path", default=None)
parser.add_option("-b", "--batch", dest="batch_size", default=40)
......@@ -31,7 +39,7 @@ parser.add_option("-d", "--depth", dest="depth", default=3)
parser.add_option("-z", "--beta", dest="beta", default=1.0)
parser.add_option("-q", "--lr", dest="lr", default=1e-3)
parser.add_option("-T", "--test", dest="test", action="store_true")
opts,args = parser.parse_args()
opts, args = parser.parse_args()
dataset = JTNNDataset(data=opts.train, vocab=opts.vocab, training=True)
vocab = dataset.vocab
......@@ -55,7 +63,10 @@ else:
nn.init.xavier_normal(param)
model = cuda(model)
print("Model #Params: %dK" % (sum([x.nelement() for x in model.parameters()]) / 1000,))
print(
"Model #Params: %dK"
% (sum([x.nelement() for x in model.parameters()]) / 1000,)
)
optimizer = optim.Adam(model.parameters(), lr=lr)
scheduler = lr_scheduler.ExponentialLR(optimizer, 0.9)
......@@ -64,26 +75,28 @@ scheduler.step()
MAX_EPOCH = 100
PRINT_ITER = 20
def train():
dataset.training = True
dataloader = DataLoader(
dataset,
batch_size=batch_size,
shuffle=True,
num_workers=4,
collate_fn=JTNNCollator(vocab, True),
drop_last=True,
worker_init_fn=worker_init_fn)
dataset,
batch_size=batch_size,
shuffle=True,
num_workers=4,
collate_fn=JTNNCollator(vocab, True),
drop_last=True,
worker_init_fn=worker_init_fn,
)
for epoch in range(MAX_EPOCH):
word_acc,topo_acc,assm_acc,steo_acc = 0,0,0,0
word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0
for it, batch in enumerate(tqdm.tqdm(dataloader)):
model.zero_grad()
try:
loss, kl_div, wacc, tacc, sacc, dacc = model(batch, beta)
except:
print([t.smiles for t in batch['mol_trees']])
print([t.smiles for t in batch["mol_trees"]])
raise
loss.backward()
optimizer.step()
......@@ -99,36 +112,51 @@ def train():
assm_acc = assm_acc / PRINT_ITER * 100
steo_acc = steo_acc / PRINT_ITER * 100
print("KL: %.1f, Word: %.2f, Topo: %.2f, Assm: %.2f, Steo: %.2f, Loss: %.6f" % (
kl_div, word_acc, topo_acc, assm_acc, steo_acc, loss.item()))
word_acc,topo_acc,assm_acc,steo_acc = 0,0,0,0
print(
"KL: %.1f, Word: %.2f, Topo: %.2f, Assm: %.2f, Steo: %.2f, Loss: %.6f"
% (
kl_div,
word_acc,
topo_acc,
assm_acc,
steo_acc,
loss.item(),
)
)
word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0
sys.stdout.flush()
if (it + 1) % 1500 == 0: #Fast annealing
if (it + 1) % 1500 == 0: # Fast annealing
scheduler.step()
print("learning rate: %.6f" % scheduler.get_lr()[0])
torch.save(model.state_dict(),
opts.save_path + "/model.iter-%d-%d" % (epoch, it + 1))
torch.save(
model.state_dict(),
opts.save_path + "/model.iter-%d-%d" % (epoch, it + 1),
)
scheduler.step()
print("learning rate: %.6f" % scheduler.get_lr()[0])
torch.save(model.state_dict(), opts.save_path + "/model.iter-" + str(epoch))
torch.save(
model.state_dict(), opts.save_path + "/model.iter-" + str(epoch)
)
def test():
dataset.training = False
dataloader = DataLoader(
dataset,
batch_size=1,
shuffle=False,
num_workers=0,
collate_fn=JTNNCollator(vocab, False),
drop_last=True,
worker_init_fn=worker_init_fn)
dataset,
batch_size=1,
shuffle=False,
num_workers=0,
collate_fn=JTNNCollator(vocab, False),
drop_last=True,
worker_init_fn=worker_init_fn,
)
# Just an example of molecule decoding; in reality you may want to sample
# tree and molecule vectors.
for it, batch in enumerate(dataloader):
gt_smiles = batch['mol_trees'][0].smiles
gt_smiles = batch["mol_trees"][0].smiles
print(gt_smiles)
model.move_to_cuda(batch)
_, tree_vec, mol_vec = model.encode(batch)
......@@ -136,21 +164,28 @@ def test():
smiles = model.decode(tree_vec, mol_vec)
print(smiles)
if __name__ == '__main__':
if __name__ == "__main__":
if opts.test:
test()
else:
train()
print('# passes:', model.n_passes)
print('Total # nodes processed:', model.n_nodes_total)
print('Total # edges processed:', model.n_edges_total)
print('Total # tree nodes processed:', model.n_tree_nodes_total)
print('Graph decoder: # passes:', model.jtmpn.n_passes)
print('Graph decoder: Total # candidates processed:', model.jtmpn.n_samples_total)
print('Graph decoder: Total # nodes processed:', model.jtmpn.n_nodes_total)
print('Graph decoder: Total # edges processed:', model.jtmpn.n_edges_total)
print('Graph encoder: # passes:', model.mpn.n_passes)
print('Graph encoder: Total # candidates processed:', model.mpn.n_samples_total)
print('Graph encoder: Total # nodes processed:', model.mpn.n_nodes_total)
print('Graph encoder: Total # edges processed:', model.mpn.n_edges_total)
print("# passes:", model.n_passes)
print("Total # nodes processed:", model.n_nodes_total)
print("Total # edges processed:", model.n_edges_total)
print("Total # tree nodes processed:", model.n_tree_nodes_total)
print("Graph decoder: # passes:", model.jtmpn.n_passes)
print(
"Graph decoder: Total # candidates processed:",
model.jtmpn.n_samples_total,
)
print("Graph decoder: Total # nodes processed:", model.jtmpn.n_nodes_total)
print("Graph decoder: Total # edges processed:", model.jtmpn.n_edges_total)
print("Graph encoder: # passes:", model.mpn.n_passes)
print(
"Graph encoder: Total # candidates processed:",
model.mpn.n_samples_total,
)
print("Graph encoder: Total # nodes processed:", model.mpn.n_nodes_total)
print("Graph encoder: Total # edges processed:", model.mpn.n_edges_total)
import argparse
import torch
import dgl
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
from dgl.nn import LabelPropagation
def main():
# check cuda
device = f'cuda:{args.gpu}' if torch.cuda.is_available() and args.gpu >= 0 else 'cpu'
device = (
f"cuda:{args.gpu}"
if torch.cuda.is_available() and args.gpu >= 0
else "cpu"
)
# load data
if args.dataset == 'Cora':
if args.dataset == "Cora":
dataset = CoraGraphDataset()
elif args.dataset == 'Citeseer':
elif args.dataset == "Citeseer":
dataset = CiteseerGraphDataset()
elif args.dataset == 'Pubmed':
elif args.dataset == "Pubmed":
dataset = PubmedGraphDataset()
else:
raise ValueError('Dataset {} is invalid.'.format(args.dataset))
raise ValueError("Dataset {} is invalid.".format(args.dataset))
g = dataset[0]
g = dgl.add_self_loop(g)
labels = g.ndata.pop('label').to(device).long()
labels = g.ndata.pop("label").to(device).long()
# load masks for train / test, valid is not used.
train_mask = g.ndata.pop('train_mask')
test_mask = g.ndata.pop('test_mask')
train_mask = g.ndata.pop("train_mask")
test_mask = g.ndata.pop("test_mask")
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device)
g = g.to(device)
# label propagation
lp = LabelPropagation(args.num_layers, args.alpha)
logits = lp(g, labels, mask=train_idx)
test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
test_acc = torch.sum(
logits[test_idx].argmax(dim=1) == labels[test_idx]
).item() / len(test_idx)
print("Test Acc {:.4f}".format(test_acc))
if __name__ == '__main__':
if __name__ == "__main__":
"""
Label Propagation Hyperparameters
"""
parser = argparse.ArgumentParser(description='LP')
parser.add_argument('--gpu', type=int, default=0)
parser.add_argument('--dataset', type=str, default='Cora')
parser.add_argument('--num-layers', type=int, default=10)
parser.add_argument('--alpha', type=float, default=0.5)
parser = argparse.ArgumentParser(description="LP")
parser.add_argument("--gpu", type=int, default=0)
parser.add_argument("--dataset", type=str, default="Cora")
parser.add_argument("--num-layers", type=int, default=10)
parser.add_argument("--alpha", type=float, default=0.5)
args = parser.parse_args()
print(args)
......
......@@ -17,49 +17,49 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from time import time
import matplotlib.pyplot as plt
import warnings
from time import time
import matplotlib.pyplot as plt
import numpy as np
import scipy.sparse as ss
import torch
import dgl
from dgl import function as fn
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from lda_model import LatentDirichletAllocation as LDAModel
from sklearn.datasets import fetch_20newsgroups
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from lda_model import LatentDirichletAllocation as LDAModel
import dgl
from dgl import function as fn
n_samples = 2000
n_features = 1000
n_components = 10
n_top_words = 20
device = 'cuda'
device = "cuda"
def plot_top_words(model, feature_names, n_top_words, title):
fig, axes = plt.subplots(2, 5, figsize=(30, 15), sharex=True)
axes = axes.flatten()
for topic_idx, topic in enumerate(model.components_):
top_features_ind = topic.argsort()[:-n_top_words - 1:-1]
top_features_ind = topic.argsort()[: -n_top_words - 1 : -1]
top_features = [feature_names[i] for i in top_features_ind]
weights = topic[top_features_ind]
ax = axes[topic_idx]
ax.barh(top_features, weights, height=0.7)
ax.set_title(f'Topic {topic_idx +1}',
fontdict={'fontsize': 30})
ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30})
ax.invert_yaxis()
ax.tick_params(axis='both', which='major', labelsize=20)
for i in 'top right left'.split():
ax.tick_params(axis="both", which="major", labelsize=20)
for i in "top right left".split():
ax.spines[i].set_visible(False)
fig.suptitle(title, fontsize=40)
plt.subplots_adjust(top=0.90, bottom=0.05, wspace=0.90, hspace=0.3)
plt.show()
# Load the 20 newsgroups dataset and vectorize it. We use a few heuristics
# to filter out useless terms early on: the posts are stripped of headers,
# footers and quoted replies, and common English words, words occurring in
......@@ -67,43 +67,50 @@ def plot_top_words(model, feature_names, n_top_words, title):
print("Loading dataset...")
t0 = time()
data, _ = fetch_20newsgroups(shuffle=True, random_state=1,
remove=('headers', 'footers', 'quotes'),
return_X_y=True)
data, _ = fetch_20newsgroups(
shuffle=True,
random_state=1,
remove=("headers", "footers", "quotes"),
return_X_y=True,
)
data_samples = data[:n_samples]
data_test = data[n_samples:2*n_samples]
data_test = data[n_samples : 2 * n_samples]
print("done in %0.3fs." % (time() - t0))
# Use tf (raw term count) features for LDA.
print("Extracting tf features for LDA...")
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2,
max_features=n_features,
stop_words='english')
tf_vectorizer = CountVectorizer(
max_df=0.95, min_df=2, max_features=n_features, stop_words="english"
)
t0 = time()
tf_vectorizer.fit(data)
tf = tf_vectorizer.transform(data_samples)
tt = tf_vectorizer.transform(data_test)
tf_feature_names = tf_vectorizer.get_feature_names()
tf_uv = [(u,v)
for u,v,e in zip(tf.tocoo().row, tf.tocoo().col, tf.tocoo().data)
for _ in range(e)]
tt_uv = [(u,v)
for u,v,e in zip(tt.tocoo().row, tt.tocoo().col, tt.tocoo().data)
for _ in range(e)]
tf_uv = [
(u, v)
for u, v, e in zip(tf.tocoo().row, tf.tocoo().col, tf.tocoo().data)
for _ in range(e)
]
tt_uv = [
(u, v)
for u, v, e in zip(tt.tocoo().row, tt.tocoo().col, tt.tocoo().data)
for _ in range(e)
]
print("done in %0.3fs." % (time() - t0))
print()
print("Preparing dgl graphs...")
t0 = time()
G = dgl.heterograph({('doc','topic','word'): tf_uv}, device=device)
Gt = dgl.heterograph({('doc','topic','word'): tt_uv}, device=device)
G = dgl.heterograph({("doc", "topic", "word"): tf_uv}, device=device)
Gt = dgl.heterograph({("doc", "topic", "word"): tt_uv}, device=device)
print("done in %0.3fs." % (time() - t0))
print()
print("Training dgl-lda model...")
t0 = time()
model = LDAModel(G.num_nodes('word'), n_components)
model = LDAModel(G.num_nodes("word"), n_components)
model.fit(G)
print("done in %0.3fs." % (time() - t0))
print()
......@@ -113,20 +120,27 @@ print(f"dgl-lda testing perplexity {model.perplexity(Gt):.3f}")
word_nphi = np.vstack([nphi.tolist() for nphi in model.word_data.nphi])
plot_top_words(
type('dummy', (object,), {'components_': word_nphi}),
tf_feature_names, n_top_words, 'Topics in LDA model')
type("dummy", (object,), {"components_": word_nphi}),
tf_feature_names,
n_top_words,
"Topics in LDA model",
)
print("Training scikit-learn model...")
print('\n' * 2, "Fitting LDA models with tf features, "
"n_samples=%d and n_features=%d..."
% (n_samples, n_features))
lda = LatentDirichletAllocation(n_components=n_components, max_iter=5,
learning_method='online',
learning_offset=50.,
random_state=0,
verbose=1,
)
print(
"\n" * 2,
"Fitting LDA models with tf features, "
"n_samples=%d and n_features=%d..." % (n_samples, n_features),
)
lda = LatentDirichletAllocation(
n_components=n_components,
max_iter=5,
learning_method="online",
learning_offset=50.0,
random_state=0,
verbose=1,
)
t0 = time()
lda.fit(tf)
print("done in %0.3fs." % (time() - t0))
......
......@@ -17,8 +17,17 @@
# limitations under the License.
import os, functools, warnings, torch, collections, dgl, io
import numpy as np, scipy as sp
import collections
import functools
import io
import os
import warnings
import numpy as np
import scipy as sp
import torch
import dgl
try:
from functools import cached_property
......@@ -37,17 +46,21 @@ class EdgeData:
@property
def loglike(self):
return (self.src_data['Elog'] + self.dst_data['Elog']).logsumexp(1)
return (self.src_data["Elog"] + self.dst_data["Elog"]).logsumexp(1)
@property
def phi(self):
return (
self.src_data['Elog'] + self.dst_data['Elog'] - self.loglike.unsqueeze(1)
self.src_data["Elog"]
+ self.dst_data["Elog"]
- self.loglike.unsqueeze(1)
).exp()
@property
def expectation(self):
return (self.src_data['expectation'] * self.dst_data['expectation']).sum(1)
return (
self.src_data["expectation"] * self.dst_data["expectation"]
).sum(1)
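# Note added for clarity: for a (doc, word) edge, the src/dst 'Elog' fields
# hold the expected log of the document-topic and word-topic Dirichlet
# parameters; loglike is their logsumexp over topics (the per-token
# variational log-likelihood), phi exponentiates the difference to give
# normalized per-topic responsibilities, and expectation is the inner product
# of the two mean vectors.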
class _Dirichlet:
......@@ -55,10 +68,13 @@ class _Dirichlet:
self.prior = prior
self.nphi = nphi
self.device = nphi.device
self._sum_by_parts = lambda map_fn: functools.reduce(torch.add, [
map_fn(slice(i, min(i+_chunksize, nphi.shape[1]))).sum(1)
for i in list(range(0, nphi.shape[1], _chunksize))
])
self._sum_by_parts = lambda map_fn: functools.reduce(
torch.add,
[
map_fn(slice(i, min(i + _chunksize, nphi.shape[1]))).sum(1)
for i in list(range(0, nphi.shape[1], _chunksize))
],
)
def _posterior(self, _ID=slice(None)):
return self.prior + self.nphi[:, _ID]
......@@ -68,14 +84,15 @@ class _Dirichlet:
return self.nphi.sum(1) + self.prior * self.nphi.shape[1]
def _Elog(self, _ID=slice(None)):
return torch.digamma(self._posterior(_ID)) - \
torch.digamma(self.posterior_sum.unsqueeze(1))
return torch.digamma(self._posterior(_ID)) - torch.digamma(
self.posterior_sum.unsqueeze(1)
)
@cached_property
def loglike(self):
neg_evid = -self._sum_by_parts(
lambda s: (self.nphi[:, s] * self._Elog(s))
)
)
prior = torch.as_tensor(self.prior).to(self.nphi)
K = self.nphi.shape[1]
......@@ -83,7 +100,7 @@ class _Dirichlet:
log_B_posterior = self._sum_by_parts(
lambda s: torch.lgamma(self._posterior(s))
) - torch.lgamma(self.posterior_sum)
) - torch.lgamma(self.posterior_sum)
return neg_evid - log_B_prior + log_B_posterior
......@@ -105,9 +122,15 @@ class _Dirichlet:
@cached_property
def Bayesian_gap(self):
return 1. - self._sum_by_parts(lambda s: self._Elog(s).exp())
return 1.0 - self._sum_by_parts(lambda s: self._Elog(s).exp())
_cached_properties = ["posterior_sum", "loglike", "n", "cdf", "Bayesian_gap"]
_cached_properties = [
"posterior_sum",
"loglike",
"n",
"cdf",
"Bayesian_gap",
]
def clear_cache(self):
for name in self._cached_properties:
......@@ -117,27 +140,29 @@ class _Dirichlet:
pass
def update(self, new, _ID=slice(None), rho=1):
""" inplace: old * (1-rho) + new * rho """
"""inplace: old * (1-rho) + new * rho"""
self.clear_cache()
mean_change = (self.nphi[:, _ID] - new).abs().mean().tolist()
self.nphi *= (1 - rho)
self.nphi *= 1 - rho
self.nphi[:, _ID] += new * rho
return mean_change
class DocData(_Dirichlet):
""" nphi (n_docs by n_topics) """
"""nphi (n_docs by n_topics)"""
def prepare_graph(self, G, key="Elog"):
G.nodes['doc'].data[key] = getattr(self, '_'+key)().to(G.device)
G.nodes["doc"].data[key] = getattr(self, "_" + key)().to(G.device)
def update_from(self, G, mult):
new = G.nodes['doc'].data['nphi'] * mult
new = G.nodes["doc"].data["nphi"] * mult
return self.update(new.to(self.device))
class _Distributed(collections.UserList):
""" split on dim=0 and store on multiple devices """
"""split on dim=0 and store on multiple devices"""
def __init__(self, prior, nphi):
self.prior = prior
self.nphi = nphi
......@@ -146,36 +171,38 @@ class _Distributed(collections.UserList):
def split_device(self, other, dim=0):
split_sections = [x.shape[0] for x in self.nphi]
out = torch.split(other, split_sections, dim)
return [y.to(x.device) for x,y in zip(self.nphi, out)]
return [y.to(x.device) for x, y in zip(self.nphi, out)]
class WordData(_Distributed):
""" distributed nphi (n_topics by n_words), transpose to/from graph nodes data """
"""distributed nphi (n_topics by n_words), transpose to/from graph nodes data"""
def prepare_graph(self, G, key="Elog"):
if '_ID' in G.nodes['word'].data:
_ID = G.nodes['word'].data['_ID']
if "_ID" in G.nodes["word"].data:
_ID = G.nodes["word"].data["_ID"]
else:
_ID = slice(None)
out = [getattr(part, '_'+key)(_ID).to(G.device) for part in self]
G.nodes['word'].data[key] = torch.cat(out).T
out = [getattr(part, "_" + key)(_ID).to(G.device) for part in self]
G.nodes["word"].data[key] = torch.cat(out).T
def update_from(self, G, mult, rho):
nphi = G.nodes['word'].data['nphi'].T * mult
nphi = G.nodes["word"].data["nphi"].T * mult
if '_ID' in G.nodes['word'].data:
_ID = G.nodes['word'].data['_ID']
if "_ID" in G.nodes["word"].data:
_ID = G.nodes["word"].data["_ID"]
else:
_ID = slice(None)
mean_change = [x.update(y, _ID, rho)
for x, y in zip(self, self.split_device(nphi))]
mean_change = [
x.update(y, _ID, rho) for x, y in zip(self, self.split_device(nphi))
]
return np.mean(mean_change)
class Gamma(collections.namedtuple('Gamma', "concentration, rate")):
""" articulate the difference between torch gamma and numpy gamma """
class Gamma(collections.namedtuple("Gamma", "concentration, rate")):
"""articulate the difference between torch gamma and numpy gamma"""
@property
def shape(self):
return self.concentration
......@@ -218,20 +245,23 @@ class LatentDirichletAllocation:
(NIPS 2010).
[2] Reactive LDA Library blogpost by Yingjie Miao for a similar Gibbs model
"""
def __init__(
self, n_words, n_components,
self,
n_words,
n_components,
prior=None,
rho=1,
mult={'doc': 1, 'word': 1},
init={'doc': (100., 100.), 'word': (100., 100.)},
device_list=['cpu'],
mult={"doc": 1, "word": 1},
init={"doc": (100.0, 100.0), "word": (100.0, 100.0)},
device_list=["cpu"],
verbose=True,
):
):
self.n_words = n_words
self.n_components = n_components
if prior is None:
prior = {'doc': 1./n_components, 'word': 1./n_components}
prior = {"doc": 1.0 / n_components, "word": 1.0 / n_components}
self.prior = prior
self.rho = rho
......@@ -239,117 +269,128 @@ class LatentDirichletAllocation:
self.init = init
assert not isinstance(device_list, str), "plz wrap devices in a list"
self.device_list = device_list[:n_components] # avoid edge cases
self.device_list = device_list[:n_components] # avoid edge cases
self.verbose = verbose
self._init_word_data()
def _init_word_data(self):
split_sections = np.diff(
np.linspace(0, self.n_components, len(self.device_list)+1).astype(int)
np.linspace(0, self.n_components, len(self.device_list) + 1).astype(
int
)
)
word_nphi = [
Gamma(*self.init['word']).sample((s, self.n_words), device)
Gamma(*self.init["word"]).sample((s, self.n_words), device)
for s, device in zip(split_sections, self.device_list)
]
self.word_data = WordData(self.prior['word'], word_nphi)
self.word_data = WordData(self.prior["word"], word_nphi)
def _init_doc_data(self, n_docs, device):
doc_nphi = Gamma(*self.init['doc']).sample(
(n_docs, self.n_components), device)
return DocData(self.prior['doc'], doc_nphi)
doc_nphi = Gamma(*self.init["doc"]).sample(
(n_docs, self.n_components), device
)
return DocData(self.prior["doc"], doc_nphi)
def save(self, f):
for w in self.word_data:
w.clear_cache()
torch.save({
'prior': self.prior,
'rho': self.rho,
'mult': self.mult,
'init': self.init,
'word_data': [part.nphi for part in self.word_data],
}, f)
torch.save(
{
"prior": self.prior,
"rho": self.rho,
"mult": self.mult,
"init": self.init,
"word_data": [part.nphi for part in self.word_data],
},
f,
)
def _prepare_graph(self, G, doc_data, key="Elog"):
doc_data.prepare_graph(G, key)
self.word_data.prepare_graph(G, key)
def _e_step(self, G, doc_data=None, mean_change_tol=1e-3, max_iters=100):
"""_e_step implements doc data sampling until convergence or max_iters
"""
"""_e_step implements doc data sampling until convergence or max_iters"""
if doc_data is None:
doc_data = self._init_doc_data(G.num_nodes('doc'), G.device)
doc_data = self._init_doc_data(G.num_nodes("doc"), G.device)
G_rev = G.reverse() # word -> doc
G_rev = G.reverse() # word -> doc
self.word_data.prepare_graph(G_rev)
for i in range(max_iters):
doc_data.prepare_graph(G_rev)
G_rev.update_all(
lambda edges: {'phi': EdgeData(edges.src, edges.dst).phi},
dgl.function.sum('phi', 'nphi')
lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi},
dgl.function.sum("phi", "nphi"),
)
mean_change = doc_data.update_from(G_rev, self.mult['doc'])
mean_change = doc_data.update_from(G_rev, self.mult["doc"])
if mean_change < mean_change_tol:
break
if self.verbose:
print(f"e-step num_iters={i+1} with mean_change={mean_change:.4f}, "
f"perplexity={self.perplexity(G, doc_data):.4f}")
print(
f"e-step num_iters={i+1} with mean_change={mean_change:.4f}, "
f"perplexity={self.perplexity(G, doc_data):.4f}"
)
return doc_data
transform = _e_step
def predict(self, doc_data):
pred_scores = [
# d_exp @ w._expectation()
(lambda x: x @ w.nphi + x.sum(1, keepdims=True) * w.prior)
(d_exp / w.posterior_sum.unsqueeze(0))
(lambda x: x @ w.nphi + x.sum(1, keepdims=True) * w.prior)(
d_exp / w.posterior_sum.unsqueeze(0)
)
for (d_exp, w) in zip(
self.word_data.split_device(doc_data._expectation(), dim=1),
self.word_data)
self.word_data,
)
]
x = torch.zeros_like(pred_scores[0], device=doc_data.device)
for p in pred_scores:
x += p.to(x.device)
return x
def sample(self, doc_data, num_samples):
""" draw independent words and return the marginal probabilities,
"""draw independent words and return the marginal probabilities,
i.e., the expectations in Dirichlet distributions.
"""
def fn(cdf):
u = torch.rand(cdf.shape[0], num_samples, device=cdf.device)
return torch.searchsorted(cdf, u).to(doc_data.device)
topic_ids = fn(doc_data.cdf)
word_ids = torch.cat([fn(part.cdf) for part in self.word_data])
ids = torch.gather(word_ids, 0, topic_ids) # pick components by topic_ids
ids = torch.gather(
word_ids, 0, topic_ids
) # pick components by topic_ids
# compute expectation scores on sampled ids
src_ids = torch.arange(
ids.shape[0], dtype=ids.dtype, device=ids.device
).reshape((-1, 1)).expand(ids.shape)
unique_ids, inverse_ids = torch.unique(ids, sorted=False, return_inverse=True)
src_ids = (
torch.arange(ids.shape[0], dtype=ids.dtype, device=ids.device)
.reshape((-1, 1))
.expand(ids.shape)
)
unique_ids, inverse_ids = torch.unique(
ids, sorted=False, return_inverse=True
)
G = dgl.heterograph({('doc','','word'): (src_ids.ravel(), inverse_ids.ravel())})
G.nodes['word'].data['_ID'] = unique_ids
G = dgl.heterograph(
{("doc", "", "word"): (src_ids.ravel(), inverse_ids.ravel())}
)
G.nodes["word"].data["_ID"] = unique_ids
self._prepare_graph(G, doc_data, "expectation")
G.apply_edges(lambda e: {'expectation': EdgeData(e.src, e.dst).expectation})
expectation = G.edata.pop('expectation').reshape(ids.shape)
G.apply_edges(
lambda e: {"expectation": EdgeData(e.src, e.dst).expectation}
)
expectation = G.edata.pop("expectation").reshape(ids.shape)
return ids, expectation
def _m_step(self, G, doc_data):
"""_m_step implements word data sampling and stores word_z stats.
mean_change is in the sense of full graph with rho=1.
......@@ -357,26 +398,25 @@ class LatentDirichletAllocation:
G = G.clone()
self._prepare_graph(G, doc_data)
G.update_all(
lambda edges: {'phi': EdgeData(edges.src, edges.dst).phi},
dgl.function.sum('phi', 'nphi')
lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi},
dgl.function.sum("phi", "nphi"),
)
self._last_mean_change = self.word_data.update_from(
G, self.mult['word'], self.rho)
G, self.mult["word"], self.rho
)
if self.verbose:
print(f"m-step mean_change={self._last_mean_change:.4f}, ", end="")
Bayesian_gap = np.mean([
part.Bayesian_gap.mean().tolist() for part in self.word_data
])
Bayesian_gap = np.mean(
[part.Bayesian_gap.mean().tolist() for part in self.word_data]
)
print(f"Bayesian_gap={Bayesian_gap:.4f}")
def partial_fit(self, G):
doc_data = self._e_step(G)
self._m_step(G, doc_data)
return self
def fit(self, G, mean_change_tol=1e-3, max_epochs=10):
for i in range(max_epochs):
if self.verbose:
......@@ -387,7 +427,6 @@ class LatentDirichletAllocation:
break
return self
def perplexity(self, G, doc_data=None):
"""ppl = exp{-sum[log(p(w1,...,wn|d))] / n}
Follows Eq (15) in Hoffman et al., 2010.
......@@ -398,45 +437,50 @@ class LatentDirichletAllocation:
# compute E[log p(docs | theta, beta)]
G = G.clone()
self._prepare_graph(G, doc_data)
G.apply_edges(lambda edges: {'loglike': EdgeData(edges.src, edges.dst).loglike})
edge_elbo = (G.edata['loglike'].sum() / G.num_edges()).tolist()
G.apply_edges(
lambda edges: {"loglike": EdgeData(edges.src, edges.dst).loglike}
)
edge_elbo = (G.edata["loglike"].sum() / G.num_edges()).tolist()
if self.verbose:
print(f'neg_elbo phi: {-edge_elbo:.3f}', end=' ')
print(f"neg_elbo phi: {-edge_elbo:.3f}", end=" ")
# compute E[log p(theta | alpha) - log q(theta | gamma)]
doc_elbo = (doc_data.loglike.sum() / doc_data.n.sum()).tolist()
if self.verbose:
print(f'theta: {-doc_elbo:.3f}', end=' ')
print(f"theta: {-doc_elbo:.3f}", end=" ")
# compute E[log p(beta | eta) - log q(beta | lambda)]
# The denominator n for extrapolation perplexity is undefined.
# We use the train set, whereas sklearn uses the test set.
word_elbo = (
sum([part.loglike.sum().tolist() for part in self.word_data])
/ sum([part.n.sum().tolist() for part in self.word_data])
)
word_elbo = sum(
[part.loglike.sum().tolist() for part in self.word_data]
) / sum([part.n.sum().tolist() for part in self.word_data])
if self.verbose:
print(f'beta: {-word_elbo:.3f}')
print(f"beta: {-word_elbo:.3f}")
ppl = np.exp(-edge_elbo - doc_elbo - word_elbo)
if G.num_edges()>0 and np.isnan(ppl):
if G.num_edges() > 0 and np.isnan(ppl):
warnings.warn("numerical issue in perplexity")
return ppl
def doc_subgraph(G, doc_ids):
sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
_, _, (block,) = sampler.sample(G.reverse(), {'doc': torch.as_tensor(doc_ids)})
_, _, (block,) = sampler.sample(
G.reverse(), {"doc": torch.as_tensor(doc_ids)}
)
B = dgl.DGLHeteroGraph(
block._graph, ['_', 'word', 'doc', '_'], block.etypes
block._graph, ["_", "word", "doc", "_"], block.etypes
).reverse()
B.nodes['word'].data['_ID'] = block.nodes['word'].data['_ID']
B.nodes["word"].data["_ID"] = block.nodes["word"].data["_ID"]
return B
if __name__ == '__main__':
print('Testing LatentDirichletAllocation ...')
G = dgl.heterograph({('doc', '', 'word'): [(0, 0), (1, 3)]}, {'doc': 2, 'word': 5})
if __name__ == "__main__":
print("Testing LatentDirichletAllocation ...")
G = dgl.heterograph(
{("doc", "", "word"): [(0, 0), (1, 3)]}, {"doc": 2, "word": 5}
)
model = LatentDirichletAllocation(n_words=5, n_components=10, verbose=False)
model.fit(G)
model.transform(G)
......@@ -454,4 +498,4 @@ if __name__ == '__main__':
f.seek(0)
print(torch.load(f))
print('Testing LatentDirichletAllocation passed!')
print("Testing LatentDirichletAllocation passed!")
......@@ -6,11 +6,12 @@ Author's implementation: https://github.com/joanbruna/GNN_community
"""
from __future__ import division
import time
import argparse
import time
from itertools import permutations
import gnn
import numpy as np
import torch as th
import torch.nn.functional as F
......@@ -18,37 +19,51 @@ import torch.optim as optim
from torch.utils.data import DataLoader
from dgl.data import SBMMixtureDataset
import gnn
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, help='Batch size', default=1)
parser.add_argument('--gpu', type=int, help='GPU index', default=-1)
parser.add_argument('--lr', type=float, help='Learning rate', default=0.001)
parser.add_argument('--n-communities', type=int, help='Number of communities', default=2)
parser.add_argument('--n-epochs', type=int, help='Number of epochs', default=100)
parser.add_argument('--n-features', type=int, help='Number of features', default=16)
parser.add_argument('--n-graphs', type=int, help='Number of graphs', default=10)
parser.add_argument('--n-layers', type=int, help='Number of layers', default=30)
parser.add_argument('--n-nodes', type=int, help='Number of nodes', default=10000)
parser.add_argument('--optim', type=str, help='Optimizer', default='Adam')
parser.add_argument('--radius', type=int, help='Radius', default=3)
parser.add_argument('--verbose', action='store_true')
parser.add_argument("--batch-size", type=int, help="Batch size", default=1)
parser.add_argument("--gpu", type=int, help="GPU index", default=-1)
parser.add_argument("--lr", type=float, help="Learning rate", default=0.001)
parser.add_argument(
"--n-communities", type=int, help="Number of communities", default=2
)
parser.add_argument(
"--n-epochs", type=int, help="Number of epochs", default=100
)
parser.add_argument(
"--n-features", type=int, help="Number of features", default=16
)
parser.add_argument("--n-graphs", type=int, help="Number of graphs", default=10)
parser.add_argument("--n-layers", type=int, help="Number of layers", default=30)
parser.add_argument(
"--n-nodes", type=int, help="Number of nodes", default=10000
)
parser.add_argument("--optim", type=str, help="Optimizer", default="Adam")
parser.add_argument("--radius", type=int, help="Radius", default=3)
parser.add_argument("--verbose", action="store_true")
args = parser.parse_args()
dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu)
dev = th.device("cpu") if args.gpu < 0 else th.device("cuda:%d" % args.gpu)
K = args.n_communities
training_dataset = SBMMixtureDataset(args.n_graphs, args.n_nodes, K)
training_loader = DataLoader(training_dataset, args.batch_size,
collate_fn=training_dataset.collate_fn, drop_last=True)
training_loader = DataLoader(
training_dataset,
args.batch_size,
collate_fn=training_dataset.collate_fn,
drop_last=True,
)
ones = th.ones(args.n_nodes // K)
y_list = [th.cat([x * ones for x in p]).long().to(dev) for p in permutations(range(K))]
y_list = [
th.cat([x * ones for x in p]).long().to(dev) for p in permutations(range(K))
]
feats = [1] + [args.n_features] * args.n_layers + [K]
model = gnn.GNN(feats, args.radius, K).to(dev)
optimizer = getattr(optim, args.optim)(model.parameters(), lr=args.lr)
def compute_overlap(z_list):
ybar_list = [th.max(z, 1)[1] for z in z_list]
overlap_list = []
......@@ -58,15 +73,20 @@ def compute_overlap(z_list):
overlap_list.append(overlap)
return sum(overlap_list) / len(overlap_list)
def from_np(f, *args):
def wrap(*args):
new = [th.from_numpy(x) if isinstance(x, np.ndarray) else x for x in args]
new = [
th.from_numpy(x) if isinstance(x, np.ndarray) else x for x in args
]
return f(*new)
return wrap
@from_np
def step(i, j, g, lg, deg_g, deg_lg, pm_pd):
""" One step of training. """
"""One step of training."""
g = g.to(dev)
lg = lg.to(dev)
deg_g = deg_g.to(dev).unsqueeze(1)
......@@ -77,7 +97,10 @@ def step(i, j, g, lg, deg_g, deg_lg, pm_pd):
t_forward = time.time() - t0
z_list = th.chunk(z, args.batch_size, 0)
loss = sum(min(F.cross_entropy(z, y) for y in y_list) for z in z_list) / args.batch_size
loss = (
sum(min(F.cross_entropy(z, y) for y in y_list) for z in z_list)
/ args.batch_size
)
overlap = compute_overlap(z_list)
optimizer.zero_grad()
......@@ -88,6 +111,7 @@ def step(i, j, g, lg, deg_g, deg_lg, pm_pd):
return loss, overlap, t_forward, t_backward
@from_np
def inference(g, lg, deg_g, deg_lg, pm_pd):
g = g.to(dev)
......@@ -99,9 +123,11 @@ def inference(g, lg, deg_g, deg_lg, pm_pd):
z = model(g, lg, deg_g, deg_lg, pm_pd)
return z
def test():
p_list =[6, 5.5, 5, 4.5, 1.5, 1, 0.5, 0]
q_list =[0, 0.5, 1, 1.5, 4.5, 5, 5.5, 6]
p_list = [6, 5.5, 5, 4.5, 1.5, 1, 0.5, 0]
q_list = [0, 0.5, 1, 1.5, 4.5, 5, 5.5, 6]
N = 1
overlap_list = []
for p, q in zip(p_list, q_list):
......@@ -112,31 +138,38 @@ def test():
overlap_list.append(compute_overlap(th.chunk(z, N, 0)))
return overlap_list
n_iterations = args.n_graphs // args.batch_size
for i in range(args.n_epochs):
total_loss, total_overlap, s_forward, s_backward = 0, 0, 0, 0
for j, [g, lg, deg_g, deg_lg, pm_pd] in enumerate(training_loader):
loss, overlap, t_forward, t_backward = step(i, j, g, lg, deg_g, deg_lg, pm_pd)
loss, overlap, t_forward, t_backward = step(
i, j, g, lg, deg_g, deg_lg, pm_pd
)
total_loss += loss
total_overlap += overlap
s_forward += t_forward
s_backward += t_backward
epoch = '0' * (len(str(args.n_epochs)) - len(str(i)))
iteration = '0' * (len(str(n_iterations)) - len(str(j)))
epoch = "0" * (len(str(args.n_epochs)) - len(str(i)))
iteration = "0" * (len(str(n_iterations)) - len(str(j)))
if args.verbose:
print('[epoch %s%d iteration %s%d]loss %.3f | overlap %.3f'
% (epoch, i, iteration, j, loss, overlap))
print(
"[epoch %s%d iteration %s%d]loss %.3f | overlap %.3f"
% (epoch, i, iteration, j, loss, overlap)
)
epoch = '0' * (len(str(args.n_epochs)) - len(str(i)))
epoch = "0" * (len(str(args.n_epochs)) - len(str(i)))
loss = total_loss / (j + 1)
overlap = total_overlap / (j + 1)
t_forward = s_forward / (j + 1)
t_backward = s_backward / (j + 1)
print('[epoch %s%d]loss %.3f | overlap %.3f | forward time %.3fs | backward time %.3fs'
% (epoch, i, loss, overlap, t_forward, t_backward))
print(
"[epoch %s%d]loss %.3f | overlap %.3f | forward time %.3fs | backward time %.3fs"
% (epoch, i, loss, overlap, t_forward, t_backward)
)
overlap_list = test()
overlap_str = ' - '.join(['%.3f' % overlap for overlap in overlap_list])
print('[epoch %s%d]overlap: %s' % (epoch, i, overlap_str))
overlap_str = " - ".join(["%.3f" % overlap for overlap in overlap_list])
print("[epoch %s%d]overlap: %s" % (epoch, i, overlap_str))
......@@ -2,16 +2,18 @@
import argparse
import copy
import random
import numpy as np
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
import random
import torch.optim as optim
from tqdm import trange
import dgl
import dgl.function as fn
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from tqdm import trange
class MixHopConv(nn.Module):
r"""
......@@ -44,13 +46,16 @@ class MixHopConv(nn.Module):
batchnorm: bool, optional
If True, use batch normalization. Defaults: ``False``.
"""
def __init__(self,
in_dim,
out_dim,
p=[0, 1, 2],
dropout=0,
activation=None,
batchnorm=False):
def __init__(
self,
in_dim,
out_dim,
p=[0, 1, 2],
dropout=0,
activation=None,
batchnorm=False,
):
super(MixHopConv, self).__init__()
self.in_dim = in_dim
self.out_dim = out_dim
......@@ -64,11 +69,11 @@ class MixHopConv(nn.Module):
# define batch norm layer
if self.batchnorm:
self.bn = nn.BatchNorm1d(out_dim * len(p))
# define weight dict for each power j
self.weights = nn.ModuleDict({
str(j): nn.Linear(in_dim, out_dim, bias=False) for j in p
})
self.weights = nn.ModuleDict(
{str(j): nn.Linear(in_dim, out_dim, bias=False) for j in p}
)
def forward(self, graph, feats):
with graph.local_scope():
......@@ -84,34 +89,37 @@ class MixHopConv(nn.Module):
outputs.append(output)
feats = feats * norm
graph.ndata['h'] = feats
graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
feats = graph.ndata.pop('h')
graph.ndata["h"] = feats
graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
feats = graph.ndata.pop("h")
feats = feats * norm
final = torch.cat(outputs, dim=1)
if self.batchnorm:
final = self.bn(final)
if self.activation is not None:
final = self.activation(final)
final = self.dropout(final)
return final
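# Note added for clarity (a sketch of the loop above): for each power j in
# self.p the layer applies its own linear map self.weights[str(j)] to the
# features propagated j hops with symmetric normalization (feats * norm,
# copy_u/sum, * norm per hop), concatenates the per-power outputs into
# out_dim * len(p) channels, and then optionally applies batch norm, the
# activation, and dropout.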
class MixHop(nn.Module):
def __init__(self,
in_dim,
hid_dim,
out_dim,
num_layers=2,
p=[0, 1, 2],
input_dropout=0.0,
layer_dropout=0.0,
activation=None,
batchnorm=False):
def __init__(
self,
in_dim,
hid_dim,
out_dim,
num_layers=2,
p=[0, 1, 2],
input_dropout=0.0,
layer_dropout=0.0,
activation=None,
batchnorm=False,
):
super(MixHop, self).__init__()
self.in_dim = in_dim
self.hid_dim = hid_dim
......@@ -127,68 +135,79 @@ class MixHop(nn.Module):
self.dropout = nn.Dropout(self.input_dropout)
# Input layer
self.layers.append(MixHopConv(self.in_dim,
self.hid_dim,
p=self.p,
dropout=self.input_dropout,
activation=self.activation,
batchnorm=self.batchnorm))
self.layers.append(
MixHopConv(
self.in_dim,
self.hid_dim,
p=self.p,
dropout=self.input_dropout,
activation=self.activation,
batchnorm=self.batchnorm,
)
)
# Hidden layers with n - 1 MixHopConv layers
for i in range(self.num_layers - 2):
self.layers.append(MixHopConv(self.hid_dim * len(args.p),
self.hid_dim,
p=self.p,
dropout=self.layer_dropout,
activation=self.activation,
batchnorm=self.batchnorm))
self.fc_layers = nn.Linear(self.hid_dim * len(args.p), self.out_dim, bias=False)
self.layers.append(
MixHopConv(
self.hid_dim * len(args.p),
self.hid_dim,
p=self.p,
dropout=self.layer_dropout,
activation=self.activation,
batchnorm=self.batchnorm,
)
)
self.fc_layers = nn.Linear(
self.hid_dim * len(args.p), self.out_dim, bias=False
)
def forward(self, graph, feats):
feats = self.dropout(feats)
for layer in self.layers:
feats = layer(graph, feats)
feats = self.fc_layers(feats)
return feats
def main(args):
# Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
# Load from DGL dataset
if args.dataset == 'Cora':
if args.dataset == "Cora":
dataset = CoraGraphDataset()
elif args.dataset == 'Citeseer':
elif args.dataset == "Citeseer":
dataset = CiteseerGraphDataset()
elif args.dataset == 'Pubmed':
elif args.dataset == "Pubmed":
dataset = PubmedGraphDataset()
else:
raise ValueError('Dataset {} is invalid.'.format(args.dataset))
raise ValueError("Dataset {} is invalid.".format(args.dataset))
graph = dataset[0]
graph = dgl.add_self_loop(graph)
# check cuda
if args.gpu >= 0 and torch.cuda.is_available():
device = 'cuda:{}'.format(args.gpu)
device = "cuda:{}".format(args.gpu)
else:
device = 'cpu'
device = "cpu"
# retrieve the number of classes
n_classes = dataset.num_classes
# retrieve labels of ground truth
labels = graph.ndata.pop('label').to(device).long()
labels = graph.ndata.pop("label").to(device).long()
# Extract node features
feats = graph.ndata.pop('feat').to(device)
feats = graph.ndata.pop("feat").to(device)
n_features = feats.shape[-1]
# retrieve masks for train/validation/test
train_mask = graph.ndata.pop('train_mask')
val_mask = graph.ndata.pop('val_mask')
test_mask = graph.ndata.pop('test_mask')
train_mask = graph.ndata.pop("train_mask")
val_mask = graph.ndata.pop("val_mask")
test_mask = graph.ndata.pop("test_mask")
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze().to(device)
val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device)
......@@ -197,16 +216,18 @@ def main(args):
graph = graph.to(device)
# Step 2: Create model =================================================================== #
model = MixHop(in_dim=n_features,
hid_dim=args.hid_dim,
out_dim=n_classes,
num_layers=args.num_layers,
p=args.p,
input_dropout=args.input_dropout,
layer_dropout=args.layer_dropout,
activation=torch.tanh,
batchnorm=True)
model = MixHop(
in_dim=n_features,
hid_dim=args.hid_dim,
out_dim=n_classes,
num_layers=args.num_layers,
p=args.p,
input_dropout=args.input_dropout,
layer_dropout=args.layer_dropout,
activation=torch.tanh,
batchnorm=True,
)
model = model.to(device)
best_model = copy.deepcopy(model)
......@@ -218,7 +239,7 @@ def main(args):
# Step 4: training epoches =============================================================== #
acc = 0
no_improvement = 0
epochs = trange(args.epochs, desc='Accuracy & Loss')
epochs = trange(args.epochs, desc="Accuracy & Loss")
for _ in epochs:
# Training using a full graph
......@@ -228,7 +249,9 @@ def main(args):
# compute loss
train_loss = loss_fn(logits[train_idx], labels[train_idx])
train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx]).item() / len(train_idx)
train_acc = torch.sum(
logits[train_idx].argmax(dim=1) == labels[train_idx]
).item() / len(train_idx)
# backward
opt.zero_grad()
......@@ -240,54 +263,99 @@ def main(args):
with torch.no_grad():
valid_loss = loss_fn(logits[val_idx], labels[val_idx])
valid_acc = torch.sum(logits[val_idx].argmax(dim=1) == labels[val_idx]).item() / len(val_idx)
valid_acc = torch.sum(
logits[val_idx].argmax(dim=1) == labels[val_idx]
).item() / len(val_idx)
# Print out performance
epochs.set_description('Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}'.format(
train_acc, train_loss.item(), valid_acc, valid_loss.item()))
epochs.set_description(
"Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format(
train_acc, train_loss.item(), valid_acc, valid_loss.item()
)
)
if valid_acc < acc:
no_improvement += 1
if no_improvement == args.early_stopping:
print('Early stop.')
print("Early stop.")
break
else:
no_improvement = 0
acc = valid_acc
best_model = copy.deepcopy(model)
scheduler.step()
best_model.eval()
logits = best_model(graph, feats)
test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
test_acc = torch.sum(
logits[test_idx].argmax(dim=1) == labels[test_idx]
).item() / len(test_idx)
print("Test Acc {:.4f}".format(test_acc))
return test_acc
if __name__ == "__main__":
"""
MixHop Model Hyperparameters
"""
parser = argparse.ArgumentParser(description='MixHop GCN')
parser = argparse.ArgumentParser(description="MixHop GCN")
# data source params
parser.add_argument('--dataset', type=str, default='Cora', help='Name of dataset.')
parser.add_argument(
"--dataset", type=str, default="Cora", help="Name of dataset."
)
# cuda params
parser.add_argument('--gpu', type=int, default=-1, help='GPU index. Default: -1, using CPU.')
parser.add_argument(
"--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
)
# training params
parser.add_argument('--epochs', type=int, default=2000, help='Training epochs.')
parser.add_argument('--early-stopping', type=int, default=200, help='Patient epochs to wait before early stopping.')
parser.add_argument('--lr', type=float, default=0.5, help='Learning rate.')
parser.add_argument('--lamb', type=float, default=5e-4, help='L2 reg.')
parser.add_argument('--step-size', type=int, default=40, help='Period of learning rate decay.')
parser.add_argument('--gamma', type=float, default=0.01, help='Multiplicative factor of learning rate decay.')
parser.add_argument(
"--epochs", type=int, default=2000, help="Training epochs."
)
parser.add_argument(
"--early-stopping",
type=int,
default=200,
help="Patient epochs to wait before early stopping.",
)
parser.add_argument("--lr", type=float, default=0.5, help="Learning rate.")
parser.add_argument("--lamb", type=float, default=5e-4, help="L2 reg.")
parser.add_argument(
"--step-size",
type=int,
default=40,
help="Period of learning rate decay.",
)
parser.add_argument(
"--gamma",
type=float,
default=0.01,
help="Multiplicative factor of learning rate decay.",
)
# model params
parser.add_argument("--hid-dim", type=int, default=60, help='Hidden layer dimensionalities.')
parser.add_argument("--num-layers", type=int, default=4, help='Number of GNN layers.')
parser.add_argument("--input-dropout", type=float, default=0.7, help='Dropout applied at input layer.')
parser.add_argument("--layer-dropout", type=float, default=0.9, help='Dropout applied at hidden layers.')
parser.add_argument('--p', nargs='+', type=int, help='List of powers of adjacency matrix.')
parser.add_argument(
"--hid-dim", type=int, default=60, help="Hidden layer dimensionalities."
)
parser.add_argument(
"--num-layers", type=int, default=4, help="Number of GNN layers."
)
parser.add_argument(
"--input-dropout",
type=float,
default=0.7,
help="Dropout applied at input layer.",
)
parser.add_argument(
"--layer-dropout",
type=float,
default=0.9,
help="Dropout applied at hidden layers.",
)
parser.add_argument(
"--p", nargs="+", type=int, help="List of powers of adjacency matrix."
)
parser.set_defaults(p=[0, 1, 2])
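# Illustrative usage (hypothetical file name; flags taken from the definitions above):
#
#   python main.py --dataset Cora --gpu 0 --p 0 1 2 --early-stopping 200 --lr 0.5
#
# `--p 0 1 2` reproduces the default powers of the adjacency matrix set just above.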
......@@ -298,13 +366,13 @@ if __name__ == "__main__":
for _ in range(100):
acc_lists.append(main(args))
acc_lists.sort()
acc_lists_top = np.array(acc_lists[50:])
mean = np.around(np.mean(acc_lists_top, axis=0), decimals=3)
std = np.around(np.std(acc_lists_top, axis=0), decimals=3)
print('Total acc: ', acc_lists)
print('Top 50 acc:', acc_lists_top)
print('mean', mean)
print('std', std)
print("Total acc: ", acc_lists)
print("Top 50 acc:", acc_lists_top)
print("mean", mean)
print("std", std)
......@@ -2,55 +2,55 @@ import torch as th
import torch.nn.functional as F
GCN_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [16, 1, F.relu, 0.5],
"lr": 1e-2,
"weight_decay": 5e-4,
}
GAT_CONFIG = {
'extra_args': [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False],
'lr': 0.005,
'weight_decay': 5e-4,
"extra_args": [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False],
"lr": 0.005,
"weight_decay": 5e-4,
}
GRAPHSAGE_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5, 'gcn'],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [16, 1, F.relu, 0.5, "gcn"],
"lr": 1e-2,
"weight_decay": 5e-4,
}
APPNP_CONFIG = {
'extra_args': [64, 1, F.relu, 0.5, 0.5, 0.1, 10],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [64, 1, F.relu, 0.5, 0.5, 0.1, 10],
"lr": 1e-2,
"weight_decay": 5e-4,
}
TAGCN_CONFIG = {
'extra_args': [16, 1, F.relu, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [16, 1, F.relu, 0.5],
"lr": 1e-2,
"weight_decay": 5e-4,
}
AGNN_CONFIG = {
'extra_args': [32, 2, 1.0, True, 0.5],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [32, 2, 1.0, True, 0.5],
"lr": 1e-2,
"weight_decay": 5e-4,
}
SGC_CONFIG = {
'extra_args': [None, 2, False],
'lr': 0.2,
'weight_decay': 5e-6,
"extra_args": [None, 2, False],
"lr": 0.2,
"weight_decay": 5e-6,
}
GIN_CONFIG = {
'extra_args': [16, 1, 0, True],
'lr': 1e-2,
'weight_decay': 5e-6,
"extra_args": [16, 1, 0, True],
"lr": 1e-2,
"weight_decay": 5e-6,
}
CHEBNET_CONFIG = {
'extra_args': [32, 1, 2, True],
'lr': 1e-2,
'weight_decay': 5e-4,
"extra_args": [32, 1, 2, True],
"lr": 1e-2,
"weight_decay": 5e-4,
}
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv, GATConv, SAGEConv, GINConv,\
APPNPConv, TAGConv, SGConv, AGNNConv, ChebConv
from dgl.nn.pytorch import (
AGNNConv,
APPNPConv,
ChebConv,
GATConv,
GINConv,
GraphConv,
SAGEConv,
SGConv,
TAGConv,
)
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout):
def __init__(
self, g, in_feats, n_classes, n_hidden, n_layers, activation, dropout
):
super(GCN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
......@@ -20,7 +25,9 @@ class GCN(nn.Module):
self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
self.layers.append(
GraphConv(n_hidden, n_hidden, activation=activation)
)
# output layer
self.layers.append(GraphConv(n_hidden, n_classes))
self.dropout = nn.Dropout(p=dropout)
......@@ -35,37 +42,66 @@ class GCN(nn.Module):
class GAT(nn.Module):
def __init__(self,
g,
in_dim,
num_classes,
num_hidden,
num_layers,
heads,
activation,
feat_drop,
attn_drop,
negative_slope,
residual):
def __init__(
self,
g,
in_dim,
num_classes,
num_hidden,
num_layers,
heads,
activation,
feat_drop,
attn_drop,
negative_slope,
residual,
):
super(GAT, self).__init__()
self.g = g
self.num_layers = num_layers
self.gat_layers = nn.ModuleList()
self.activation = activation
# input projection (no residual)
self.gat_layers.append(GATConv(
in_dim, num_hidden, heads[0],
feat_drop, attn_drop, negative_slope, False, self.activation))
self.gat_layers.append(
GATConv(
in_dim,
num_hidden,
heads[0],
feat_drop,
attn_drop,
negative_slope,
False,
self.activation,
)
)
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(GATConv(
num_hidden * heads[l-1], num_hidden, heads[l],
feat_drop, attn_drop, negative_slope, residual, self.activation))
self.gat_layers.append(
GATConv(
num_hidden * heads[l - 1],
num_hidden,
heads[l],
feat_drop,
attn_drop,
negative_slope,
residual,
self.activation,
)
)
# output projection
self.gat_layers.append(GATConv(
num_hidden * heads[-2], num_classes, heads[-1],
feat_drop, attn_drop, negative_slope, residual, None))
self.gat_layers.append(
GATConv(
num_hidden * heads[-2],
num_classes,
heads[-1],
feat_drop,
attn_drop,
negative_slope,
residual,
None,
)
)
def forward(self, inputs):
h = inputs
......@@ -77,26 +113,52 @@ class GAT(nn.Module):
class GraphSAGE(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout,
aggregator_type):
def __init__(
self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout,
aggregator_type,
):
super(GraphSAGE, self).__init__()
self.layers = nn.ModuleList()
self.g = g
# input layer
self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
self.layers.append(
SAGEConv(
in_feats,
n_hidden,
aggregator_type,
feat_drop=dropout,
activation=activation,
)
)
# hidden layers
for i in range(n_layers - 1):
self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type, feat_drop=dropout, activation=activation))
self.layers.append(
SAGEConv(
n_hidden,
n_hidden,
aggregator_type,
feat_drop=dropout,
activation=activation,
)
)
# output layer
self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type, feat_drop=dropout, activation=None)) # activation None
self.layers.append(
SAGEConv(
n_hidden,
n_classes,
aggregator_type,
feat_drop=dropout,
activation=None,
)
) # activation None
def forward(self, features):
h = features
......@@ -106,17 +168,19 @@ class GraphSAGE(nn.Module):
class APPNP(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
feat_drop,
edge_drop,
alpha,
k):
def __init__(
self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
feat_drop,
edge_drop,
alpha,
k,
):
super(APPNP, self).__init__()
self.g = g
self.layers = nn.ModuleList()
......@@ -153,14 +217,9 @@ class APPNP(nn.Module):
class TAGCN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
activation,
dropout):
def __init__(
self, g, in_feats, n_classes, n_hidden, n_layers, activation, dropout
):
super(TAGCN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
......@@ -168,9 +227,11 @@ class TAGCN(nn.Module):
self.layers.append(TAGConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(TAGConv(n_hidden, n_hidden, activation=activation))
self.layers.append(
TAGConv(n_hidden, n_hidden, activation=activation)
)
# output layer
self.layers.append(TAGConv(n_hidden, n_classes)) #activation=None
self.layers.append(TAGConv(n_hidden, n_classes)) # activation=None
self.dropout = nn.Dropout(p=dropout)
def forward(self, features):
......@@ -183,28 +244,27 @@ class TAGCN(nn.Module):
class AGNN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
init_beta,
learn_beta,
dropout):
def __init__(
self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
init_beta,
learn_beta,
dropout,
):
super(AGNN, self).__init__()
self.g = g
self.layers = nn.ModuleList(
[AGNNConv(init_beta, learn_beta) for _ in range(n_layers)]
)
self.proj = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(in_feats, n_hidden),
nn.ReLU()
nn.Dropout(dropout), nn.Linear(in_feats, n_hidden), nn.ReLU()
)
self.cls = nn.Sequential(
nn.Dropout(dropout),
nn.Linear(n_hidden, n_classes)
nn.Dropout(dropout), nn.Linear(n_hidden, n_classes)
)
def forward(self, features):
......@@ -215,34 +275,19 @@ class AGNN(nn.Module):
class SGC(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
k,
bias):
def __init__(self, g, in_feats, n_classes, n_hidden, k, bias):
super(SGC, self).__init__()
self.g = g
self.net = SGConv(in_feats,
n_classes,
k=k,
cached=True,
bias=bias)
self.net = SGConv(in_feats, n_classes, k=k, cached=True, bias=bias)
def forward(self, features):
return self.net(self.g, features)
class GIN(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
init_eps,
learn_eps):
def __init__(
self, g, in_feats, n_classes, n_hidden, n_layers, init_eps, learn_eps
):
super(GIN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
......@@ -253,9 +298,9 @@ class GIN(nn.Module):
nn.Linear(in_feats, n_hidden),
nn.ReLU(),
),
'mean',
"mean",
init_eps,
learn_eps
learn_eps,
)
)
for i in range(n_layers - 1):
......@@ -264,11 +309,11 @@ class GIN(nn.Module):
nn.Sequential(
nn.Dropout(0.6),
nn.Linear(n_hidden, n_hidden),
nn.ReLU()
nn.ReLU(),
),
'mean',
"mean",
init_eps,
learn_eps
learn_eps,
)
)
self.layers.append(
......@@ -277,9 +322,9 @@ class GIN(nn.Module):
nn.Dropout(0.6),
nn.Linear(n_hidden, n_classes),
),
'mean',
"mean",
init_eps,
learn_eps
learn_eps,
)
)
......@@ -289,29 +334,17 @@ class GIN(nn.Module):
h = layer(self.g, h)
return h
class ChebNet(nn.Module):
def __init__(self,
g,
in_feats,
n_classes,
n_hidden,
n_layers,
k,
bias):
def __init__(self, g, in_feats, n_classes, n_hidden, n_layers, k, bias):
super(ChebNet, self).__init__()
self.g = g
self.layers = nn.ModuleList()
self.layers.append(
ChebConv(in_feats, n_hidden, k, bias=bias)
)
self.layers.append(ChebConv(in_feats, n_hidden, k, bias=bias))
for _ in range(n_layers - 1):
self.layers.append(
ChebConv(n_hidden, n_hidden, k, bias=bias)
)
self.layers.append(ChebConv(n_hidden, n_hidden, k, bias=bias))
self.layers.append(
ChebConv(n_hidden, n_classes, k, bias=bias)
)
self.layers.append(ChebConv(n_hidden, n_classes, k, bias=bias))
def forward(self, features):
h = features
......
import argparse, time
import argparse
import time
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.data import register_data_args, load_data
from models import *
from conf import *
import networkx as nx
from models import *
import dgl
from dgl.data import load_data, register_data_args
def get_model_and_config(name):
name = name.lower()
if name == 'gcn':
if name == "gcn":
return GCN, GCN_CONFIG
elif name == 'gat':
elif name == "gat":
return GAT, GAT_CONFIG
elif name == 'graphsage':
elif name == "graphsage":
return GraphSAGE, GRAPHSAGE_CONFIG
elif name == 'appnp':
elif name == "appnp":
return APPNP, APPNP_CONFIG
elif name == 'tagcn':
elif name == "tagcn":
return TAGCN, TAGCN_CONFIG
elif name == 'agnn':
elif name == "agnn":
return AGNN, AGNN_CONFIG
elif name == 'sgc':
elif name == "sgc":
return SGC, SGC_CONFIG
elif name == 'gin':
elif name == "gin":
return GIN, GIN_CONFIG
elif name == 'chebnet':
elif name == "chebnet":
return ChebNet, CHEBNET_CONFIG
def evaluate(model, features, labels, mask):
model.eval()
with torch.no_grad():
......@@ -41,6 +45,7 @@ def evaluate(model, features, labels, mask):
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def main(args):
# load and preprocess dataset
data = load_data(args)
......@@ -50,24 +55,29 @@ def main(args):
else:
cuda = True
g = g.to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = g.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.int().sum().item(),
val_mask.int().sum().item(),
test_mask.int().sum().item()))
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.int().sum().item(),
val_mask.int().sum().item(),
test_mask.int().sum().item(),
)
)
# graph preprocess and calculate normalization factor
# add self loop
......@@ -79,14 +89,11 @@ def main(args):
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
g.ndata['norm'] = norm.unsqueeze(1)
g.ndata["norm"] = norm.unsqueeze(1)
# create the model selected by args.model
GNN, config = get_model_and_config(args.model)
model = GNN(g,
in_feats,
n_classes,
*config['extra_args'])
model = GNN(g, in_feats, n_classes, *config["extra_args"])
if cuda:
model = model.cuda()
......@@ -96,9 +103,9 @@ def main(args):
loss_fcn = torch.nn.CrossEntropyLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
lr=config['lr'],
weight_decay=config['weight_decay'])
optimizer = torch.optim.Adam(
model.parameters(), lr=config["lr"], weight_decay=config["weight_decay"]
)
# initialize graph
dur = []
......@@ -118,25 +125,40 @@ def main(args):
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.item(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
print()
acc = evaluate(model, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Node classification on citation networks.')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Node classification on citation networks."
)
register_data_args(parser)
parser.add_argument("--model", type=str, default='gcn',
help='model to use, available models are gcn, gat, graphsage, gin, '
'appnp, tagcn, sgc, agnn')
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--self-loop", action='store_true',
help="graph self-loop (default=False)")
parser.add_argument(
"--model",
type=str,
default="gcn",
help="model to use, available models are gcn, gat, graphsage, gin,"
"appnp, tagcn, sgc, agnn",
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument(
"--self-loop",
action="store_true",
help="graph self-loop (default=False)",
)
args = parser.parse_args()
print(args)
main(args)
......@@ -31,7 +31,7 @@ def laplacian(W, normalized=True):
def rescale_L(L, lmax=2):
"""Rescale Laplacian eigenvalues to [-1,1]"""
M, M = L.shape
I = scipy.sparse.identity(M, format='csr', dtype=L.dtype)
I = scipy.sparse.identity(M, format="csr", dtype=L.dtype)
L /= lmax * 2
L -= I
return L
......@@ -39,7 +39,9 @@ def rescale_L(L, lmax=2):
def lmax_L(L):
"""Compute largest Laplacian eigenvalue"""
return scipy.sparse.linalg.eigsh(L, k=1, which='LM', return_eigenvectors=False)[0]
return scipy.sparse.linalg.eigsh(
L, k=1, which="LM", return_eigenvectors=False
)[0]
# graph coarsening with Heavy Edge Matching
......@@ -57,7 +59,11 @@ def coarsen(A, levels):
A = A.tocsr()
A.eliminate_zeros()
Mnew, Mnew = A.shape
print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew - M, A.nnz // 2))
print(
"Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges".format(
i, Mnew, Mnew - M, A.nnz // 2
)
)
L = laplacian(A, normalized=True)
laplacians.append(L)
......@@ -95,7 +101,7 @@ def HEM(W, levels, rid=None):
graphs = []
graphs.append(W)
print('Heavy Edge Matching coarsening with Xavier version')
print("Heavy Edge Matching coarsening with Xavier version")
for _ in range(levels):
......@@ -183,7 +189,9 @@ def HEM_one_level(rr, cc, vv, rid, weights):
# First approach
if 2 == 1:
tval = vv[rs + jj] * (1.0 / weights[tid] + 1.0 / weights[nid])
tval = vv[rs + jj] * (
1.0 / weights[tid] + 1.0 / weights[nid]
)
# Second approach
if 1 == 1:
......@@ -192,7 +200,7 @@ def HEM_one_level(rr, cc, vv, rid, weights):
Wjj = vv[rowstart[nid]]
di = weights[tid]
dj = weights[nid]
tval = (2. * Wij + Wii + Wjj) * 1. / (di + dj + 1e-9)
tval = (2.0 * Wij + Wii + Wjj) * 1.0 / (di + dj + 1e-9)
if tval > wmax:
wmax = tval
......@@ -247,7 +255,7 @@ def compute_perm(parents):
# Sanity checks.
for i, indices_layer in enumerate(indices):
M = M_last * 2 ** i
M = M_last * 2**i
# Reduction by 2 at each layer (binary tree).
assert len(indices_layer) == M
# The new ordering does not omit an index.
......@@ -256,8 +264,9 @@ def compute_perm(parents):
return indices[::-1]
assert (compute_perm([np.array([4, 1, 1, 2, 2, 3, 0, 0, 3]), np.array([2, 1, 0, 1, 0])])
== [[3, 4, 0, 9, 1, 2, 5, 8, 6, 7, 10, 11], [2, 4, 1, 3, 0, 5], [0, 1, 2]])
assert compute_perm(
[np.array([4, 1, 1, 2, 2, 3, 0, 0, 3]), np.array([2, 1, 0, 1, 0])]
) == [[3, 4, 0, 9, 1, 2, 5, 8, 6, 7, 10, 11], [2, 4, 1, 3, 0, 5], [0, 1, 2]]
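# Note: the permutation also introduces "fake" children (e.g. indices 9, 10, 11
# above, beyond the 9 original nodes) so that every parent has exactly two
# children. That is what lets the MNIST models pool with nn.MaxPool1d(2) after
# each graph convolution, and why batcher() later zero-pads each image from
# 28**2 pixels up to len(perm) entries.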
def perm_adjacency(A, indices):
......
......@@ -2,6 +2,8 @@ import torch as th
"""Compute x,y coordinate for nodes in the graph"""
eps = 1e-8
def get_coordinates(graphs, grid_side, coarsening_levels, perm):
rst = []
for l in range(coarsening_levels + 1):
......@@ -10,21 +12,25 @@ def get_coordinates(graphs, grid_side, coarsening_levels, perm):
cnt = eps
x_accum = 0
y_accum = 0
for j in range(i * 2 ** l, (i + 1) * 2 ** l):
if perm[j] < grid_side ** 2:
x_accum += (perm[j] // grid_side)
y_accum += (perm[j] % grid_side)
for j in range(i * 2**l, (i + 1) * 2**l):
if perm[j] < grid_side**2:
x_accum += perm[j] // grid_side
y_accum += perm[j] % grid_side
cnt += 1
xs.append(x_accum / cnt)
ys.append(y_accum / cnt)
rst.append(th.cat([th.tensor(xs).view(-1, 1), th.tensor(ys).view(-1, 1)], -1))
rst.append(
th.cat([th.tensor(xs).view(-1, 1), th.tensor(ys).view(-1, 1)], -1)
)
return rst
"""Cartesian coordinate to polar coordinate"""
def z2polar(edges):
z = edges.dst['xy'] - edges.src['xy']
z = edges.dst["xy"] - edges.src["xy"]
rho = th.norm(z, dim=-1, p=2)
x, y = z.unbind(dim=-1)
phi = th.atan2(y, x)
return {'u': th.cat([rho.unsqueeze(-1), phi.unsqueeze(-1)], -1)}
return {"u": th.cat([rho.unsqueeze(-1), phi.unsqueeze(-1)], -1)}
# author: xbresson
# code link: https://github.com/xbresson/CE7454_2019/blob/master/codes/labs_lecture14/lab01_ChebGCNs/lib/grid_graph.py
import numpy as np
import scipy.sparse # scipy.spatial.distance
import scipy.sparse.linalg
import sklearn
import sklearn.metrics
import scipy.sparse, scipy.sparse.linalg # scipy.spatial.distance
import numpy as np
def grid_graph(grid_side,number_edges,metric):
def grid_graph(grid_side, number_edges, metric):
"""Generate graph of a grid"""
z = grid(grid_side)
dist, idx = distance_sklearn_metrics(z, k=number_edges, metric=metric)
A = adjacency(dist, idx)
print("nb edges: ",A.nnz)
print("nb edges: ", A.nnz)
return A
def grid(m, dtype=np.float32):
"""Return coordinates of grid points"""
M = m**2
x = np.linspace(0,1,m, dtype=dtype)
y = np.linspace(0,1,m, dtype=dtype)
x = np.linspace(0, 1, m, dtype=dtype)
y = np.linspace(0, 1, m, dtype=dtype)
xx, yy = np.meshgrid(x, y)
z = np.empty((M,2), dtype)
z[:,0] = xx.reshape(M)
z[:,1] = yy.reshape(M)
z = np.empty((M, 2), dtype)
z[:, 0] = xx.reshape(M)
z[:, 1] = yy.reshape(M)
return z
def distance_sklearn_metrics(z, k=4, metric='euclidean'):
def distance_sklearn_metrics(z, k=4, metric="euclidean"):
"""Compute pairwise distances"""
#d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=-2)
# d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=-2)
d = sklearn.metrics.pairwise.pairwise_distances(z, metric=metric, n_jobs=1)
# k-NN
idx = np.argsort(d)[:,1:k+1]
idx = np.argsort(d)[:, 1 : k + 1]
d.sort()
d = d[:,1:k+1]
d = d[:, 1 : k + 1]
return d, idx
......@@ -47,13 +48,13 @@ def adjacency(dist, idx):
assert dist.max() <= 1
# Pairwise distances
sigma2 = np.mean(dist[:,-1])**2
dist = np.exp(- dist**2 / sigma2)
sigma2 = np.mean(dist[:, -1]) ** 2
dist = np.exp(-(dist**2) / sigma2)
# Weight matrix
I = np.arange(0, M).repeat(k)
J = idx.reshape(M*k)
V = dist.reshape(M*k)
J = idx.reshape(M * k)
V = dist.reshape(M * k)
W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M))
# No self-connections
......
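# Note: adjacency() builds a Gaussian-kernel kNN graph: with d_ij the distance
# from node i to its j-th nearest neighbour, the edge weight is
# W_ij = exp(-d_ij**2 / sigma**2), where sigma is the mean distance to the k-th
# neighbour, so weights decay smoothly with distance.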
import argparse
import time
import numpy as np
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from coarsening import coarsen
from coordinate import get_coordinates, z2polar
from grid_graph import grid_graph
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from dgl.data import register_data_args, load_data
import dgl
from dgl.data import load_data, register_data_args
from dgl.nn.pytorch.conv import ChebConv, GMMConv
from dgl.nn.pytorch.glob import MaxPooling
from grid_graph import grid_graph
from coarsening import coarsen
from coordinate import get_coordinates, z2polar
argparser = argparse.ArgumentParser("MNIST")
argparser.add_argument("--gpu", type=int, default=-1,
help="gpu id, use cpu if set to -1")
argparser.add_argument("--model", type=str, default="chebnet",
help="model to use, chebnet/monet")
argparser.add_argument("--batch-size", type=int, default=100,
help="batch size")
argparser.add_argument(
"--gpu", type=int, default=-1, help="gpu id, use cpu if set to -1"
)
argparser.add_argument(
"--model", type=str, default="chebnet", help="model to use, chebnet/monet"
)
argparser.add_argument("--batch-size", type=int, default=100, help="batch size")
args = argparser.parse_args()
grid_side = 28
number_edges = 8
metric = 'euclidean'
metric = "euclidean"
A = grid_graph(28, 8, metric)
......@@ -35,18 +38,25 @@ L, perm = coarsen(A, coarsening_levels)
g_arr = [dgl.from_scipy(csr) for csr in L]
coordinate_arr = get_coordinates(g_arr, grid_side, coarsening_levels, perm)
str_to_torch_dtype = {'float16':torch.half, 'float32':torch.float32, 'float64':torch.float64}
coordinate_arr = [coord.to(dtype=str_to_torch_dtype[str(A.dtype)]) for coord in coordinate_arr]
str_to_torch_dtype = {
"float16": torch.half,
"float32": torch.float32,
"float64": torch.float64,
}
coordinate_arr = [
coord.to(dtype=str_to_torch_dtype[str(A.dtype)]) for coord in coordinate_arr
]
for g, coordinate_arr in zip(g_arr, coordinate_arr):
g.ndata['xy'] = coordinate_arr
g.ndata["xy"] = coordinate_arr
g.apply_edges(z2polar)
def batcher(batch):
g_batch = [[] for _ in range(coarsening_levels + 1)]
x_batch = []
y_batch = []
for x, y in batch:
x = torch.cat([x.view(-1), x.new_zeros(len(perm) - 28 ** 2)], 0)
x = torch.cat([x.view(-1), x.new_zeros(len(perm) - 28**2)], 0)
x = x[perm]
x_batch.append(x)
y_batch.append(y)
......@@ -58,87 +68,98 @@ def batcher(batch):
g_batch = [dgl.batch(g) for g in g_batch]
return g_batch, x_batch, y_batch
trainset = datasets.MNIST(root='.', train=True, download=True, transform=transforms.ToTensor())
testset = datasets.MNIST(root='.', train=False, download=True, transform=transforms.ToTensor())
train_loader = DataLoader(trainset,
batch_size=args.batch_size,
shuffle=True,
collate_fn=batcher,
num_workers=6)
test_loader = DataLoader(testset,
batch_size=args.batch_size,
shuffle=False,
collate_fn=batcher,
num_workers=6)
trainset = datasets.MNIST(
root=".", train=True, download=True, transform=transforms.ToTensor()
)
testset = datasets.MNIST(
root=".", train=False, download=True, transform=transforms.ToTensor()
)
train_loader = DataLoader(
trainset,
batch_size=args.batch_size,
shuffle=True,
collate_fn=batcher,
num_workers=6,
)
test_loader = DataLoader(
testset,
batch_size=args.batch_size,
shuffle=False,
collate_fn=batcher,
num_workers=6,
)
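# Note: with collate_fn=batcher, every minibatch is a tuple
# (g_batch, x_batch, y_batch): g_batch[l] is a dgl.batch of the level-l coarsened
# grid graphs (one copy per sample), x_batch collects the permuted, zero-padded
# pixel vectors of length len(perm), and y_batch the digit labels.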
class MoNet(nn.Module):
def __init__(self,
n_kernels,
in_feats,
hiddens,
out_feats):
def __init__(self, n_kernels, in_feats, hiddens, out_feats):
super(MoNet, self).__init__()
self.pool = nn.MaxPool1d(2)
self.layers = nn.ModuleList()
self.readout = MaxPooling()
# Input layer
self.layers.append(
GMMConv(in_feats, hiddens[0], 2, n_kernels))
self.layers.append(GMMConv(in_feats, hiddens[0], 2, n_kernels))
# Hidden layer
for i in range(1, len(hiddens)):
self.layers.append(GMMConv(hiddens[i - 1], hiddens[i], 2, n_kernels))
self.layers.append(
GMMConv(hiddens[i - 1], hiddens[i], 2, n_kernels)
)
self.cls = nn.Sequential(
nn.Linear(hiddens[-1], out_feats),
nn.LogSoftmax()
nn.Linear(hiddens[-1], out_feats), nn.LogSoftmax()
)
def forward(self, g_arr, feat):
for g, layer in zip(g_arr, self.layers):
u = g.edata['u']
feat = self.pool(layer(g, feat, u).transpose(-1, -2).unsqueeze(0))\
.squeeze(0).transpose(-1, -2)
u = g.edata["u"]
feat = (
self.pool(layer(g, feat, u).transpose(-1, -2).unsqueeze(0))
.squeeze(0)
.transpose(-1, -2)
)
return self.cls(self.readout(g_arr[-1], feat))
class ChebNet(nn.Module):
def __init__(self,
k,
in_feats,
hiddens,
out_feats):
def __init__(self, k, in_feats, hiddens, out_feats):
super(ChebNet, self).__init__()
self.pool = nn.MaxPool1d(2)
self.layers = nn.ModuleList()
self.readout = MaxPooling()
# Input layer
self.layers.append(
ChebConv(in_feats, hiddens[0], k))
self.layers.append(ChebConv(in_feats, hiddens[0], k))
for i in range(1, len(hiddens)):
self.layers.append(
ChebConv(hiddens[i - 1], hiddens[i], k))
self.layers.append(ChebConv(hiddens[i - 1], hiddens[i], k))
self.cls = nn.Sequential(
nn.Linear(hiddens[-1], out_feats),
nn.LogSoftmax()
nn.Linear(hiddens[-1], out_feats), nn.LogSoftmax()
)
def forward(self, g_arr, feat):
for g, layer in zip(g_arr, self.layers):
feat = self.pool(layer(g, feat, [2] * g.batch_size).transpose(-1, -2).unsqueeze(0))\
.squeeze(0).transpose(-1, -2)
feat = (
self.pool(
layer(g, feat, [2] * g.batch_size)
.transpose(-1, -2)
.unsqueeze(0)
)
.squeeze(0)
.transpose(-1, -2)
)
return self.cls(self.readout(g_arr[-1], feat))
if args.gpu == -1:
device = torch.device('cpu')
device = torch.device("cpu")
else:
device = torch.device(args.gpu)
if args.model == 'chebnet':
if args.model == "chebnet":
model = ChebNet(2, 1, [32, 64, 128, 256], 10)
else:
model = MoNet(10, 1, [32, 64, 128, 256], 10)
......@@ -149,7 +170,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
log_interval = 50
for epoch in range(10):
print('epoch {} starts'.format(epoch))
print("epoch {} starts".format(epoch))
model.train()
hit, tot = 0, 0
loss_accum = 0
......@@ -164,7 +185,9 @@ for epoch in range(10):
loss_accum += loss.item()
if (i + 1) % log_interval == 0:
print('loss: {}, acc: {}'.format(loss_accum / log_interval, hit / tot))
print(
"loss: {}, acc: {}".format(loss_accum / log_interval, hit / tot)
)
hit, tot = 0, 0
loss_accum = 0
......@@ -182,4 +205,4 @@ for epoch in range(10):
hit += (out.max(-1)[1] == y).sum().item()
tot += len(y)
print('test acc: ', hit / tot)
print("test acc: ", hit / tot)
import argparse
import time
import numpy as np
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.data import load_data, register_data_args
from dgl.nn.pytorch.conv import GMMConv
class MoNet(nn.Module):
def __init__(self,
g,
in_feats,
n_hidden,
out_feats,
n_layers,
dim,
n_kernels,
dropout):
def __init__(
self,
g,
in_feats,
n_hidden,
out_feats,
n_layers,
dim,
n_kernels,
dropout,
):
super(MoNet, self).__init__()
self.g = g
self.layers = nn.ModuleList()
self.pseudo_proj = nn.ModuleList()
# Input layer
self.layers.append(
GMMConv(in_feats, n_hidden, dim, n_kernels))
self.pseudo_proj.append(
nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
self.layers.append(GMMConv(in_feats, n_hidden, dim, n_kernels))
self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
# Hidden layer
for _ in range(n_layers - 1):
self.layers.append(GMMConv(n_hidden, n_hidden, dim, n_kernels))
self.pseudo_proj.append(
nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
# Output layer
self.layers.append(GMMConv(n_hidden, out_feats, dim, n_kernels))
self.pseudo_proj.append(
nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
self.pseudo_proj.append(nn.Sequential(nn.Linear(2, dim), nn.Tanh()))
self.dropout = nn.Dropout(dropout)
def forward(self, feat, pseudo):
......@@ -48,10 +48,10 @@ class MoNet(nn.Module):
for i in range(len(self.layers)):
if i != 0:
h = self.dropout(h)
h = self.layers[i](
self.g, h, self.pseudo_proj[i](pseudo))
h = self.layers[i](self.g, h, self.pseudo_proj[i](pseudo))
return h
def evaluate(model, features, pseudo, labels, mask):
model.eval()
with torch.no_grad():
......@@ -62,6 +62,7 @@ def evaluate(model, features, pseudo, labels, mask):
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def main(args):
# load and preprocess dataset
data = load_data(args)
......@@ -71,49 +72,59 @@ def main(args):
else:
cuda = True
g = g.to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = g.number_of_edges()
print("""----Data statistics------'
print(
"""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.sum().item(),
val_mask.sum().item(),
test_mask.sum().item()))
#Test samples %d"""
% (
n_edges,
n_classes,
train_mask.sum().item(),
val_mask.sum().item(),
test_mask.sum().item(),
)
)
# graph preprocess and calculate normalization factor
g = g.remove_self_loop().add_self_loop()
n_edges = g.number_of_edges()
us, vs = g.edges(order='eid')
udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / torch.sqrt(g.in_degrees(vs).float())
us, vs = g.edges(order="eid")
udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / torch.sqrt(
g.in_degrees(vs).float()
)
pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1)
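# Note: the pseudo-coordinate of edge (u, v) is the pair
# (1 / sqrt(deg(u)), 1 / sqrt(deg(v))); each pseudo_proj maps it through
# Linear(2, dim) + Tanh before the corresponding GMMConv layer consumes it.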
# create MoNet model
model = MoNet(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
args.pseudo_dim,
args.n_kernels,
args.dropout
)
model = MoNet(
g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
args.pseudo_dim,
args.n_kernels,
args.dropout,
)
if cuda:
model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
optimizer = torch.optim.Adam(
model.parameters(), lr=args.lr, weight_decay=args.weight_decay
)
# initialize graph
dur = []
......@@ -133,36 +144,54 @@ def main(args):
dur.append(time.time() - t0)
acc = evaluate(model, features, pseudo, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print(
"Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(
epoch,
np.mean(dur),
loss.item(),
acc,
n_edges / np.mean(dur) / 1000,
)
)
print()
acc = evaluate(model, features, pseudo, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='MoNet on citation network')
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="MoNet on citation network")
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
parser.add_argument("--pseudo-dim", type=int, default=2,
help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed")
parser.add_argument("--n-kernels", type=int, default=3,
help="Number of kernels in GMMConv layer")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
parser.add_argument(
"--dropout", type=float, default=0.5, help="dropout probability"
)
parser.add_argument("--gpu", type=int, default=-1, help="gpu")
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
parser.add_argument(
"--n-epochs", type=int, default=200, help="number of training epochs"
)
parser.add_argument(
"--n-hidden", type=int, default=16, help="number of hidden gcn units"
)
parser.add_argument(
"--n-layers", type=int, default=1, help="number of hidden gcn layers"
)
parser.add_argument(
"--pseudo-dim",
type=int,
default=2,
help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed",
)
parser.add_argument(
"--n-kernels",
type=int,
default=3,
help="Number of kernels in GMMConv layer",
)
parser.add_argument(
"--weight-decay", type=float, default=5e-4, help="Weight for L2 loss"
)
args = parser.parse_args()
print(args)
......
import argparse
import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.distributed as dist
import torch.optim as optim
from ogb.graphproppred import DglGraphPropPredDataset, Evaluator
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
from tqdm import tqdm
import dgl
import dgl.nn as dglnn
from dgl.data import AsGraphPredDataset
from dgl.dataloading import GraphDataLoader
from ogb.graphproppred import DglGraphPropPredDataset, Evaluator
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
from tqdm import tqdm
import argparse
class MLP(nn.Module):
def __init__(self, in_feats):
......@@ -20,25 +23,27 @@ class MLP(nn.Module):
nn.BatchNorm1d(2 * in_feats),
nn.ReLU(),
nn.Linear(2 * in_feats, in_feats),
nn.BatchNorm1d(in_feats)
nn.BatchNorm1d(in_feats),
)
def forward(self, h):
return self.mlp(h)
class GIN(nn.Module):
def __init__(self, n_hidden, n_output, n_layers=5):
super().__init__()
self.node_encoder = AtomEncoder(n_hidden)
self.edge_encoders = nn.ModuleList([
BondEncoder(n_hidden) for _ in range(n_layers)])
self.edge_encoders = nn.ModuleList(
[BondEncoder(n_hidden) for _ in range(n_layers)]
)
self.pool = dglnn.AvgPooling()
self.dropout = nn.Dropout(0.5)
self.layers = nn.ModuleList()
for _ in range(n_layers):
self.layers.append(dglnn.GINEConv(MLP(n_hidden), learn_eps=True))
self.predictor = nn.Linear(n_hidden, n_output)
self.layers.append(dglnn.GINEConv(MLP(n_hidden), learn_eps=True))
self.predictor = nn.Linear(n_hidden, n_output)
# add virtual node
self.virtual_emb = nn.Embedding(1, n_hidden)
......@@ -65,6 +70,7 @@ class GIN(nn.Module):
hn = self.pool(g, hn)
return self.predictor(hn)
@torch.no_grad()
def evaluate(dataloader, device, model, evaluator):
model.eval()
......@@ -72,17 +78,23 @@ def evaluate(dataloader, device, model, evaluator):
y_pred = []
for batched_graph, labels in tqdm(dataloader):
batched_graph, labels = batched_graph.to(device), labels.to(device)
node_feat, edge_feat = batched_graph.ndata['feat'], batched_graph.edata['feat']
node_feat, edge_feat = (
batched_graph.ndata["feat"],
batched_graph.edata["feat"],
)
y_hat = model(batched_graph, node_feat, edge_feat)
y_true.append(labels.view(y_hat.shape).detach().cpu())
y_pred.append(y_hat.detach().cpu())
y_pred.append(y_hat.detach().cpu())
y_true = torch.cat(y_true, dim=0).numpy()
y_pred = torch.cat(y_pred, dim=0).numpy()
input_dict = {'y_true': y_true, 'y_pred': y_pred}
input_dict = {"y_true": y_true, "y_pred": y_pred}
return evaluator.eval(input_dict)
def train(rank, world_size, dataset_name, root):
dist.init_process_group('nccl', 'tcp://127.0.0.1:12347', world_size=world_size, rank=rank)
dist.init_process_group(
"nccl", "tcp://127.0.0.1:12347", world_size=world_size, rank=rank
)
torch.cuda.set_device(rank)
dataset = AsGraphPredDataset(DglGraphPropPredDataset(dataset_name, root))
......@@ -94,48 +106,62 @@ def train(rank, world_size, dataset_name, root):
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
train_dataloader = GraphDataLoader(
dataset[dataset.train_idx], batch_size=256,
use_ddp=True, shuffle=True)
valid_dataloader = GraphDataLoader(
dataset[dataset.val_idx], batch_size=256)
test_dataloader = GraphDataLoader(
dataset[dataset.test_idx], batch_size=256)
dataset[dataset.train_idx], batch_size=256, use_ddp=True, shuffle=True
)
valid_dataloader = GraphDataLoader(dataset[dataset.val_idx], batch_size=256)
test_dataloader = GraphDataLoader(dataset[dataset.test_idx], batch_size=256)
for epoch in range(50):
model.train()
train_dataloader.set_epoch(epoch)
for batched_graph, labels in train_dataloader:
batched_graph, labels = batched_graph.to(rank), labels.to(rank)
node_feat, edge_feat = batched_graph.ndata['feat'], batched_graph.edata['feat']
node_feat, edge_feat = (
batched_graph.ndata["feat"],
batched_graph.edata["feat"],
)
logits = model(batched_graph, node_feat, edge_feat)
optimizer.zero_grad()
is_labeled = labels == labels
loss = F.binary_cross_entropy_with_logits(logits.float()[is_labeled], labels.float()[is_labeled])
loss = F.binary_cross_entropy_with_logits(
logits.float()[is_labeled], labels.float()[is_labeled]
)
loss.backward()
optimizer.step()
scheduler.step()
if rank == 0:
val_metric = evaluate(valid_dataloader, rank, model.module, evaluator)[evaluator.eval_metric]
test_metric = evaluate(test_dataloader, rank, model.module, evaluator)[evaluator.eval_metric]
print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, '
f'Val: {val_metric:.4f}, Test: {test_metric:.4f}')
val_metric = evaluate(
valid_dataloader, rank, model.module, evaluator
)[evaluator.eval_metric]
test_metric = evaluate(
test_dataloader, rank, model.module, evaluator
)[evaluator.eval_metric]
print(
f"Epoch: {epoch:03d}, Loss: {loss:.4f}, "
f"Val: {val_metric:.4f}, Test: {test_metric:.4f}"
)
dist.destroy_process_group()
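# Note: GraphDataLoader(..., use_ddp=True) gives each rank its own shard of the
# training set, and train_dataloader.set_epoch(epoch) reshuffles that shard every
# epoch; validation and test metrics are computed on rank 0 only.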
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default="ogbg-molhiv",
choices=['ogbg-molhiv', 'ogbg-molpcba'],
help='name of dataset (default: ogbg-molhiv)')
parser.add_argument(
"--dataset",
type=str,
default="ogbg-molhiv",
choices=["ogbg-molhiv", "ogbg-molpcba"],
help="name of dataset (default: ogbg-molhiv)",
)
dataset_name = parser.parse_args().dataset
root = './data/OGB'
root = "./data/OGB"
DglGraphPropPredDataset(dataset_name, root)
world_size = torch.cuda.device_count()
print('Let\'s use', world_size, 'GPUs!')
print("Let's use", world_size, "GPUs!")
args = (world_size, dataset_name, root)
import torch.multiprocessing as mp
mp.spawn(train, args=args, nprocs=world_size, join=True)
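# Note: torch.multiprocessing.spawn calls train(rank, *args) once per process,
# so each worker receives its rank in 0..world_size-1 as the first argument,
# followed by (world_size, dataset_name, root).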
import argparse
import os
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics.functional as MF
import torch.distributed as dist
import tqdm
from ogb.nodeproppred import DglNodePropPredDataset
from torch.nn.parallel import DistributedDataParallel
import torch.multiprocessing as mp
import dgl.nn as dglnn
from dgl.multiprocessing import shared_tensor
from dgl.data import AsNodePredDataset
from dgl.dataloading import DataLoader, NeighborSampler, MultiLayerFullNeighborSampler
from ogb.nodeproppred import DglNodePropPredDataset
import tqdm
import argparse
from dgl.dataloading import (
DataLoader,
MultiLayerFullNeighborSampler,
NeighborSampler,
)
from dgl.multiprocessing import shared_tensor
class SAGE(nn.Module):
def __init__(self, in_size, hid_size, out_size):
super().__init__()
self.layers = nn.ModuleList()
# three-layer GraphSAGE-mean
self.layers.append(dglnn.SAGEConv(in_size, hid_size, 'mean'))
self.layers.append(dglnn.SAGEConv(hid_size, hid_size, 'mean'))
self.layers.append(dglnn.SAGEConv(hid_size, out_size, 'mean'))
self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean"))
self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean"))
self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean"))
self.dropout = nn.Dropout(0.5)
self.hid_size = hid_size
self.out_size = out_size
......@@ -36,21 +43,36 @@ class SAGE(nn.Module):
return h
def inference(self, g, device, batch_size, use_uva):
g.ndata['h'] = g.ndata['feat']
sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=['h'])
g.ndata["h"] = g.ndata["feat"]
sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["h"])
for l, layer in enumerate(self.layers):
dataloader = DataLoader(
g, torch.arange(g.num_nodes(), device=device), sampler, device=device,
batch_size=batch_size, shuffle=False, drop_last=False,
num_workers=0, use_ddp=True, use_uva=use_uva)
g,
torch.arange(g.num_nodes(), device=device),
sampler,
device=device,
batch_size=batch_size,
shuffle=False,
drop_last=False,
num_workers=0,
use_ddp=True,
use_uva=use_uva,
)
# in order to prevent running out of GPU memory, allocate a
# shared output tensor 'y' in host memory
y = shared_tensor(
(g.num_nodes(), self.hid_size if l != len(self.layers) - 1 else self.out_size))
for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader) \
if dist.get_rank() == 0 else dataloader:
x = blocks[0].srcdata['h']
h = layer(blocks[0], x) # len(blocks) = 1
(
g.num_nodes(),
self.hid_size
if l != len(self.layers) - 1
else self.out_size,
)
)
for input_nodes, output_nodes, blocks in (
tqdm.tqdm(dataloader) if dist.get_rank() == 0 else dataloader
):
x = blocks[0].srcdata["h"]
h = layer(blocks[0], x) # len(blocks) = 1
if l != len(self.layers) - 1:
h = F.relu(h)
h = self.dropout(h)
......@@ -58,51 +80,74 @@ class SAGE(nn.Module):
y[output_nodes] = h.to(y.device, non_blocking=True)
# make sure all GPUs are done writing to 'y'
dist.barrier()
g.ndata['h'] = y if use_uva else y.to(device)
g.ndata["h"] = y if use_uva else y.to(device)
g.ndata.pop('h')
g.ndata.pop("h")
return y
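# Note: inference() proceeds layer by layer over the full graph. For each layer,
# every GPU computes representations for its shard of nodes and writes them into
# the host-memory shared_tensor `y`; the dist.barrier() ensures all ranks finish
# before `y` becomes the input features ("h") of the next layer, keeping peak GPU
# memory roughly at one layer's activations.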
def evaluate(model, g, dataloader):
model.eval()
ys = []
y_hats = []
for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader):
with torch.no_grad():
x = blocks[0].srcdata['feat']
ys.append(blocks[-1].dstdata['label'])
x = blocks[0].srcdata["feat"]
ys.append(blocks[-1].dstdata["label"])
y_hats.append(model(blocks, x))
return MF.accuracy(torch.cat(y_hats), torch.cat(ys))
def layerwise_infer(proc_id, device, g, nid, model, use_uva, batch_size = 2**16):
def layerwise_infer(
proc_id, device, g, nid, model, use_uva, batch_size=2**16
):
model.eval()
with torch.no_grad():
pred = model.module.inference(g, device, batch_size, use_uva)
pred = pred[nid]
labels = g.ndata['label'][nid].to(pred.device)
labels = g.ndata["label"][nid].to(pred.device)
if proc_id == 0:
acc = MF.accuracy(pred, labels)
print("Test Accuracy {:.4f}".format(acc.item()))
def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva):
sampler = NeighborSampler([10, 10, 10],
prefetch_node_feats=['feat'],
prefetch_labels=['label'])
train_dataloader = DataLoader(g, train_idx, sampler, device=device,
batch_size=1024, shuffle=True,
drop_last=False, num_workers=0,
use_ddp=True, use_uva=use_uva)
val_dataloader = DataLoader(g, val_idx, sampler, device=device,
batch_size=1024, shuffle=True,
drop_last=False, num_workers=0,
use_ddp=True, use_uva=use_uva)
sampler = NeighborSampler(
[10, 10, 10], prefetch_node_feats=["feat"], prefetch_labels=["label"]
)
train_dataloader = DataLoader(
g,
train_idx,
sampler,
device=device,
batch_size=1024,
shuffle=True,
drop_last=False,
num_workers=0,
use_ddp=True,
use_uva=use_uva,
)
val_dataloader = DataLoader(
g,
val_idx,
sampler,
device=device,
batch_size=1024,
shuffle=True,
drop_last=False,
num_workers=0,
use_ddp=True,
use_uva=use_uva,
)
opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
for epoch in range(10):
model.train()
total_loss = 0
for it, (input_nodes, output_nodes, blocks) in enumerate(train_dataloader):
x = blocks[0].srcdata['feat']
y = blocks[-1].dstdata['label']
for it, (input_nodes, output_nodes, blocks) in enumerate(
train_dataloader
):
x = blocks[0].srcdata["feat"]
y = blocks[-1].dstdata["label"]
y_hat = model(blocks, x)
loss = F.cross_entropy(y_hat, y)
opt.zero_grad()
......@@ -111,54 +156,80 @@ def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva):
total_loss += loss
acc = evaluate(model, g, val_dataloader).to(device) / nprocs
dist.reduce(acc, 0)
if (proc_id == 0):
print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} "
.format(epoch, total_loss / (it+1), acc.item()))
if proc_id == 0:
print(
"Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
epoch, total_loss / (it + 1), acc.item()
)
)
def run(proc_id, nprocs, devices, g, data, mode):
# find corresponding device for my rank
device = devices[proc_id]
torch.cuda.set_device(device)
# initialize process group and unpack data for sub-processes
dist.init_process_group(backend="nccl", init_method='tcp://127.0.0.1:12345',
world_size=nprocs, rank=proc_id)
dist.init_process_group(
backend="nccl",
init_method="tcp://127.0.0.1:12345",
world_size=nprocs,
rank=proc_id,
)
out_size, train_idx, val_idx, test_idx = data
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
g = g.to(device if mode == 'puregpu' else 'cpu')
g = g.to(device if mode == "puregpu" else "cpu")
# create GraphSAGE model (distributed)
in_size = g.ndata['feat'].shape[1]
in_size = g.ndata["feat"].shape[1]
model = SAGE(in_size, 256, out_size).to(device)
model = DistributedDataParallel(model, device_ids=[device], output_device=device)
model = DistributedDataParallel(
model, device_ids=[device], output_device=device
)
# training + testing
use_uva = (mode == 'mixed')
use_uva = mode == "mixed"
train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva)
layerwise_infer(proc_id, device, g, test_idx, model, use_uva)
# cleanup process group
dist.destroy_process_group()
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--mode", default='mixed', choices=['mixed', 'puregpu'],
help="Training mode. 'mixed' for CPU-GPU mixed training, "
"'puregpu' for pure-GPU training.")
parser.add_argument("--gpu", type=str, default='0',
help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training,"
" e.g., 0,1,2,3.")
parser.add_argument(
"--mode",
default="mixed",
choices=["mixed", "puregpu"],
help="Training mode. 'mixed' for CPU-GPU mixed training, "
"'puregpu' for pure-GPU training.",
)
parser.add_argument(
"--gpu",
type=str,
default="0",
help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training,"
" e.g., 0,1,2,3.",
)
args = parser.parse_args()
devices = list(map(int, args.gpu.split(',')))
devices = list(map(int, args.gpu.split(",")))
nprocs = len(devices)
assert torch.cuda.is_available(), f"Must have GPUs to enable multi-gpu training."
print(f'Training in {args.mode} mode using {nprocs} GPU(s)')
assert (
torch.cuda.is_available()
), f"Must have GPUs to enable multi-gpu training."
print(f"Training in {args.mode} mode using {nprocs} GPU(s)")
# load and preprocess dataset
print('Loading data')
dataset = AsNodePredDataset(DglNodePropPredDataset('ogbn-products'))
print("Loading data")
dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
g = dataset[0]
# avoid creating certain graph formats in each sub-process to save memory
g.create_formats_()
# thread limiting to avoid resource competition
os.environ['OMP_NUM_THREADS'] = str(mp.cpu_count() // 2 // nprocs)
data = dataset.num_classes, dataset.train_idx, dataset.val_idx, dataset.test_idx
os.environ["OMP_NUM_THREADS"] = str(mp.cpu_count() // 2 // nprocs)
data = (
dataset.num_classes,
dataset.train_idx,
dataset.val_idx,
dataset.test_idx,
)
mp.spawn(run, args=(nprocs, devices, g, data, args.mode), nprocs=nprocs)
''' Code adapted from https://github.com/kavehhassani/mvgrl '''
""" Code adapted from https://github.com/kavehhassani/mvgrl """
import os
import re
from collections import Counter
import networkx as nx
import numpy as np
import dgl
import torch as th
import networkx as nx
from dgl.data import DGLDataset
from collections import Counter
from scipy.linalg import fractional_matrix_power, inv
''' Compute Personalized PageRank'''
import dgl
from dgl.data import DGLDataset
""" Compute Personalized Page Ranking"""
def compute_ppr(graph: nx.Graph, alpha=0.2, self_loop=True):
a = nx.convert_matrix.to_numpy_array(graph)
if self_loop:
a = a + np.eye(a.shape[0]) # A^ = A + I_n
d = np.diag(np.sum(a, 1)) # D^ = Sigma A^_ii
dinv = fractional_matrix_power(d, -0.5) # D^(-1/2)
at = np.matmul(np.matmul(dinv, a), dinv) # A~ = D^(-1/2) x A^ x D^(-1/2)
return alpha * inv((np.eye(a.shape[0]) - (1 - alpha) * at)) # a(I_n-(1-a)A~)^-1
a = a + np.eye(a.shape[0]) # A^ = A + I_n
d = np.diag(np.sum(a, 1)) # D^ = Sigma A^_ii
dinv = fractional_matrix_power(d, -0.5) # D^(-1/2)
at = np.matmul(np.matmul(dinv, a), dinv) # A~ = D^(-1/2) x A^ x D^(-1/2)
return alpha * inv(
(np.eye(a.shape[0]) - (1 - alpha) * at)
) # a(I_n-(1-a)A~)^-1
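# Note: this is the closed-form Personalized PageRank diffusion
# S = alpha * inv(I - (1 - alpha) * A_sym) with A_sym = D^{-1/2} (A + I) D^{-1/2}
# when self_loop=True. Illustrative check (toy graph, not in the original code):
#
#   S = compute_ppr(nx.path_graph(4), alpha=0.2)   # dense (4, 4) diffusion matrix
#
# load() below uses this matrix as the weighted "diff_graph" view of each graph.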
def download(dataset, datadir):
os.makedirs(datadir)
url = 'https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/{0}.zip'.format(dataset)
url = "https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/{0}.zip".format(
dataset
)
zipfile = os.path.basename(url)
os.system('wget {0}; unzip {1}'.format(url, zipfile))
os.system('mv {0}/* {1}'.format(dataset, datadir))
os.system('rm -r {0}'.format(dataset))
os.system('rm {0}'.format(zipfile))
os.system("wget {0}; unzip {1}".format(url, zipfile))
os.system("mv {0}/* {1}".format(dataset, datadir))
os.system("rm -r {0}".format(dataset))
os.system("rm {0}".format(zipfile))
def process(dataset):
src = os.path.join(os.path.dirname(__file__), 'data')
src = os.path.join(os.path.dirname(__file__), "data")
prefix = os.path.join(src, dataset, dataset)
# assign each node to the corresponding graph
graph_node_dict = {}
with open('{0}_graph_indicator.txt'.format(prefix), 'r') as f:
with open("{0}_graph_indicator.txt".format(prefix), "r") as f:
for idx, line in enumerate(f):
graph_node_dict[idx + 1] = int(line.strip('\n'))
graph_node_dict[idx + 1] = int(line.strip("\n"))
node_labels = []
if os.path.exists('{0}_node_labels.txt'.format(prefix)):
with open('{0}_node_labels.txt'.format(prefix), 'r') as f:
if os.path.exists("{0}_node_labels.txt".format(prefix)):
with open("{0}_node_labels.txt".format(prefix), "r") as f:
for line in f:
node_labels += [int(line.strip('\n')) - 1]
node_labels += [int(line.strip("\n")) - 1]
num_unique_node_labels = max(node_labels) + 1
else:
print('No node labels')
print("No node labels")
node_attrs = []
if os.path.exists('{0}_node_attributes.txt'.format(prefix)):
with open('{0}_node_attributes.txt'.format(prefix), 'r') as f:
if os.path.exists("{0}_node_attributes.txt".format(prefix)):
with open("{0}_node_attributes.txt".format(prefix), "r") as f:
for line in f:
node_attrs.append(
np.array([float(attr) for attr in re.split("[,\s]+", line.strip("\s\n")) if attr], dtype=np.float)
np.array(
[
float(attr)
for attr in re.split("[,\s]+", line.strip("\s\n"))
if attr
],
dtype=np.float,
)
)
else:
print('No node attributes')
print("No node attributes")
graph_labels = []
unique_labels = set()
with open('{0}_graph_labels.txt'.format(prefix), 'r') as f:
with open("{0}_graph_labels.txt".format(prefix), "r") as f:
for line in f:
val = int(line.strip('\n'))
val = int(line.strip("\n"))
if val not in unique_labels:
unique_labels.add(val)
graph_labels.append(val)
......@@ -71,9 +87,9 @@ def process(dataset):
adj_list = {idx: [] for idx in range(1, len(graph_labels) + 1)}
index_graph = {idx: [] for idx in range(1, len(graph_labels) + 1)}
with open('{0}_A.txt'.format(prefix), 'r') as f:
with open("{0}_A.txt".format(prefix), "r") as f:
for line in f:
u, v = tuple(map(int, line.strip('\n').split(',')))
u, v = tuple(map(int, line.strip("\n").split(",")))
adj_list[graph_node_dict[u]].append((u, v))
index_graph[graph_node_dict[u]] += [u, v]
......@@ -84,17 +100,17 @@ def process(dataset):
for idx in range(1, 1 + len(adj_list)):
graph = nx.from_edgelist(adj_list[idx])
graph.graph['label'] = graph_labels[idx - 1]
graph.graph["label"] = graph_labels[idx - 1]
for u in graph.nodes():
if len(node_labels) > 0:
node_label_one_hot = [0] * num_unique_node_labels
node_label = node_labels[u - 1]
node_label_one_hot[node_label] = 1
graph.nodes[u]['label'] = node_label_one_hot
graph.nodes[u]["label"] = node_label_one_hot
if len(node_attrs) > 0:
graph.nodes[u]['feat'] = node_attrs[u - 1]
graph.nodes[u]["feat"] = node_attrs[u - 1]
if len(node_attrs) > 0:
graph.graph['feat_dim'] = node_attrs[0].shape[0]
graph.graph["feat_dim"] = node_attrs[0].shape[0]
# relabeling
mapping = {}
......@@ -104,7 +120,7 @@ def process(dataset):
graphs.append(nx.relabel_nodes(graph, mapping))
pprs.append(compute_ppr(graph, alpha=0.2))
if 'feat_dim' in graphs[0].graph:
if "feat_dim" in graphs[0].graph:
pass
else:
max_deg = max([max(dict(graph.degree).values()) for graph in graphs])
......@@ -112,15 +128,18 @@ def process(dataset):
for u in graph.nodes(data=True):
f = np.zeros(max_deg + 1)
f[graph.degree[u[0]]] = 1.0
if 'label' in u[1]:
f = np.concatenate((np.array(u[1]['label'], dtype=np.float), f))
graph.nodes[u[0]]['feat'] = f
if "label" in u[1]:
f = np.concatenate(
(np.array(u[1]["label"], dtype=np.float), f)
)
graph.nodes[u[0]]["feat"] = f
return graphs, pprs
def load(dataset):
basedir = os.path.dirname(os.path.abspath(__file__))
datadir = os.path.join(basedir, 'data', dataset)
datadir = os.path.join(basedir, "data", dataset)
if not os.path.exists(datadir):
download(dataset, datadir)
......@@ -129,20 +148,27 @@ def load(dataset):
for idx, graph in enumerate(graphs):
adj.append(nx.to_numpy_array(graph))
labels.append(graph.graph['label'])
feat.append(np.array(list(nx.get_node_attributes(graph, 'feat').values())))
adj, diff, feat, labels = np.array(adj), np.array(diff), np.array(feat), np.array(labels)
np.save(f'{datadir}/adj.npy', adj)
np.save(f'{datadir}/diff.npy', diff)
np.save(f'{datadir}/feat.npy', feat)
np.save(f'{datadir}/labels.npy', labels)
labels.append(graph.graph["label"])
feat.append(
np.array(list(nx.get_node_attributes(graph, "feat").values()))
)
adj, diff, feat, labels = (
np.array(adj),
np.array(diff),
np.array(feat),
np.array(labels),
)
np.save(f"{datadir}/adj.npy", adj)
np.save(f"{datadir}/diff.npy", diff)
np.save(f"{datadir}/feat.npy", feat)
np.save(f"{datadir}/labels.npy", labels)
else:
adj = np.load(f'{datadir}/adj.npy', allow_pickle=True)
diff = np.load(f'{datadir}/diff.npy', allow_pickle=True)
feat = np.load(f'{datadir}/feat.npy', allow_pickle=True)
labels = np.load(f'{datadir}/labels.npy', allow_pickle=True)
adj = np.load(f"{datadir}/adj.npy", allow_pickle=True)
diff = np.load(f"{datadir}/diff.npy", allow_pickle=True)
feat = np.load(f"{datadir}/feat.npy", allow_pickle=True)
labels = np.load(f"{datadir}/labels.npy", allow_pickle=True)
n_graphs = adj.shape[0]
......@@ -156,14 +182,14 @@ def load(dataset):
graph = dgl.graph(edge_indexes)
graph = graph.add_self_loop()
graph.ndata['feat'] = th.tensor(feat[i]).float()
graph.ndata["feat"] = th.tensor(feat[i]).float()
diff_adj = diff[i]
diff_indexes = diff_adj.nonzero()
diff_weight = th.tensor(diff_adj[diff_indexes]).float()
diff_graph = dgl.graph(diff_indexes)
diff_graph.edata['edge_weight'] = diff_weight
diff_graph.edata["edge_weight"] = diff_weight
label = labels[i]
graphs.append(graph)
diff_graphs.append(diff_graph)
......@@ -174,9 +200,10 @@ def load(dataset):
dataset = TUDataset(graphs, diff_graphs, labels)
return dataset
class TUDataset(DGLDataset):
def __init__(self, graphs, diff_graphs, labels):
super(TUDataset, self).__init__(name='tu')
super(TUDataset, self).__init__(name="tu")
self.graphs = graphs
self.diff_graphs = diff_graphs
self.labels = labels
......@@ -188,4 +215,4 @@ class TUDataset(DGLDataset):
return len(self.graphs)
def __getitem__(self, idx):
return self.graphs[idx], self.diff_graphs[idx], self.labels[idx]
\ No newline at end of file
return self.graphs[idx], self.diff_graphs[idx], self.labels[idx]