"vscode:/vscode.git/clone" did not exist on "5ea234834840e40232c6706a13c574ff96cbee93"
Unverified commit 4361f8a6, authored by Rhett Ying and committed by GitHub

[Test] Fix broken regression benchmarks (#4229)

* dummy

* upgrade torch from 1.8.1 to 1.9.0 for asv

* update torch download link

* remove version pins for torchvision and torchtext

* revert unnecessary change

* fix several test failures

* fix

* remove am dataset

* fix test failure

* fix test failure

* fix rgcn failures
parent c56e27a8
...
@@ -11,7 +11,7 @@ from .. import utils
 @utils.skip_if_gpu()
 @utils.benchmark('time')
 @utils.parametrize('graph_name', ['livejournal', 'reddit'])
-@utils.parametrize('format', ['coo', 'csc'])
+@utils.parametrize('format', ['coo'])
 @utils.parametrize('seed_egdes_num', [500, 5000, 50000])
 def track_time(graph_name, format, seed_egdes_num):
     device = utils.get_bench_device()
...
...
@@ -10,8 +10,9 @@ def _random_walk(g, seeds, length):
 def _node2vec(g, seeds, length):
     return dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, length)

+@utils.skip_if_gpu()
 @utils.benchmark('time')
-@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
+@utils.parametrize('graph_name', ['cora', 'livejournal', 'friendster'])
 @utils.parametrize('num_seeds', [10, 100, 1000])
 @utils.parametrize('length', [2, 5, 10, 20])
 @utils.parametrize('algorithm', ['_random_walk', '_node2vec'])
...
...
@@ -8,7 +8,7 @@ from .. import utils
 @utils.skip_if_gpu()
 @utils.benchmark('time', timeout=1200)
-@utils.parametrize('graph_name', ['reddit', "ogbn-product"])
+@utils.parametrize('graph_name', ['reddit', "ogbn-products"])
 @utils.parametrize('num_seed_nodes', [32, 256, 1024, 2048])
 @utils.parametrize('fanout', [5, 10, 20])
 def track_time(graph_name, num_seed_nodes, fanout):
...
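These track_time benchmarks follow ASV's convention of attaching params/param_names to the tracked function. The real benchmarks/utils.py is not part of this diff, so the following is only a hypothetical sketch of how such a parametrize decorator can be built; the decorator name and attributes mirror ASV's documented benchmark interface, not the repository's actual helper.

# Hypothetical sketch -- the actual utils.parametrize is not shown in this diff.
def parametrize(name, values):
    """Attach one parameter axis to an ASV track_* function."""
    def decorator(func):
        # ASV reads `params` (a list of value lists) and `param_names`
        # (one name per axis) off the benchmark callable. Decorators apply
        # bottom-up, so axis order matches the argument order below.
        func.params = getattr(func, 'params', []) + [values]
        func.param_names = getattr(func, 'param_names', []) + [name]
        return func
    return decorator

@parametrize('format', ['coo'])
@parametrize('graph_name', ['livejournal', 'reddit'])
def track_time(graph_name, format):
    ...  # runs once per (graph_name, format) combination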
import numpy as np
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
def __init__(self,
num_nodes,
n_hidden,
num_classes,
num_rels,
num_bases,
num_hidden_layers,
dropout,
low_mem):
super(RGCN, self).__init__()
self.layers = nn.ModuleList()
# i2h
self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
num_bases, activation=F.relu, dropout=dropout,
low_mem=low_mem))
# h2h
for i in range(num_hidden_layers):
self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
num_bases, activation=F.relu, dropout=dropout,
low_mem=low_mem))
# o2h
self.layers.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
num_bases, activation=None, low_mem=low_mem))
def forward(self, g, h, r, norm):
for layer in self.layers:
h = layer(g, h, r, norm)
return h
def evaluate(model, g, feats, edge_type, edge_norm, labels, idx):
model.eval()
with torch.no_grad():
logits = model(g, feats, edge_type, edge_norm)
logits = logits[idx]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels) * 100
@utils.benchmark('acc')
@utils.parametrize('data', ['aifb', 'mutag'])
@utils.parametrize('lowmem', [True, False])
@utils.parametrize('use_type_count', [True, False])
def track_acc(data, lowmem, use_type_count):
# args
if data == 'aifb':
num_bases = -1
l2norm = 0.
elif data == 'mutag':
num_bases = 30
l2norm = 5e-4
elif data == 'am':
num_bases = 40
l2norm = 5e-4
else:
raise ValueError()
data = utils.process_data(data)
device = utils.get_bench_device()
g = data[0]
num_rels = len(g.canonical_etypes)
category = data.predict_category
num_classes = data.num_classes
train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
labels = g.nodes[category].data.pop('labels').to(device)
# calculate norm for each edge type and store in edge
for canonical_etype in g.canonical_etypes:
u, v, eid = g.all_edges(form='all', etype=canonical_etype)
_, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
norm = 1. / degrees.float()
norm = norm.unsqueeze(1)
g.edges[canonical_etype].data['norm'] = norm
# get target category id
category_id = len(g.ntypes)
for i, ntype in enumerate(g.ntypes):
if ntype == category:
category_id = i
if use_type_count:
g, _, edge_type = dgl.to_homogeneous(g, edata=['norm'], return_count=True)
g = g.to(device)
else:
g = dgl.to_homogeneous(g, edata=['norm']).to(device)
edge_type = g.edata.pop(dgl.ETYPE).long()
num_nodes = g.number_of_nodes()
edge_norm = g.edata['norm']
# find out the target node ids in g
target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
train_idx = target_idx[train_mask]
test_idx = target_idx[test_mask]
train_labels = labels[train_mask]
test_labels = labels[test_mask]
# since the nodes are featureless, the input feature is then the node id.
feats = torch.arange(num_nodes, device=device)
# create model
model = RGCN(num_nodes,
16,
num_classes,
num_rels,
num_bases,
0,
0,
lowmem).to(device)
optimizer = torch.optim.Adam(model.parameters(),
lr=1e-2,
weight_decay=l2norm)
model.train()
for epoch in range(30):
logits = model(g, feats, edge_type, edge_norm)
loss = F.cross_entropy(logits[train_idx], train_labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc = evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx)
return acc
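The per-edge 'norm' computed in the loop above is simply the reciprocal in-degree of each edge's destination node. Below is a minimal standalone sketch of the same computation, checked against the dgl.norm_by_dst helper used by the shared rgcn module later in this commit; the toy graph is invented for illustration.

import dgl
import torch

# Toy graph for illustration: destinations 0, 0, 1 have in-degrees 2, 2, 1.
g = dgl.graph((torch.tensor([1, 2, 0]), torch.tensor([0, 0, 1])))

# Loop-style norm as in track_acc above: count occurrences of each
# destination node, then take 1 / degree for every edge.
_, inverse_index, count = torch.unique(
    g.edges()[1], return_inverse=True, return_counts=True)
norm = (1. / count[inverse_index].float()).unsqueeze(1)

# Same result from the built-in used elsewhere in this commit (DGL >= 0.8).
assert torch.allclose(norm, dgl.norm_by_dst(g).unsqueeze(1))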
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy
from .. import utils
from .. import rgcn
@utils.benchmark('acc', timeout=1200)
@utils.parametrize('dataset', ['aifb', 'mutag'])
@utils.parametrize('ns_mode', [False])
def track_acc(dataset, ns_mode):
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = rgcn.load_data(
dataset, get_norm=True)
num_hidden = 16
if dataset == 'aifb':
num_bases = -1
l2norm = 0.
elif dataset == 'mutag':
num_bases = 30
l2norm = 5e-4
elif dataset == 'am':
num_bases = 40
l2norm = 5e-4
else:
raise ValueError()
model = rgcn.RGCN(g.num_nodes(),
num_hidden,
num_classes,
num_rels,
num_bases=num_bases,
ns_mode=ns_mode)
device = utils.get_bench_device()
labels = labels.to(device)
model = model.to(device)
g = g.int().to(device)
optimizer = torch.optim.Adam(
model.parameters(), lr=1e-2, weight_decay=l2norm)
model.train()
for epoch in range(30):
logits = model(g)
logits = logits[target_idx]
loss = F.cross_entropy(logits[train_idx], labels[train_idx])
optimizer.zero_grad()
loss.backward()
optimizer.step()
model.eval()
with torch.no_grad():
logits = model(g)
logits = logits[target_idx]
test_acc = accuracy(logits[test_idx].argmax(
dim=1), labels[test_idx]).item()
return test_acc
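The torchmetrics functional accuracy used here reduces, for this multiclass argmax case, to the mean of exact matches. A small sketch of that equivalence with toy tensors (torchmetrics releases from the era of this commit accept this positional form; newer releases require an explicit task argument):

import torch
from torchmetrics.functional import accuracy

preds = torch.tensor([1, 0, 2, 1])    # argmax'ed logits, toy values
target = torch.tensor([1, 0, 1, 1])

manual = (preds == target).float().mean().item()   # 0.75
metric = accuracy(preds, target).item()            # same value
assert abs(manual - metric) < 1e-6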
...
@@ -34,9 +34,6 @@ class EntityClassify(nn.Module):
         Dropout
     use_self_loop : bool
         Use self loop if True, default False.
-    low_mem : bool
-        True to use low memory implementation of relation message passing function
-        trade speed with memory consumption
     """
     def __init__(self,
                  device,
@@ -48,7 +45,6 @@ class EntityClassify(nn.Module):
                  num_hidden_layers=1,
                  dropout=0,
                  use_self_loop=False,
-                 low_mem=False,
                  layer_norm=False):
         super(EntityClassify, self).__init__()
         self.device = device
@@ -60,7 +56,6 @@ class EntityClassify(nn.Module):
         self.num_hidden_layers = num_hidden_layers
         self.dropout = dropout
         self.use_self_loop = use_self_loop
-        self.low_mem = low_mem
         self.layer_norm = layer_norm
         self.layers = nn.ModuleList()
@@ -68,19 +63,19 @@ class EntityClassify(nn.Module):
         self.layers.append(RelGraphConv(
             self.h_dim, self.h_dim, self.num_rels, "basis",
             self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-            low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+            dropout=self.dropout, layer_norm = layer_norm))
         # h2h
         for idx in range(self.num_hidden_layers):
             self.layers.append(RelGraphConv(
                 self.h_dim, self.h_dim, self.num_rels, "basis",
                 self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-                low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+                dropout=self.dropout, layer_norm = layer_norm))
         # h2o
         self.layers.append(RelGraphConv(
             self.h_dim, self.out_dim, self.num_rels, "basis",
             self.num_bases, activation=None,
             self_loop=self.use_self_loop,
-            low_mem=self.low_mem, layer_norm = layer_norm))
+            layer_norm = layer_norm))

     def forward(self, blocks, feats, norm=None):
         if blocks is None:
@@ -195,8 +190,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats):
     return eval_logits, eval_seeds

-@utils.benchmark('time', 3600)  # ogbn-mag takes ~1 hour to train
+@utils.benchmark('acc', timeout=3600)  # ogbn-mag takes ~1 hour to train
 @utils.parametrize('data', ['am', 'ogbn-mag'])
 def track_acc(data):
     dataset = utils.process_data(data)
@@ -220,7 +214,6 @@ def track_acc(data):
     dropout = 0.5
     use_self_loop = True
     lr = 0.01
-    low_mem = True
     num_workers = 4
     hg = dataset[0]
@@ -306,7 +299,6 @@ def track_acc(data):
         num_hidden_layers=n_layers - 2,
         dropout=dropout,
         use_self_loop=use_self_loop,
-        low_mem=low_mem,
         layer_norm=False)
     embed_layer = embed_layer.to(device)
...
import time
import numpy as np
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
def __init__(self,
num_nodes,
n_hidden,
num_classes,
num_rels,
num_bases,
num_hidden_layers,
dropout):
super(RGCN, self).__init__()
self.layers = nn.ModuleList()
# i2h
self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
num_bases, activation=F.relu, dropout=dropout))
# h2h
for i in range(num_hidden_layers):
self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
num_bases, activation=F.relu, dropout=dropout))
# o2h
self.layers.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
num_bases, activation=None))
def forward(self, g, h, r, norm):
for layer in self.layers:
h = layer(g, h, r, norm)
return h
@utils.benchmark('time', 300)
@utils.parametrize('data', ['aifb'])
@utils.parametrize('use_type_count', [True, False])
def track_time(data, use_type_count):
# args
if data == 'aifb':
if dgl.__version__.startswith("0.8"):
num_bases = None
else:
num_bases = -1
l2norm = 0.
elif data == 'am':
num_bases = 40
l2norm = 5e-4
else:
raise ValueError()
data = utils.process_data(data)
device = utils.get_bench_device()
num_epochs = 30
g = data[0]
num_rels = len(g.canonical_etypes)
category = data.predict_category
num_classes = data.num_classes
train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
labels = g.nodes[category].data.pop('labels').to(device)
# calculate norm for each edge type and store in edge
for canonical_etype in g.canonical_etypes:
u, v, eid = g.all_edges(form='all', etype=canonical_etype)
_, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
norm = 1. / degrees.float()
norm = norm.unsqueeze(1)
g.edges[canonical_etype].data['norm'] = norm
# get target category id
category_id = len(g.ntypes)
for i, ntype in enumerate(g.ntypes):
if ntype == category:
category_id = i
if use_type_count:
g, _, edge_type = dgl.to_homogeneous(g, edata=['norm'], return_count=True)
g = g.to(device)
else:
g = dgl.to_homogeneous(g, edata=['norm']).to(device)
edge_type = g.edata.pop(dgl.ETYPE).long()
num_nodes = g.number_of_nodes()
edge_norm = g.edata['norm']
# find out the target node ids in g
target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
train_idx = target_idx[train_mask]
test_idx = target_idx[test_mask]
train_labels = labels[train_mask]
test_labels = labels[test_mask]
# since the nodes are featureless, the input feature is then the node id.
feats = torch.arange(num_nodes, device=device)
# create model
model = RGCN(num_nodes,
16,
num_classes,
num_rels,
num_bases,
0,
0).to(device)
optimizer = torch.optim.Adam(model.parameters(),
lr=1e-2,
weight_decay=l2norm)
model.train()
t0 = time.time()
for epoch in range(num_epochs):
logits = model(g, feats, edge_type, edge_norm)
loss = F.cross_entropy(logits[train_idx], train_labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
t1 = time.time()
return (t1 - t0) / num_epochs
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy
from .. import utils
from .. import rgcn
@utils.benchmark('time', 1200)
@utils.parametrize('data', ['aifb', 'am'])
def track_time(data):
# args
if data == 'aifb':
num_bases = -1
l2norm = 0.
elif data == 'am':
num_bases = 40
l2norm = 5e-4
else:
raise ValueError()
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = rgcn.load_data(
data, get_norm=True)
num_hidden = 16
model = rgcn.RGCN(g.num_nodes(),
num_hidden,
num_classes,
num_rels,
num_bases=num_bases)
device = utils.get_bench_device()
labels = labels.to(device)
model = model.to(device)
g = g.int().to(device)
optimizer = torch.optim.Adam(
model.parameters(), lr=1e-2, weight_decay=l2norm)
model.train()
num_epochs = 30
t0 = time.time()
for epoch in range(num_epochs):
logits = model(g)
logits = logits[target_idx]
loss = F.cross_entropy(logits[train_idx], labels[train_idx])
optimizer.zero_grad()
loss.backward()
optimizer.step()
t1 = time.time()
return (t1 - t0) / num_epochs
...
@@ -228,15 +228,12 @@ class EntityClassify(nn.Module):
     return h

 @utils.benchmark('time', 600)
-@utils.parametrize('data', ['am', 'ogbn-mag'])
+@utils.parametrize('data', ['ogbn-mag'])
 def track_time(data):
     dataset = utils.process_data(data)
     device = utils.get_bench_device()

-    if data == 'am':
-        n_bases = 40
-        l2norm = 5e-4
-    elif data == 'ogbn-mag':
+    if data == 'ogbn-mag':
         n_bases = 2
         l2norm = 0
     else:
...
...
@@ -34,9 +34,6 @@ class EntityClassify(nn.Module):
         Dropout
     use_self_loop : bool
         Use self loop if True, default False.
-    low_mem : bool
-        True to use low memory implementation of relation message passing function
-        trade speed with memory consumption
     """
     def __init__(self,
                  device,
@@ -48,7 +45,6 @@ class EntityClassify(nn.Module):
                  num_hidden_layers=1,
                  dropout=0,
                  use_self_loop=False,
-                 low_mem=False,
                  layer_norm=False):
         super(EntityClassify, self).__init__()
         self.device = device
@@ -60,7 +56,6 @@ class EntityClassify(nn.Module):
         self.num_hidden_layers = num_hidden_layers
         self.dropout = dropout
         self.use_self_loop = use_self_loop
-        self.low_mem = low_mem
         self.layer_norm = layer_norm
         self.layers = nn.ModuleList()
@@ -68,19 +63,19 @@ class EntityClassify(nn.Module):
         self.layers.append(RelGraphConv(
             self.h_dim, self.h_dim, self.num_rels, "basis",
             self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-            low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+            dropout=self.dropout, layer_norm = layer_norm))
         # h2h
         for idx in range(self.num_hidden_layers):
             self.layers.append(RelGraphConv(
                 self.h_dim, self.h_dim, self.num_rels, "basis",
                 self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-                low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+                dropout=self.dropout, layer_norm = layer_norm))
         # h2o
         self.layers.append(RelGraphConv(
             self.h_dim, self.out_dim, self.num_rels, "basis",
             self.num_bases, activation=None,
             self_loop=self.use_self_loop,
-            low_mem=self.low_mem, layer_norm = layer_norm))
+            layer_norm = layer_norm))

     def forward(self, blocks, feats, norm=None):
         if blocks is None:
@@ -196,7 +191,6 @@ def track_time(data):
     dropout = 0.5
     use_self_loop = True
     lr = 0.01
-    low_mem = True
     num_workers = 4
     iter_start = 3
     iter_count = 10
@@ -276,7 +270,6 @@ def track_time(data):
         num_hidden_layers=n_layers - 2,
         dropout=dropout,
         use_self_loop=use_self_loop,
-        low_mem=low_mem,
         layer_norm=False)
     embed_layer = embed_layer.to(device)
...
...
@@ -4,12 +4,9 @@ import torch as th
 import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
-import torch.multiprocessing as mp
-from torch.utils.data import DataLoader
 import dgl.nn.pytorch as dglnn
 import dgl.function as fn
 import time
-import traceback

 from .. import utils
@@ -123,17 +120,19 @@ def track_time(data, num_negs, batch_size):
     # Create PyTorch DataLoader for constructing blocks
     sampler = dgl.dataloading.MultiLayerNeighborSampler(
         [int(fanout) for fanout in fan_out.split(',')])
-    dataloader = dgl.dataloading.EdgeDataLoader(
-        g, train_seeds, sampler, exclude='reverse_id',
+    sampler = dgl.dataloading.as_edge_prediction_sampler(
+        sampler, exclude='reverse_id',
         # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
         reverse_eids=th.cat([
-            th.arange(n_edges // 2, n_edges),
+            th.arange(
+                n_edges // 2, n_edges),
             th.arange(0, n_edges // 2)]),
-        negative_sampler=NegativeSampler(g, num_negs),
+        negative_sampler=NegativeSampler(g, num_negs))
+    dataloader = dgl.dataloading.DataLoader(
+        g, train_seeds, sampler,
         batch_size=batch_size,
         shuffle=True,
         drop_last=False,
-        pin_memory=True,
         num_workers=num_workers)

     # Define model and optimizer
...
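The reverse_eids tensor built in this hunk encodes the comment's invariant: in the Reddit graph, edge e and its reverse are stored exactly |E|/2 apart. A tiny sketch with a made-up edge count, showing that the mapping is its own inverse:

import torch as th

n_edges = 6  # made-up; in the benchmark this is the graph's edge count
reverse_eids = th.cat([th.arange(n_edges // 2, n_edges),
                       th.arange(0, n_edges // 2)])
# reverse_eids = tensor([3, 4, 5, 0, 1, 2]): edge e's reverse is e +/- |E|/2,
# so following the mapping twice returns every edge to itself.
assert th.equal(reverse_eids[reverse_eids], th.arange(n_edges))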
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from . import utils
class RGCN(nn.Module):
def __init__(self, num_nodes, h_dim, out_dim, num_rels,
regularizer="basis", num_bases=-1, dropout=0.,
self_loop=False,
ns_mode=False):
super(RGCN, self).__init__()
if num_bases == -1:
num_bases = num_rels
self.emb = nn.Embedding(num_nodes, h_dim)
self.conv1 = RelGraphConv(h_dim, h_dim, num_rels, regularizer,
num_bases, self_loop=self_loop)
self.conv2 = RelGraphConv(
h_dim, out_dim, num_rels, regularizer, num_bases, self_loop=self_loop)
self.dropout = nn.Dropout(dropout)
self.ns_mode = ns_mode
def forward(self, g, nids=None):
if self.ns_mode:
# forward for neighbor sampling
x = self.emb(g[0].srcdata[dgl.NID])
h = self.conv1(g[0], x, g[0].edata[dgl.ETYPE], g[0].edata['norm'])
h = self.dropout(F.relu(h))
h = self.conv2(g[1], h, g[1].edata[dgl.ETYPE], g[1].edata['norm'])
return h
else:
x = self.emb.weight if nids is None else self.emb(nids)
h = self.conv1(g, x, g.edata[dgl.ETYPE], g.edata['norm'])
h = self.dropout(F.relu(h))
h = self.conv2(g, h, g.edata[dgl.ETYPE], g.edata['norm'])
return h
def load_data(data_name, get_norm=False, inv_target=False):
dataset = utils.process_data(data_name)
# Load hetero-graph
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
category = dataset.predict_category
num_classes = dataset.num_classes
labels = hg.nodes[category].data.pop('labels')
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()
if get_norm:
# Calculate normalization weight for each edge,
# 1. / d, d is the degree of the destination node
for cetype in hg.canonical_etypes:
hg.edges[cetype].data['norm'] = dgl.norm_by_dst(
hg, cetype).unsqueeze(1)
edata = ['norm']
else:
edata = None
# get target category id
category_id = hg.ntypes.index(category)
g = dgl.to_homogeneous(hg, edata=edata)
# Rename the fields as they can be changed by for example DataLoader
g.ndata['ntype'] = g.ndata.pop(dgl.NTYPE)
g.ndata['type_id'] = g.ndata.pop(dgl.NID)
node_ids = torch.arange(g.num_nodes())
# find out the target node ids in g
loc = (g.ndata['ntype'] == category_id)
target_idx = node_ids[loc]
if inv_target:
# Map global node IDs to type-specific node IDs. This is required for
# looking up type-specific labels in a minibatch
inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64)
inv_target[target_idx] = torch.arange(0, target_idx.shape[0],
dtype=inv_target.dtype)
return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target
else:
return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx
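For reference, a minimal sketch of how the benchmarks above consume load_data; all names come from this module, and the 16-unit hidden size matches the benchmarks. This is an illustration of the call pattern, not additional benchmark code.

# Sketch only -- mirrors the call sites in track_acc / track_time above.
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = load_data(
    'aifb', get_norm=True)

model = RGCN(g.num_nodes(), 16, num_classes, num_rels)
logits = model(g)             # one score vector per homogeneous node
logits = logits[target_idx]   # keep only nodes of the predicted category
loss = F.cross_entropy(logits[train_idx], labels[train_idx])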
---find-links https://download.pytorch.org/whl/lts/1.8/torch_lts.html
-torch==1.8.1+cu111
-torchvision==0.9.1+cu111
+--find-links https://download.pytorch.org/whl/torch
+torch==1.9.0+cu111
+torchvision
 pytest
 nose
 numpy
...
@@ -12,7 +12,8 @@ nltk
 requests[security]
 tqdm
 awscli
-torchtext==0.9.1
+torchtext
 pandas
 rdflib
 ogb==1.3.1
+torchmetrics
\ No newline at end of file
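A quick sanity check that an environment matches the new pins (a sketch; the expected version strings are what the requirements above imply, not output captured from CI):

import torch
import torchtext
import torchmetrics

print(torch.__version__)    # expected to start with "1.9.0"
print(torch.version.cuda)   # expected "11.1" for the +cu111 wheel
print(torchtext.__version__, torchmetrics.__version__)  # now unpinned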