Unverified Commit 264d96cd authored by Zihao Ye, committed by GitHub

[bugfix] Fix a bunch of examples to be compatible with dgl 0.5 (#1957)



* upd

* upd

* upd

* upd

* upd

* upd

* fix pinsage also

* upd

* upd

* upd
Co-authored-by: Ubuntu <ubuntu@ip-172-31-29-3.us-east-2.compute.internal>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
parent dcf46412
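
Note: the recurring change in the hunks below is the dgl 0.5 idiom of casting a graph's index dtype to int32 with `.int()` before moving it to a GPU. A minimal sketch of the pattern, assuming dgl 0.5 with the PyTorch backend (the toy graph and device choice are illustrative):

```python
import torch
import dgl

# dgl 0.5 graphs default to int64 node/edge indices.
g = dgl.graph(([0, 1, 2], [1, 2, 3]))

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# dgl 0.4 style was simply: g = g.to(device)
# dgl 0.5 style used throughout this commit casts indices to int32 first;
# int32 halves index memory and is what these examples standardize on for GPU runs.
g = g.int().to(device)
print(g.idtype)  # torch.int32
```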
......@@ -220,7 +220,7 @@ class BiDecoder(Block):
for i in range(self._num_basis_functions):
graph.nodes['user'].data['h'] = F.dot(ufeat, self.Ps[i].data())
graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
-basis_out.append(graph.edata['sr'].expand_dims(1))
+basis_out.append(graph.edata['sr'])
out = F.concat(*basis_out, dim=1)
out = self.rate_out(out)
return out
......
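Note: this hunk (and the matching PyTorch `BiDecoder` hunk further down) drops the explicit extra dimension because dgl 0.5's `fn.u_dot_v` keeps the reduced axis, so each per-basis score already has shape `(num_edges, 1)` and concatenates along `dim=1` directly. A small PyTorch sketch of the shape behavior (sizes are made up; the MXNet version behaves the same way):

```python
import torch as th
import dgl
import dgl.function as fn

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata['h'] = th.randn(3, 8)
g.apply_edges(fn.u_dot_v('h', 'h', 'sr'))

# dgl 0.5 keeps the reduced axis, so no unsqueeze/expand_dims is needed.
print(g.edata['sr'].shape)                       # torch.Size([3, 1])
print(th.cat([g.edata['sr']] * 4, dim=1).shape)  # torch.Size([3, 4])
```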
......@@ -36,7 +36,7 @@ def main(args):
else:
cuda = True
ctx = mx.gpu(args.gpu)
-g = g.to(ctx)
+g = g.int().to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
......
......@@ -69,7 +69,7 @@ def main(args):
else:
cuda = True
ctx = mx.gpu(args.gpu)
-g = g.to(ctx)
+g = g.int().to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
......
......@@ -39,7 +39,7 @@ def main(args):
else:
cuda = True
ctx = mx.gpu(args.gpu)
-g = g.to(ctx)
+g = g.int().to(ctx)
features = g.ndata['feat']
labels = mx.nd.array(g.ndata['label'], dtype="float32", ctx=ctx)
......
......@@ -11,7 +11,6 @@ import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.data import register_data_args
-from torch.utils.tensorboard import SummaryWriter
from modules import GraphSAGE
from sampler import ClusterIter
......@@ -84,7 +83,7 @@ def main(args):
torch.cuda.set_device(args.gpu)
val_mask = val_mask.cuda()
test_mask = test_mask.cuda()
-g = g.to(args.gpu)
+g = g.int().to(args.gpu)
print('labels shape:', g.ndata['label'].shape)
print("features shape, ", g.ndata['feat'].shape)
......@@ -102,7 +101,6 @@ def main(args):
# logger and so on
log_dir = save_log_dir(args)
-writer = SummaryWriter(log_dir)
logger = Logger(os.path.join(log_dir, 'loggings'))
logger.write(args)
......@@ -148,8 +146,6 @@ def main(args):
if j % args.log_every == 0:
print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
f"{len(cluster_iterator)}:training loss", loss.item())
-writer.add_scalar('train/loss', loss.item(),
-global_step=j + epoch * len(cluster_iterator))
print("current memory:",
torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)
......@@ -164,8 +160,6 @@ def main(args):
print('new best val f1:', best_f1)
torch.save(model.state_dict(), os.path.join(
log_dir, 'best_model.pkl'))
-writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
-writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)
end_time = time.time()
print(f'training using time {start_time-end_time}')
......@@ -177,8 +171,6 @@ def main(args):
test_f1_mic, test_f1_mac = evaluate(
model, g, labels, test_mask, multitask)
print("Test F1-mic{:.4f}, Test F1-mac{:.4f}". format(test_f1_mic, test_f1_mac))
-writer.add_scalar('test/f1-mic', test_f1_mic)
-writer.add_scalar('test/f1-mac', test_f1_mac)
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
......
......@@ -53,7 +53,7 @@ def main(args):
cuda = False
else:
cuda = True
-g = g.to(args.gpu)
+g = g.int().to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
......
......@@ -60,7 +60,7 @@ def main(args):
g = train_dataset[0]
n_classes = train_dataset.num_labels
num_feats = g.ndata['feat'].shape[1]
-g = g.to(device)
+g = g.int().to(device)
heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
# define the model
model = GAT(g,
......@@ -117,12 +117,9 @@ def main(args):
if cur_step == patience:
break
test_score_list = []
-for batch, test_data in enumerate(test_dataloader):
-subgraph, feats, labels = test_data
+for batch, subgraph in enumerate(test_dataloader):
subgraph = subgraph.to(device)
-feats = feats.to(device)
-labels = labels.to(device)
-test_score_list.append(evaluate(feats, model, subgraph, labels.float(), loss_fcn)[0])
+test_score_list.append(evaluate(subgraph.ndata['feat'], model, subgraph, subgraph.ndata['label'], loss_fcn))
print("Test F1-Score: {:.4f}".format(np.array(test_score_list).mean()))
if __name__ == '__main__':
......
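Note: the rewritten test loop assumes the dataloader yields one batched `DGLGraph` per step, with features and labels carried in `ndata`, instead of a `(subgraph, feats, labels)` tuple. A hedged sketch of a collate function that produces such batches (the dataset and loader wiring here are illustrative, not this example's exact code):

```python
import dgl
from torch.utils.data import DataLoader

def collate(graphs):
    # dgl.batch merges a list of graphs into one graph and concatenates
    # their node data, so 'feat' and 'label' travel with the batch.
    return dgl.batch(graphs)

# Illustrative usage:
# test_dataloader = DataLoader(dgl.data.PPIDataset(mode='test'),
#                              batch_size=2, collate_fn=collate)
```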
......@@ -24,7 +24,6 @@ ml-100k, no feature
python3 train.py --data_name=ml-100k --use_one_hot_fea --gcn_agg_accum=stack
```
Results: RMSE=0.9088 (0.910 reported)
-Speed: 0.0410s/epoch (vanilla implementation: 0.1008s/epoch)
ml-100k, with feature
```bash
......@@ -37,7 +36,6 @@ ml-1m, no feature
python3 train.py --data_name=ml-1m --gcn_agg_accum=sum --use_one_hot_fea
```
Results: RMSE=0.8377 (0.832 reported)
-Speed: 0.0844s/epoch (vanilla implementation: 1.538s/epoch)
ml-10m, no feature
```bash
......@@ -46,7 +44,6 @@ python3 train.py --data_name=ml-10m --gcn_agg_accum=stack --gcn_dropout=0.3 \
--use_one_hot_fea --gen_r_num_basis_func=4
```
Results: RMSE=0.7800 (0.777 reported)
-Speed: 1.1982/epoch (vanilla implementation: OOM)
Testbed: EC2 p3.2xlarge instance (Amazon Linux 2)
### Train with minibatch on a single GPU
......@@ -67,8 +64,6 @@ python3 train_sampling.py --data_name=ml-100k \
--gpu 0
```
Results: RMSE=0.9380
-Speed: 1.059s/epoch (Run with 70 epoches)
-Speed: 1.046s/epoch (mix_cpu_gpu)
ml-100k, with feature
```bash
......@@ -97,8 +92,6 @@ python3 train_sampling.py --data_name=ml-1m \
--gpu 0
```
Results: RMSE=0.8632
-Speed: 7.852s/epoch (Run with 60 epoches)
-Speed: 7.788s/epoch (mix_cpu_gpu)
ml-10m, no feature
```bash
......@@ -126,8 +119,6 @@ python3 train_sampling.py --data_name=ml-10m \
--gpu 0
```
Results: RMSE=0.8050
-Speed: 394.304s/epoch (Run with 60 epoches)
-Speed: 408.749s/epoch (mix_cpu_gpu)
Testbed: EC2 p3.2xlarge instance
### Train with minibatch on multi-GPU
......@@ -151,8 +142,6 @@ python train_sampling.py --data_name=ml-100k \
--gpu 0,1,2,3,4,5,6,7
```
Result: RMSE=0.9397
-Speed: 1.202s/epoch (Run with only 30 epoches)
-Speed: 1.245/epoch (mix_cpu_gpu)
ml-100k, with feature
```bash
......@@ -162,7 +151,6 @@ python train_sampling.py --data_name=ml-100k \
--gpu 0,1,2,3,4,5,6,7
```
Result: RMSE=0.9655
-Speed: 1.265/epoch (Run with 30 epoches)
ml-1m, no feature
```bash
......@@ -182,8 +170,6 @@ python train_sampling.py --data_name=ml-1m \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.8621
-Speed: 11.612s/epoch (Run with 40 epoches)
-Speed: 12.483s/epoch (mix_cpu_gpu)
ml-10m, no feature
```bash
......@@ -211,8 +197,6 @@ python train_sampling.py --data_name=ml-10m \
--gpu 0,1,2,3,4,5,6,7
```
Results: RMSE=0.8084
-Speed: 632.868s/epoch (Run with 30 epoches)
-Speed: 633.397s/epoch (mix_cpu_gpu)
Testbed: EC2 p3.16xlarge instance
### Train with minibatch on CPU
......@@ -223,5 +207,4 @@ python3 train_sampling.py --data_name=ml-100k \
--gcn_agg_accum=stack \
--gpu -1
```
-Speed 1.591s/epoch
Testbed: EC2 r5.xlarge instance
......@@ -340,7 +340,7 @@ class BiDecoder(nn.Module):
for i in range(self._num_basis):
graph.nodes['user'].data['h'] = ufeat @ self.Ps[i]
graph.apply_edges(fn.u_dot_v('h', 'h', 'sr'))
-basis_out.append(graph.edata['sr'].unsqueeze(1))
+basis_out.append(graph.edata['sr'])
out = th.cat(basis_out, dim=1)
out = self.combine_basis(out)
return out
......
......@@ -105,12 +105,12 @@ def train(args):
count_num = 0
count_loss = 0
-dataset.train_enc_graph = dataset.train_enc_graph.to(args.device)
-dataset.train_dec_graph = dataset.train_dec_graph.to(args.device)
+dataset.train_enc_graph = dataset.train_enc_graph.int().to(args.device)
+dataset.train_dec_graph = dataset.train_dec_graph.int().to(args.device)
dataset.valid_enc_graph = dataset.train_enc_graph
-dataset.valid_dec_graph = dataset.valid_dec_graph.to(args.device)
-dataset.test_enc_graph = dataset.test_enc_graph.to(args.device)
-dataset.test_dec_graph = dataset.test_dec_graph.to(args.device)
+dataset.valid_dec_graph = dataset.valid_dec_graph.int().to(args.device)
+dataset.test_enc_graph = dataset.test_enc_graph.int().to(args.device)
+dataset.test_dec_graph = dataset.test_dec_graph.int().to(args.device)
print("Start training ...")
dur = []
......
......@@ -38,7 +38,7 @@ def main(args):
cuda = False
else:
cuda = True
-g = g.to(args.gpu)
+g = g.int().to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
......
......@@ -109,7 +109,7 @@ class SAGE(nn.Module):
end = start + batch_size
batch_nodes = nodes[start:end]
block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes)
-block = block.to(device)
+block = block.int().to(device)
induced_nodes = block.srcdata[dgl.NID]
h = x[induced_nodes].to(device)
......@@ -188,7 +188,7 @@ def load_subtensor(g, labels, blocks, hist_blocks, dev_id, aggregation_on_device
hist_block = hist_block.to(dev_id)
hist_block.update_all(fn.copy_u('hist', 'm'), fn.mean('m', 'agg_hist'))
-block = block.to(dev_id)
+block = block.int().to(dev_id)
if not aggregation_on_device:
hist_block = hist_block.to(dev_id)
block.dstdata['agg_hist'] = hist_block.dstdata['agg_hist']
......@@ -220,8 +220,8 @@ def run(args, dev_id, data):
# Unpack data
train_mask, val_mask, in_feats, labels, n_classes, g = data
-train_nid = train_mask.nonzero()[:, 0]
-val_nid = val_mask.nonzero()[:, 0]
+train_nid = train_mask.nonzero().squeeze()
+val_nid = val_mask.nonzero().squeeze()
# Create sampler
sampler = NeighborSampler(g, [int(_) for _ in args.fan_out.split(',')])
......
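Note: several hunks swap `mask.nonzero()[:, 0]` for `mask.nonzero().squeeze()`; both flatten the `(n, 1)` index matrix of a boolean mask into a 1-D node-ID tensor. A quick check of the equivalence, plus the one edge case worth knowing:

```python
import torch as th

mask = th.tensor([False, True, False, True])
print(mask.nonzero()[:, 0])      # tensor([1, 3])
print(mask.nonzero().squeeze())  # tensor([1, 3]) -- same result

# Edge case: with exactly one True entry, squeeze() returns a 0-d tensor,
# while [:, 0] keeps a length-1 vector.
one = th.tensor([False, True, False])
print(one.nonzero().squeeze().dim())  # 0
```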
......@@ -262,8 +262,8 @@ def run(proc_id, n_gpus, args, devices, data):
# Unpack data
train_mask, val_mask, in_feats, labels, n_classes, g = data
-train_nid = train_mask.nonzero()[:, 0]
-val_nid = val_mask.nonzero()[:, 0]
+train_nid = train_mask.nonzero().squeeze()
+val_nid = val_mask.nonzero().squeeze()
# Split train_nid
train_nid = th.split(train_nid, math.ceil(len(train_nid) // n_gpus))[proc_id]
......
......@@ -11,6 +11,7 @@ import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
+import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
from dgl.nn.pytorch.conv import SAGEConv
......@@ -48,12 +49,12 @@ class GraphSAGE(nn.Module):
return h
-def evaluate(model, graph, features, labels, mask):
+def evaluate(model, graph, features, labels, nid):
model.eval()
with torch.no_grad():
logits = model(graph, features)
-logits = logits[mask]
-labels = labels[mask]
+logits = logits[nid]
+labels = labels[nid]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
......@@ -61,18 +62,14 @@ def evaluate(model, graph, features, labels, mask):
def main(args):
# load and preprocess dataset
data = load_data(args)
-features = torch.FloatTensor(data.features)
-labels = torch.LongTensor(data.labels)
-if hasattr(torch, 'BoolTensor'):
-train_mask = torch.BoolTensor(data.train_mask)
-val_mask = torch.BoolTensor(data.val_mask)
-test_mask = torch.BoolTensor(data.test_mask)
-else:
-train_mask = torch.ByteTensor(data.train_mask)
-val_mask = torch.ByteTensor(data.val_mask)
-test_mask = torch.ByteTensor(data.test_mask)
+g = data[0]
+features = g.ndata['feat']
+labels = g.ndata['label']
+train_mask = g.ndata['train_mask']
+val_mask = g.ndata['val_mask']
+test_mask = g.ndata['test_mask']
in_feats = features.shape[1]
-n_classes = data.num_labels
+n_classes = data.num_classes
n_edges = data.graph.number_of_edges()
print("""----Data statistics------'
#Edges %d
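Note: the replacement above reflects dgl 0.5's dataset API, where a dataset indexes into `DGLGraph` objects carrying features, labels, and split masks in `ndata`, and the class count is exposed as `num_classes` instead of `num_labels`. A minimal sketch, with Cora as an illustrative dataset choice:

```python
from dgl.data import CoraGraphDataset

data = CoraGraphDataset()
g = data[0]                     # the dataset holds DGLGraph objects
features = g.ndata['feat']      # tensors ride along on the graph
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
n_classes = data.num_classes    # renamed from num_labels in dgl 0.5
```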
......@@ -97,11 +94,15 @@ def main(args):
test_mask = test_mask.cuda()
print("use cuda:", args.gpu)
+train_nid = train_mask.nonzero().squeeze()
+val_nid = val_mask.nonzero().squeeze()
+test_nid = test_mask.nonzero().squeeze()
# graph preprocess and calculate normalization factor
-g = data.graph
-g.remove_edges_from(nx.selfloop_edges(g))
-g = DGLGraph(g)
+g = dgl.remove_self_loop(g)
n_edges = g.number_of_edges()
+if cuda:
+g = g.int().to(args.gpu)
# create GraphSAGE model
model = GraphSAGE(in_feats,
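Note: the self-loop cleanup no longer round-trips through networkx; `dgl.remove_self_loop` (paired with `dgl.add_self_loop`) edits the `DGLGraph` directly. A tiny sketch:

```python
import dgl

g = dgl.graph(([0, 0, 1], [0, 1, 1]))  # contains self-loops 0->0 and 1->1
g = dgl.remove_self_loop(g)
print(g.num_edges())  # 1 -- only the 0->1 edge remains
```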
......@@ -126,7 +127,7 @@ def main(args):
t0 = time.time()
# forward
logits = model(g, features)
-loss = F.cross_entropy(logits[train_mask], labels[train_mask])
+loss = F.cross_entropy(logits[train_nid], labels[train_nid])
optimizer.zero_grad()
loss.backward()
......@@ -135,13 +136,13 @@ def main(args):
if epoch >= 3:
dur.append(time.time() - t0)
-acc = evaluate(model, g, features, labels, val_mask)
+acc = evaluate(model, g, features, labels, val_nid)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}".format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print()
-acc = evaluate(model, g, features, labels, test_mask)
+acc = evaluate(model, g, features, labels, test_nid)
print("Test Accuracy {:.4f}".format(acc))
......
......@@ -77,7 +77,7 @@ class SAGE(nn.Module):
for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
block = blocks[0]
-block = block.to(device)
+block = block.int().to(device)
h = x[input_nodes].to(device)
h = layer(block, h)
if l != len(self.layers) - 1:
......@@ -159,7 +159,7 @@ def run(args, device, data):
for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
# Load the input features as well as output labels
batch_inputs, batch_labels = load_subtensor(train_g, seeds, input_nodes, device)
-blocks = [block.to(device) for block in blocks]
+blocks = [block.int().to(device) for block in blocks]
# Compute loss and prediction
batch_pred = model(blocks, batch_inputs)
......@@ -202,7 +202,7 @@ if __name__ == '__main__':
argparser.add_argument('--eval-every', type=int, default=5)
argparser.add_argument('--lr', type=float, default=0.003)
argparser.add_argument('--dropout', type=float, default=0.5)
-argparser.add_argument('--num-workers', type=int, default=0,
+argparser.add_argument('--num-workers', type=int, default=4,
help="Number of sampling processes. Use 0 for no extra process.")
argparser.add_argument('--inductive', action='store_true',
help="Inductive learning setting")
......@@ -222,8 +222,6 @@ if __name__ == '__main__':
in_feats = g.ndata['features'].shape[1]
-g = dgl.as_heterograph(g)
if args.inductive:
train_g, val_g, test_g = inductive_split(g)
else:
......
......@@ -79,7 +79,7 @@ class SAGE(nn.Module):
for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
block = blocks[0]
-block = block.to(device)
+block = block.int().to(device)
h = x[input_nodes].to(device)
h = layer(block, h)
if l != len(self.layers) - 1:
......@@ -141,9 +141,9 @@ def run(proc_id, n_gpus, args, devices, data):
train_mask = train_g.ndata['train_mask']
val_mask = val_g.ndata['val_mask']
test_mask = ~(test_g.ndata['train_mask'] | test_g.ndata['val_mask'])
-train_nid = train_mask.nonzero()[:, 0]
-val_nid = val_mask.nonzero()[:, 0]
-test_nid = test_mask.nonzero()[:, 0]
+train_nid = train_mask.nonzero().squeeze()
+val_nid = val_mask.nonzero().squeeze()
+test_nid = test_mask.nonzero().squeeze()
# Split train_nid
train_nid = th.split(train_nid, math.ceil(len(train_nid) // n_gpus))[proc_id]
......@@ -183,7 +183,7 @@ def run(proc_id, n_gpus, args, devices, data):
# Load the input features as well as output labels
batch_inputs, batch_labels = load_subtensor(train_g, train_g.ndata['labels'], seeds, input_nodes, dev_id)
-blocks = [block.to(dev_id) for block in blocks]
+blocks = [block.int().to(dev_id) for block in blocks]
# Compute loss and prediction
batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, batch_labels)
......
......@@ -82,7 +82,7 @@ class NeighborSampler(object):
block = dgl.to_block(frontier, seeds)
# Pre-generate CSR format that it can be used in training directly
-block.in_degree(0)
+block.create_format_()
# Obtain the seed nodes for next layer.
seeds = block.srcdata[dgl.NID]
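Note: `block.in_degree(0)` was only a side-effect trick to force the block's CSR format to materialize inside the sampler; dgl 0.5 adds `create_format_()` to do this explicitly. A sketch of the intent, assuming dgl 0.5:

```python
import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
# Materialize all sparse formats (COO/CSR/CSC) eagerly, e.g. inside a
# dataloader worker, so the training loop does not pay the conversion
# cost on first use.
g.create_format_()
```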
......@@ -149,7 +149,7 @@ class SAGE(nn.Module):
end = start + batch_size
batch_nodes = nodes[start:end]
block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes)
-block = block.to(device)
+block = block.int().to(device)
input_nodes = block.srcdata[dgl.NID]
h = x[input_nodes].to(device)
......@@ -184,23 +184,22 @@ def compute_acc(emb, labels, train_nids, val_nids, test_nids):
Compute the accuracy of prediction given the labels.
"""
emb = emb.cpu().numpy()
+labels = labels.cpu().numpy()
train_nids = train_nids.cpu().numpy()
-train_labels = labels[train_nids].cpu().numpy()
+train_labels = labels[train_nids]
val_nids = val_nids.cpu().numpy()
-val_labels = labels[val_nids].cpu().numpy()
+val_labels = labels[val_nids]
test_nids = test_nids.cpu().numpy()
-test_labels = labels[test_nids].cpu().numpy()
+test_labels = labels[test_nids]
emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True)
lr = lm.LogisticRegression(multi_class='multinomial', max_iter=10000)
-lr.fit(emb[train_nids], labels[train_nids])
+lr.fit(emb[train_nids], train_labels)
pred = lr.predict(emb)
-f1_micro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='micro')
-f1_micro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='micro')
-f1_macro_eval = skm.f1_score(labels[val_nids], pred[val_nids], average='macro')
-f1_macro_test = skm.f1_score(labels[test_nids], pred[test_nids], average='macro')
+f1_micro_eval = skm.f1_score(val_labels, pred[val_nids], average='micro')
+f1_micro_test = skm.f1_score(test_labels, pred[test_nids], average='micro')
return f1_micro_eval, f1_micro_test
def evaluate(model, g, inputs, labels, train_nids, val_nids, test_nids, batch_size, device):
......@@ -238,9 +237,13 @@ def run(proc_id, n_gpus, args, devices, data):
rank=proc_id)
train_mask, val_mask, test_mask, in_feats, labels, n_classes, g = data
-train_nid = th.LongTensor(np.nonzero(train_mask)[0])
-val_nid = th.LongTensor(np.nonzero(val_mask)[0])
-test_nid = th.LongTensor(np.nonzero(test_mask)[0])
+train_nid = th.LongTensor(np.nonzero(train_mask)).squeeze()
+val_nid = th.LongTensor(np.nonzero(val_mask)).squeeze()
+test_nid = th.LongTensor(np.nonzero(test_mask)).squeeze()
+#train_nid = th.LongTensor(np.nonzero(train_mask)[0])
+#val_nid = th.LongTensor(np.nonzero(val_mask)[0])
+#test_nid = th.LongTensor(np.nonzero(test_mask)[0])
# Create sampler
sampler = NeighborSampler(g, [int(fanout) for fanout in args.fan_out.split(',')], args.num_negs, args.neg_share)
......@@ -296,7 +299,7 @@ def run(proc_id, n_gpus, args, devices, data):
pos_graph = pos_graph.to(device)
neg_graph = neg_graph.to(device)
-blocks = [block.to(device) for block in blocks]
+blocks = [block.int().to(device) for block in blocks]
# Compute loss and prediction
batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, pos_graph, neg_graph)
......@@ -338,7 +341,9 @@ def main(args, devices):
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
+test_mask = g.ndata['test_mask']
g.ndata['features'] = features
+g.create_format_()
# Pack data
data = train_mask, val_mask, test_mask, in_feats, labels, n_classes, g
......@@ -357,8 +362,6 @@ def main(args, devices):
for p in procs:
p.join()
-run(args, device, data)
if __name__ == '__main__':
argparser = argparse.ArgumentParser("multi-gpu training")
......
......@@ -21,7 +21,7 @@ In this example we implement the GraphWriter, [Text Generation from Knowledge Gr
| |BLEU|METEOR| training time per epoch|
|-|-|-|-|
|Author's implementation|14.3+-1.01| 18.8+-0.28| 1970s|
-|DGL implementation|14.31+-0.34|19.74+-0.69| 1192s|
+|DGL implementation|14.31+-0.34|19.74+-0.69| 1080s|
We use the author's code for the speed test, and our testbed is V100 GPU.
......
......@@ -121,7 +121,7 @@ class GAT(nn.Module):
# compute edge attention
graph.apply_edges(fn.u_dot_v('el', 'er', 'e'))
e = graph.edata.pop('e') / math.sqrt(self._out_feats * self._num_heads)
-graph.edata['a'] = edge_softmax(graph, e).unsqueeze(-1)
+graph.edata['a'] = edge_softmax(graph, e)
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft2'))
......
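Note: as with the decoder hunks, the `unsqueeze(-1)` disappears because `fn.u_dot_v` keeps its reduced axis in dgl 0.5: per-head features of shape `(N, H, D)` yield logits of shape `(E, H, 1)`, and `edge_softmax` preserves that shape, so the weights broadcast against the `D`-sized features without reshaping. A sketch with made-up sizes (the `edge_softmax` import path is assumed from dgl 0.5's PyTorch backend):

```python
import math
import torch as th
import dgl
import dgl.function as fn
from dgl.nn.pytorch.softmax import edge_softmax  # path assumed for dgl 0.5

num_heads, d = 4, 8
g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata['el'] = th.randn(3, num_heads, d)
g.ndata['er'] = th.randn(3, num_heads, d)
g.apply_edges(fn.u_dot_v('el', 'er', 'e'))
e = g.edata.pop('e') / math.sqrt(d * num_heads)
print(e.shape)                   # torch.Size([3, 4, 1]) -- axis kept
print(edge_softmax(g, e).shape)  # torch.Size([3, 4, 1]) -- no unsqueeze
```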
......@@ -67,4 +67,5 @@ def subgraph_collate_fn(g, batch):
g1.ndata['feat'] = g.ndata['feat'][nid]
g1.ndata['labels'] = g.ndata['labels'][nid]
g1.ndata['train_mask'] = g.ndata['train_mask'][nid]
+g1.create_format_()
return g1