Unverified Commit 4ddd477f authored by Quan (Andy) Gan, committed by GitHub

[Model] GraphSAGE inductive example (#1741)

* graphsage inductive example

* fix
parent 168a88e5
@@ -33,7 +33,9 @@ python3 train_full.py --dataset cora --gpu 0 # full graph
 Train w/ mini-batch sampling (on the Reddit dataset)
 ```bash
 python3 train_sampling.py --num-epochs 30                        # neighbor sampling
+python3 train_sampling.py --num-epochs 30 --inductive            # inductive learning with neighbor sampling
 python3 train_sampling_multi_gpu.py --num-epochs 30              # neighbor sampling with multi GPU
+python3 train_sampling_multi_gpu.py --num-epochs 30 --inductive  # inductive learning with neighbor sampling, multi GPU
 python3 train_cv.py --num-epochs 30                              # control variate sampling
 python3 train_cv_multi_gpu.py --num-epochs 30                    # control variate sampling with multi GPU
 ```
@@ -44,6 +46,7 @@ Accuracy:
 |:---------------------:|:--------:|
 | Full Graph            | 0.9504   |
 | Neighbor Sampling     | 0.9495   |
+| N.S. (Inductive)      | 0.9460   |
 | Control Variate       | 0.9490   |

 ### Unsupervised training
......
@@ -15,9 +15,9 @@ def load_reddit():
     g = data.graph
     g.ndata['features'] = features
     g.ndata['labels'] = labels
-    g.ndata['train_mask'] = th.LongTensor(data.train_mask)
-    g.ndata['val_mask'] = th.LongTensor(data.val_mask)
-    g.ndata['test_mask'] = th.LongTensor(data.test_mask)
+    g.ndata['train_mask'] = th.BoolTensor(data.train_mask)
+    g.ndata['val_mask'] = th.BoolTensor(data.val_mask)
+    g.ndata['test_mask'] = th.BoolTensor(data.test_mask)
     return g, data.num_labels

 def load_ogb(name):
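The boolean dtype matters here: PyTorch treats an int64 tensor of 0s and 1s as a list of positions, not as a mask. A minimal PyTorch-only illustration with hypothetical tensors:

```python
import torch as th

x = th.tensor([10., 20., 30.])
mask = th.tensor([1, 0, 1])

# An int64 tensor indexes by position: it picks elements 1, 0 and 1 again.
print(x[mask.long()])   # tensor([20., 10., 20.])
# A bool tensor selects by mask, which is what the train/val/test masks need.
print(x[mask.bool()])   # tensor([10., 30.])
```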
@@ -35,13 +35,21 @@ def load_ogb(name):
     # Find the node IDs in the training, validation, and test set.
     train_nid, val_nid, test_nid = splitted_idx['train'], splitted_idx['valid'], splitted_idx['test']
-    train_mask = th.zeros((graph.number_of_nodes(),), dtype=th.int64)
-    train_mask[train_nid] = 1
-    val_mask = th.zeros((graph.number_of_nodes(),), dtype=th.int64)
-    val_mask[val_nid] = 1
-    test_mask = th.zeros((graph.number_of_nodes(),), dtype=th.int64)
-    test_mask[test_nid] = 1
+    train_mask = th.zeros((graph.number_of_nodes(),), dtype=th.bool)
+    train_mask[train_nid] = True
+    val_mask = th.zeros((graph.number_of_nodes(),), dtype=th.bool)
+    val_mask[val_nid] = True
+    test_mask = th.zeros((graph.number_of_nodes(),), dtype=th.bool)
+    test_mask[test_nid] = True
     graph.ndata['train_mask'] = train_mask
     graph.ndata['val_mask'] = val_mask
     graph.ndata['test_mask'] = test_mask
     return graph, len(th.unique(graph.ndata['labels']))
+
+def inductive_split(g):
+    """Split the graph into training graph, validation graph, and test graph by training
+    and validation masks.  Suitable for inductive models."""
+    train_g = g.subgraph(g.ndata['train_mask'])
+    val_g = g.subgraph(g.ndata['train_mask'] | g.ndata['val_mask'])
+    test_g = g
+    return train_g, val_g, test_g
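A minimal usage sketch of `inductive_split`, assuming DGL and the `load_graph` module above are importable (the Reddit dataset is downloaded on first use): the training graph keeps only training nodes, the validation graph adds the validation nodes on top of them, and the test graph is the full graph.

```python
from load_graph import load_reddit, inductive_split

g, n_classes = load_reddit()
train_g, val_g, test_g = inductive_split(g)

# The three graphs grow monotonically: train nodes, train+val nodes, all nodes.
assert train_g.number_of_nodes() == int(g.ndata['train_mask'].sum())
assert val_g.number_of_nodes() == int((g.ndata['train_mask'] | g.ndata['val_mask']).sum())
assert test_g.number_of_nodes() == g.number_of_nodes()
```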
@@ -16,7 +16,7 @@ from dgl.data import RedditDataset
 import tqdm
 import traceback

-from load_graph import load_reddit, load_ogb
+from load_graph import load_reddit, load_ogb, inductive_split

 class SAGE(nn.Module):
     def __init__(self,
@@ -142,15 +142,16 @@ def load_subtensor(g, seeds, input_nodes, device):
 #### Entry point
 def run(args, device, data):
     # Unpack data
-    train_mask, val_mask, in_feats, n_classes, g = data
-    train_nid = th.nonzero(train_mask, as_tuple=True)[0]
-    val_nid = th.nonzero(val_mask, as_tuple=True)[0]
+    in_feats, n_classes, train_g, val_g, test_g = data
+    train_nid = th.nonzero(train_g.ndata['train_mask'], as_tuple=True)[0]
+    val_nid = th.nonzero(val_g.ndata['val_mask'], as_tuple=True)[0]
+    test_nid = th.nonzero(~(test_g.ndata['train_mask'] | test_g.ndata['val_mask']), as_tuple=True)[0]

     # Create PyTorch DataLoader for constructing blocks
     sampler = dgl.sampling.MultiLayerNeighborSampler(
         [int(fanout) for fanout in args.fan_out.split(',')])
     dataloader = dgl.sampling.NodeDataLoader(
-        g,
+        train_g,
         train_nid,
         sampler,
         batch_size=args.batch_size,
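In this setup the test nodes are simply the complement of the training and validation masks on the test graph. A toy sketch of that derivation, with hypothetical masks standing in for `test_g.ndata`:

```python
import torch as th

train_mask = th.tensor([True,  True,  False, False, False])
val_mask   = th.tensor([False, False, True,  False, False])

# Everything that is neither a training nor a validation node is a test node.
test_nid = th.nonzero(~(train_mask | val_mask), as_tuple=True)[0]
print(test_nid)  # tensor([3, 4])
```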
@@ -177,7 +178,7 @@ def run(args, device, data):
             tic_step = time.time()

             # Load the input features as well as output labels
-            batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
+            batch_inputs, batch_labels = load_subtensor(train_g, seeds, input_nodes, device)

             # Compute loss and prediction
             batch_pred = model(blocks, batch_inputs)
@@ -198,8 +199,10 @@ def run(args, device, data):
         if epoch >= 5:
             avg += toc - tic
         if epoch % args.eval_every == 0 and epoch != 0:
-            eval_acc = evaluate(model, g, g.ndata['features'], g.ndata['labels'], val_nid, args.batch_size, device)
+            eval_acc = evaluate(model, val_g, val_g.ndata['features'], val_g.ndata['labels'], val_nid, args.batch_size, device)
             print('Eval Acc {:.4f}'.format(eval_acc))
+            test_acc = evaluate(model, test_g, test_g.ndata['features'], test_g.ndata['labels'], test_nid, args.batch_size, device)
+            print('Test Acc: {:.4f}'.format(test_acc))

     print('Avg epoch time: {}'.format(avg / (epoch - 4)))
@@ -219,6 +222,8 @@ if __name__ == '__main__':
     argparser.add_argument('--dropout', type=float, default=0.5)
     argparser.add_argument('--num-workers', type=int, default=0,
         help="Number of sampling processes. Use 0 for no extra process.")
+    argparser.add_argument('--inductive', action='store_true',
+        help="Inductive learning setting")
     args = argparser.parse_args()

     if args.gpu >= 0:
@@ -232,12 +237,20 @@ if __name__ == '__main__':
         g, n_classes = load_ogb('ogbn-products')
     else:
         raise Exception('unknown dataset')
-    g = dgl.as_heterograph(g)
     in_feats = g.ndata['features'].shape[1]
-    train_mask = g.ndata['train_mask']
-    val_mask = g.ndata['val_mask']
-    prepare_mp(g)
+
+    g = dgl.as_heterograph(g)
+    if args.inductive:
+        train_g, val_g, test_g = inductive_split(g)
+    else:
+        train_g = val_g = test_g = g
+    prepare_mp(train_g)
+    prepare_mp(val_g)
+    prepare_mp(test_g)

     # Pack data
-    data = train_mask, val_mask, in_feats, n_classes, g
+    data = in_feats, n_classes, train_g, val_g, test_g

     run(args, device, data)
@@ -16,6 +16,7 @@ import tqdm
 import traceback

 from utils import thread_wrapped_func
+from load_graph import load_reddit, inductive_split

 class SAGE(nn.Module):
     def __init__(self,
@@ -114,13 +115,13 @@ def compute_acc(pred, labels):
     """
     return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred)

-def evaluate(model, g, inputs, labels, val_mask, batch_size, device):
+def evaluate(model, g, inputs, labels, val_nid, batch_size, device):
     """
-    Evaluate the model on the validation set specified by ``val_mask``.
+    Evaluate the model on the validation set specified by ``val_nid``.
     g : The entire graph.
     inputs : The features of all the nodes.
     labels : The labels of all the nodes.
-    val_mask : A 0-1 mask indicating which nodes do we actually compute the accuracy for.
+    val_nid : A tensor of node IDs on which the accuracy is actually computed.
     batch_size : Number of nodes to compute at the same time.
     device : The GPU device to evaluate on.
     """
@@ -128,7 +129,7 @@ def evaluate(model, g, inputs, labels, val_mask, batch_size, device):
     with th.no_grad():
         pred = model.inference(g, inputs, batch_size, device)
     model.train()
-    return compute_acc(pred[val_mask], labels[val_mask])
+    return compute_acc(pred[val_nid], labels[val_nid])

 def load_subtensor(g, labels, seeds, input_nodes, dev_id):
     """
@@ -154,11 +155,13 @@ def run(proc_id, n_gpus, args, devices, data):
         th.cuda.set_device(dev_id)

     # Unpack data
-    train_mask, val_mask, in_feats, labels, n_classes, g = data
-    train_nid = th.LongTensor(np.nonzero(train_mask)[0])
-    val_nid = th.LongTensor(np.nonzero(val_mask)[0])
-    train_mask = th.BoolTensor(train_mask)
-    val_mask = th.BoolTensor(val_mask)
+    in_feats, n_classes, train_g, val_g, test_g = data
+    train_mask = train_g.ndata['train_mask']
+    val_mask = val_g.ndata['val_mask']
+    test_mask = ~(test_g.ndata['train_mask'] | test_g.ndata['val_mask'])
+    train_nid = train_mask.nonzero()[:, 0]
+    val_nid = val_mask.nonzero()[:, 0]
+    test_nid = test_mask.nonzero()[:, 0]

     # Split train_nid
     train_nid = th.split(train_nid, len(train_nid) // n_gpus)[proc_id]
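The training node IDs are then partitioned across GPU processes with `th.split`. A toy sketch of that partitioning (hypothetical sizes; `proc_id` and `n_gpus` come from the multiprocessing launcher):

```python
import torch as th

train_nid = th.arange(10)
n_gpus = 3

for proc_id in range(n_gpus):
    # Chunks of size len(train_nid) // n_gpus; each process takes the chunk at its rank.
    part = th.split(train_nid, len(train_nid) // n_gpus)[proc_id]
    print(proc_id, part)
# 0 tensor([0, 1, 2])
# 1 tensor([3, 4, 5])
# 2 tensor([6, 7, 8])
# In this toy case the trailing remainder chunk, tensor([9]), is not taken by any rank.
```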
@@ -167,7 +170,7 @@ def run(proc_id, n_gpus, args, devices, data):
     sampler = dgl.sampling.MultiLayerNeighborSampler(
         [int(fanout) for fanout in args.fan_out.split(',')])
     dataloader = dgl.sampling.NodeDataLoader(
-        g,
+        train_g,
         train_nid,
         sampler,
         batch_size=args.batch_size,
@@ -197,7 +200,7 @@ def run(proc_id, n_gpus, args, devices, data):
             tic_step = time.time()

             # Load the input features as well as output labels
-            batch_inputs, batch_labels = load_subtensor(g, labels, seeds, input_nodes, dev_id)
+            batch_inputs, batch_labels = load_subtensor(train_g, train_g.ndata['labels'], seeds, input_nodes, dev_id)

             # Compute loss and prediction
             batch_pred = model(blocks, batch_inputs)
@@ -230,10 +233,17 @@ def run(proc_id, n_gpus, args, devices, data):
             avg += toc - tic
         if epoch % args.eval_every == 0 and epoch != 0:
             if n_gpus == 1:
-                eval_acc = evaluate(model, g, g.ndata['features'], labels, val_mask, args.batch_size, devices[0])
+                eval_acc = evaluate(
+                    model, val_g, val_g.ndata['features'], val_g.ndata['labels'], val_nid, args.batch_size, devices[0])
+                test_acc = evaluate(
+                    model, test_g, test_g.ndata['features'], test_g.ndata['labels'], test_nid, args.batch_size, devices[0])
             else:
-                eval_acc = evaluate(model.module, g, g.ndata['features'], labels, val_mask, args.batch_size, devices[0])
+                eval_acc = evaluate(
+                    model.module, val_g, val_g.ndata['features'], val_g.ndata['labels'], val_nid, args.batch_size, devices[0])
+                test_acc = evaluate(
+                    model.module, test_g, test_g.ndata['features'], test_g.ndata['labels'], test_nid, args.batch_size, devices[0])
             print('Eval Acc {:.4f}'.format(eval_acc))
+            print('Test Acc: {:.4f}'.format(test_acc))

     if n_gpus > 1:
@@ -256,25 +266,28 @@ if __name__ == '__main__':
     argparser.add_argument('--dropout', type=float, default=0.5)
     argparser.add_argument('--num-workers', type=int, default=0,
         help="Number of sampling processes. Use 0 for no extra process.")
+    argparser.add_argument('--inductive', action='store_true',
+        help="Inductive learning setting")
     args = argparser.parse_args()

     devices = list(map(int, args.gpu.split(',')))
     n_gpus = len(devices)

-    # load reddit data
-    data = RedditDataset(self_loop=True)
-    train_mask = data.train_mask
-    val_mask = data.val_mask
-    features = th.Tensor(data.features)
-    in_feats = features.shape[1]
-    labels = th.LongTensor(data.labels)
-    n_classes = data.num_labels
+    g, n_classes = load_reddit()
+
     # Construct graph
-    g = dgl.graph(data.graph.all_edges())
-    g.ndata['features'] = features
-    prepare_mp(g)
+    g = dgl.as_heterograph(g)
+    in_feats = g.ndata['features'].shape[1]
+
+    if args.inductive:
+        train_g, val_g, test_g = inductive_split(g)
+    else:
+        train_g = val_g = test_g = g
+    prepare_mp(train_g)
+    prepare_mp(val_g)
+    prepare_mp(test_g)

     # Pack data
-    data = train_mask, val_mask, in_feats, labels, n_classes, g
+    data = in_feats, n_classes, train_g, val_g, test_g

     if n_gpus == 1:
         run(0, n_gpus, args, devices, data)
......