Unverified Commit d4b5ddcd authored by Mingbang Wang, committed by GitHub

[Example] modify dgl rgcn example to make it consistent with graphbolt's (#6882)

parent 397b7599
@@ -25,12 +25,10 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9
### Accuracies
```
Final performance:
All runs:
Highest Train: 83.22 ± 0.00
Highest Valid: 48.25 ± 0.20
Final Train: 68.45 ± 9.81
Final Test: 47.51 ± 0.19
Epoch: 01, Loss: 2.3625, Valid: 48.25%, Test: 47.91%, Time 86.0210
Epoch: 02, Loss: 1.5852, Valid: 48.56%, Test: 46.98%, Time 84.2728
Epoch: 03, Loss: 1.1974, Valid: 45.99%, Test: 44.05%, Time 85.7916
Test accuracy 44.1165
```
## Run on `ogb-lsc-mag240m` dataset
......
@@ -46,9 +46,11 @@ main
└───> EntityClassify.evaluate
"""
import argparse
import itertools
import sys
import time
import dgl
import dgl.nn as dglnn
@@ -56,7 +58,7 @@ import numpy as np
import psutil
import torch as th
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddReverse, Compose, ToSimple
@@ -83,21 +85,22 @@ def prepare_data(args, device):
# Apply transformation to the graph.
# - "ToSimple()" removes multi-edge between two nodes.
# - "AddReverse()" adds reverse edges to the graph.
print("Start to transform graph. This may take a while...")
transform = Compose([ToSimple(), AddReverse()])
g = transform(g)
else:
dataset = MAG240MDataset(root=args.rootdir)
(g,), _ = dgl.load_graphs(args.graph_path)
g = g.formats(["csc"])
labels = th.as_tensor(dataset.paper_label).long()
labels = torch.as_tensor(dataset.paper_label).long()
# As feature data is too large to fit in memory, we read it from disk.
feats["paper"] = th.as_tensor(
feats["paper"] = torch.as_tensor(
np.load(args.paper_feature_path, mmap_mode="r+")
)
feats["author"] = th.as_tensor(
feats["author"] = torch.as_tensor(
np.load(args.author_feature_path, mmap_mode="r+")
)
feats["institution"] = th.as_tensor(
feats["institution"] = torch.as_tensor(
np.load(args.inst_feature_path, mmap_mode="r+")
)
print(f"Loaded graph: {g}")
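For reference, a minimal sketch of what the `Compose([ToSimple(), AddReverse()])` transform above does, applied to a made-up toy heterograph (the node/edge types and tensors below are illustrative only, not taken from the example):

```
import torch

import dgl
from dgl import AddReverse, Compose, ToSimple

# Toy heterograph with a duplicated ("paper", "cites", "paper") edge.
g = dgl.heterograph(
    {
        ("paper", "cites", "paper"): (
            torch.tensor([0, 0, 1]),
            torch.tensor([1, 1, 2]),
        )
    }
)

transform = Compose([ToSimple(), AddReverse()])
g = transform(g)

# ToSimple() collapses the duplicate 0->1 edge; AddReverse() adds a
# ("paper", "rev_cites", "paper") edge type holding the reversed edges.
print(g.canonical_etypes)
print(g.num_edges("cites"))  # 2 after de-duplication
```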
@@ -356,57 +359,6 @@ class EntityClassify(nn.Module):
return h
class Logger(object):
r"""
This class was taken directly from the PyG implementation and can be found
here: https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/mag/logger.py
This was done to ensure that performance was measured in precisely the same
way
"""
def __init__(self, runs):
self.results = [[] for _ in range(runs)]
def add_result(self, run, result):
assert len(result) == 3
assert run >= 0 and run < len(self.results)
self.results[run].append(result)
def print_statistics(self, run=None):
if run is not None:
result = 100 * th.tensor(self.results[run])
argmax = result[:, 1].argmax().item()
print(f"Run {run + 1:02d}:")
print(f"Highest Train: {result[:, 0].max():.2f}")
print(f"Highest Valid: {result[:, 1].max():.2f}")
print(f" Final Train: {result[argmax, 0]:.2f}")
print(f" Final Test: {result[argmax, 2]:.2f}")
else:
result = 100 * th.tensor(self.results)
best_results = []
for r in result:
train1 = r[:, 0].max().item()
valid = r[:, 1].max().item()
train2 = r[r[:, 1].argmax(), 0].item()
test = r[r[:, 1].argmax(), 2].item()
best_results.append((train1, valid, train2, test))
best_result = th.tensor(best_results)
print("All runs:")
r = best_result[:, 0]
print(f"Highest Train: {r.mean():.2f} ± {r.std():.2f}")
r = best_result[:, 1]
print(f"Highest Valid: {r.mean():.2f} ± {r.std():.2f}")
r = best_result[:, 2]
print(f" Final Train: {r.mean():.2f} ± {r.std():.2f}")
r = best_result[:, 3]
print(f" Final Test: {r.mean():.2f} ± {r.std():.2f}")
def extract_node_features(name, g, input_nodes, node_embed, feats, device):
"""Extract the node features from embedding layer or raw features."""
if name == "ogbn-mag":
@@ -440,17 +392,16 @@ def train(
train_loader,
split_idx,
labels,
logger,
device,
run,
):
print("start training...")
print("Start training...")
category = "paper"
# Typically, the best Validation performance is obtained after
# the 1st or 2nd epoch. This is why the max epoch is set to 3.
for epoch in range(3):
num_train = split_idx["train"][category].shape[0]
t0 = time.time()
model.train()
total_loss = 0
@@ -482,19 +433,10 @@ def train(
total_loss += loss.item() * batch_size
t1 = time.time()
loss = total_loss / num_train
# Evaluate the model on the train/val/test set.
train_acc = evaluate(
dataset,
g,
feats,
model,
node_embed,
labels,
device,
split_idx["train"],
)
# Evaluate the model on the val/test set.
valid_acc = evaluate(
dataset,
g,
@@ -517,20 +459,16 @@ def train(
split_idx[test_key],
save_test_submission=(dataset == "ogb-lsc-mag240m"),
)
logger.add_result(run, (train_acc, valid_acc, test_acc))
print(
f"Run: {run + 1:02d}, "
f"Epoch: {epoch +1 :02d}, "
f"Loss: {loss:.4f}, "
f"Train: {100 * train_acc:.2f}%, "
f"Valid: {100 * valid_acc:.2f}%, "
f"Test: {100 * test_acc:.2f}%"
f"Test: {100 * test_acc:.2f}%, "
f"Time {t1 - t0:.4f}"
)
return logger
@th.no_grad()
@torch.no_grad()
def evaluate(
dataset,
g,
@@ -580,9 +518,9 @@ def evaluate(
y_hats.append(y_hat.cpu())
y_true.append(labels[seeds["paper"].cpu()])
y_pred = th.cat(y_hats, dim=0)
y_true = th.cat(y_true, dim=0)
y_true = th.unsqueeze(y_true, 1)
y_pred = torch.cat(y_hats, dim=0)
y_true = torch.cat(y_true, dim=0)
y_true = torch.unsqueeze(y_true, 1)
if dataset == "ogb-lsc-mag240m":
y_pred = y_pred.view(-1)
@@ -596,10 +534,9 @@ def evaluate(
def main(args):
device = "cuda:0" if th.cuda.is_available() and args.num_gpus > 0 else "cpu"
# Initialize a logger.
logger = Logger(args.runs)
device = (
"cuda:0" if torch.cuda.is_available() and args.num_gpus > 0 else "cpu"
)
# Prepare the data.
g, labels, num_classes, split_idx, train_loader, feats = prepare_data(
@@ -625,7 +562,6 @@ def main(args):
f"{sum(p.numel() for p in model.parameters())}"
)
for run in range(args.runs):
try:
if embed_layer is not None:
embed_layer.reset_parameters()
@@ -652,7 +588,7 @@ def main(args):
model.parameters(),
[] if embed_layer is None else embed_layer.parameters(),
)
optimizer = th.optim.Adam(all_params, lr=0.01)
optimizer = torch.optim.Adam(all_params, lr=0.01)
# `expected_max` is the number of physical cores on your machine.
# The `logical` parameter, when set to False, ensures that the count
@@ -665,7 +601,7 @@ def main(args):
f"cores, please set any number less than {expected_max}",
file=sys.stderr,
)
logger = train(
train(
args.dataset,
g,
feats,
@@ -675,14 +611,23 @@ def main(args):
train_loader,
split_idx,
labels,
logger,
device,
run,
)
logger.print_statistics(run)
print("Final performance: ")
logger.print_statistics()
print("Testing...")
test_key = "test" if args.dataset == "ogbn-mag" else "test-dev"
test_acc = evaluate(
args.dataset,
g,
feats,
model,
embed_layer,
labels,
device,
split_idx[test_key],
save_test_submission=(args.dataset == "ogb-lsc-mag240m"),
)
print(f"Test accuracy {test_acc*100:.4f}")
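One detail worth calling out from `main()` above: a single Adam optimizer is created over the model's parameters chained together with those of the optional embedding layer via `itertools.chain` (which is why `itertools` is newly imported). A minimal sketch of that pattern with stand-in `nn.Linear` modules (placeholders, not the example's actual RGCN model):

```
import itertools

import torch
import torch.nn as nn

# Stand-in modules; in the example these would be the RGCN model and the
# optional node-embedding layer (which may be None when raw features exist).
model = nn.Linear(8, 4)
embed_layer = nn.Linear(8, 8)

all_params = itertools.chain(
    model.parameters(),
    [] if embed_layer is None else embed_layer.parameters(),
)
optimizer = torch.optim.Adam(all_params, lr=0.01)

# A single optimizer step updates the parameters of both modules.
loss = model(embed_layer(torch.randn(2, 8))).sum()
loss.backward()
optimizer.step()
optimizer.zero_grad()
```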
if __name__ == "__main__":
@@ -699,12 +644,6 @@ if __name__ == "__main__":
default=0,
help="Number of GPUs. Use 0 for CPU training.",
)
parser.add_argument(
"--runs",
type=int,
default=5,
help="Number of runs. Each run will train the model from scratch.",
)
parser.add_argument(
"--num_workers",
type=int,
@@ -714,7 +653,7 @@
parser.add_argument(
"--rootdir",
type=str,
default="./",
default="./dataset/",
help="Directory to download the OGB dataset.",
)
parser.add_argument(
......