Unverified commit a1f74982, authored by Mufei Li, committed by GitHub
Browse files

[Example] Fix Various Examples Related to TorchMetrics (#5521)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-36-188.ap-northeast-1.compute.internal>
parent 97286f98
......@@ -10,7 +10,7 @@ Dependencies
- Python 3.7+ (for string formatting features)
- PyTorch 1.9.0+
- sklearn
- TorchMetrics
- TorchMetrics 0.11.4
## Run Experiments
......
......@@ -72,7 +72,12 @@ for _ in range(10):
loss.backward()
opt.step()
if it % 20 == 0:
acc = MF.accuracy(y_hat[m], y[m])
acc = MF.accuracy(
y_hat[m],
y[m],
task="multiclass",
num_classes=dataset.num_classes,
)
mem = torch.cuda.max_memory_allocated() / 1000000
print("Loss", loss.item(), "Acc", acc.item(), "GPU Mem", mem, "MB")
tt = time.time()
......@@ -97,8 +102,18 @@ for _ in range(10):
val_labels = torch.cat(val_labels, 0)
test_preds = torch.cat(test_preds, 0)
test_labels = torch.cat(test_labels, 0)
val_acc = MF.accuracy(val_preds, val_labels)
test_acc = MF.accuracy(test_preds, test_labels)
val_acc = MF.accuracy(
val_preds,
val_labels,
task="multiclass",
num_classes=dataset.num_classes,
)
test_acc = MF.accuracy(
test_preds,
test_labels,
task="multiclass",
num_classes=dataset.num_classes,
)
print("Validation acc:", val_acc.item(), "Test acc:", test_acc.item())
print(np.mean(durations[4:]), np.std(durations[4:]))
......@@ -10,7 +10,7 @@ Requirements
------------
```bash
pip install requests torchmetrics
pip install requests torchmetrics==0.11.4 ogb
```
How to run
......@@ -45,8 +45,7 @@ Test Accuracy: 0.7632
### PyTorch Lightning for node classification
Train w/ mini-batch sampling for node classification with PyTorch Lightning on OGB-products.
Works with both single GPU and multiple GPUs:
Train w/ mini-batch sampling for node classification with PyTorch Lightning on OGB-products. It requires PyTorch Lightning 2.0.1. It works with both single GPU and multiple GPUs:
```bash
python3 lightning/node_classification.py
......
......@@ -27,8 +27,8 @@ class SAGE(LightningModule):
self.dropout = nn.Dropout(0.5)
self.n_hidden = n_hidden
self.n_classes = n_classes
self.train_acc = Accuracy()
self.val_acc = Accuracy()
self.train_acc = Accuracy(task="multiclass", num_classes=n_classes)
self.val_acc = Accuracy(task="multiclass", num_classes=n_classes)
def forward(self, blocks, x):
h = x
......@@ -180,9 +180,11 @@ if __name__ == "__main__":
# Train
checkpoint_callback = ModelCheckpoint(monitor="val_acc", save_top_k=1)
# Use this for single GPU
# trainer = Trainer(gpus=[0], max_epochs=10, callbacks=[checkpoint_callback])
# trainer = Trainer(accelerator="gpu", devices=[0], max_epochs=10,
# callbacks=[checkpoint_callback])
trainer = Trainer(
gpus=[0, 1, 2, 3],
accelerator="gpu",
devices=[0, 1, 2, 3],
max_epochs=10,
callbacks=[checkpoint_callback],
strategy="ddp_spawn",
......@@ -203,5 +205,7 @@ if __name__ == "__main__":
pred = model.inference(graph, "cuda", 4096, 12, graph.device)
pred = pred[test_idx]
label = graph.ndata["label"][test_idx]
acc = MF.accuracy(pred, label)
acc = MF.accuracy(
pred, label, task="multiclass", num_classes=datamodule.n_classes
)
print("Test accuracy:", acc)
......@@ -5,7 +5,6 @@ import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics.functional as MF
import tqdm
from dgl.dataloading import (
as_edge_prediction_sampler,
......
......@@ -74,7 +74,7 @@ class SAGE(nn.Module):
return y
def evaluate(model, graph, dataloader):
def evaluate(model, graph, dataloader, num_classes):
model.eval()
ys = []
y_hats = []
......@@ -83,10 +83,15 @@ def evaluate(model, graph, dataloader):
x = blocks[0].srcdata["feat"]
ys.append(blocks[-1].dstdata["label"])
y_hats.append(model(blocks, x))
return MF.accuracy(torch.cat(y_hats), torch.cat(ys))
return MF.accuracy(
torch.cat(y_hats),
torch.cat(ys),
task="multiclass",
num_classes=num_classes,
)
def layerwise_infer(device, graph, nid, model, batch_size):
def layerwise_infer(device, graph, nid, model, num_classes, batch_size):
model.eval()
with torch.no_grad():
pred = model.inference(
......@@ -94,10 +99,12 @@ def layerwise_infer(device, graph, nid, model, batch_size):
) # pred in buffer_device
pred = pred[nid]
label = graph.ndata["label"][nid].to(pred.device)
return MF.accuracy(pred, label)
return MF.accuracy(
pred, label, task="multiclass", num_classes=num_classes
)
def train(args, device, g, dataset, model):
def train(args, device, g, dataset, model, num_classes):
# create sampler & dataloader
train_idx = dataset.train_idx.to(device)
val_idx = dataset.val_idx.to(device)
......@@ -147,7 +154,7 @@ def train(args, device, g, dataset, model):
loss.backward()
opt.step()
total_loss += loss.item()
acc = evaluate(model, g, val_dataloader)
acc = evaluate(model, g, val_dataloader, num_classes)
print(
"Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
epoch, total_loss / (it + 1), acc.item()
......@@ -174,6 +181,7 @@ if __name__ == "__main__":
dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
g = dataset[0]
g = g.to("cuda" if args.mode == "puregpu" else "cpu")
num_classes = dataset.num_classes
device = torch.device("cpu" if args.mode == "cpu" else "cuda")
# create GraphSAGE model
......@@ -183,9 +191,11 @@ if __name__ == "__main__":
# model training
print("Training...")
train(args, device, g, dataset, model)
train(args, device, g, dataset, model, num_classes)
# test the model
print("Testing...")
acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096)
acc = layerwise_infer(
device, g, dataset.test_idx, model, num_classes, batch_size=4096
)
print("Test Accuracy {:.4f}".format(acc.item()))
......@@ -5,7 +5,7 @@ Requirements
------------
```bash
pip install torchmetrics
pip install torchmetrics==0.11.4
```
How to run
......
......@@ -86,7 +86,7 @@ class SAGE(nn.Module):
return y
def evaluate(model, g, dataloader):
def evaluate(model, g, num_classes, dataloader):
model.eval()
ys = []
y_hats = []
......@@ -95,11 +95,16 @@ def evaluate(model, g, dataloader):
x = blocks[0].srcdata["feat"]
ys.append(blocks[-1].dstdata["label"])
y_hats.append(model(blocks, x))
return MF.accuracy(torch.cat(y_hats), torch.cat(ys))
return MF.accuracy(
torch.cat(y_hats),
torch.cat(ys),
task="multiclass",
num_classes=num_classes,
)
def layerwise_infer(
proc_id, device, g, nid, model, use_uva, batch_size=2**16
proc_id, device, g, num_classes, nid, model, use_uva, batch_size=2**16
):
model.eval()
with torch.no_grad():
......@@ -107,11 +112,15 @@ def layerwise_infer(
pred = pred[nid]
labels = g.ndata["label"][nid].to(pred.device)
if proc_id == 0:
acc = MF.accuracy(pred, labels)
acc = MF.accuracy(
pred, labels, task="multiclass", num_classes=num_classes
)
print("Test Accuracy {:.4f}".format(acc.item()))
def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva):
def train(
proc_id, nprocs, device, g, num_classes, train_idx, val_idx, model, use_uva
):
sampler = NeighborSampler(
[10, 10, 10], prefetch_node_feats=["feat"], prefetch_labels=["label"]
)
......@@ -154,7 +163,9 @@ def train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva):
loss.backward()
opt.step()
total_loss += loss
acc = evaluate(model, g, val_dataloader).to(device) / nprocs
acc = (
evaluate(model, g, num_classes, val_dataloader).to(device) / nprocs
)
dist.reduce(acc, 0)
if proc_id == 0:
print(
......@@ -175,20 +186,30 @@ def run(proc_id, nprocs, devices, g, data, mode):
world_size=nprocs,
rank=proc_id,
)
out_size, train_idx, val_idx, test_idx = data
num_classes, train_idx, val_idx, test_idx = data
train_idx = train_idx.to(device)
val_idx = val_idx.to(device)
g = g.to(device if mode == "puregpu" else "cpu")
# create GraphSAGE model (distributed)
in_size = g.ndata["feat"].shape[1]
model = SAGE(in_size, 256, out_size).to(device)
model = SAGE(in_size, 256, num_classes).to(device)
model = DistributedDataParallel(
model, device_ids=[device], output_device=device
)
# training + testing
use_uva = mode == "mixed"
train(proc_id, nprocs, device, g, train_idx, val_idx, model, use_uva)
layerwise_infer(proc_id, device, g, test_idx, model, use_uva)
train(
proc_id,
nprocs,
device,
g,
num_classes,
train_idx,
val_idx,
model,
use_uva,
)
layerwise_infer(proc_id, device, g, num_classes, test_idx, model, use_uva)
# cleanup process group
dist.destroy_process_group()
......
......@@ -4,11 +4,11 @@ This is an adaptation of RGCN where graph convolution is replaced with graph att
Dependencies
------------
- torchmetrics
- torchmetrics 0.11.4
Install as follows:
```bash
pip install torchmetrics
pip install torchmetrics==0.11.4
```
How to Run
......
......@@ -57,7 +57,7 @@ class HeteroGAT(nn.Module):
return self.linear(h["paper"])
def evaluate(model, dataloader, desc):
def evaluate(num_classes, model, dataloader, desc):
preds = []
labels = []
with torch.no_grad():
......@@ -71,11 +71,13 @@ def evaluate(model, dataloader, desc):
labels.append(y.cpu())
preds = torch.cat(preds, 0)
labels = torch.cat(labels, 0)
acc = MF.accuracy(preds, labels)
acc = MF.accuracy(
preds, labels, task="multiclass", num_classes=num_classes
)
return acc
def train(train_loader, val_loader, test_loader, model):
def train(train_loader, val_loader, test_loader, num_classes, model):
# loss function and optimizer
loss_fcn = nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
......@@ -96,8 +98,8 @@ def train(train_loader, val_loader, test_loader, model):
opt.step()
total_loss += loss.item()
model.eval()
val_acc = evaluate(model, val_dataloader, "Val. ")
test_acc = evaluate(model, test_dataloader, "Test ")
val_acc = evaluate(num_classes, model, val_dataloader, "Val. ")
test_acc = evaluate(num_classes, model, test_dataloader, "Test ")
print(
f"Epoch {epoch:05d} | Loss {total_loss/(it+1):.4f} | Validation Acc. {val_acc.item():.4f} | Test Acc. {test_acc.item():.4f}"
)
......@@ -138,8 +140,8 @@ if __name__ == "__main__":
# create RGAT model
in_size = graph.ndata["feat"]["paper"].shape[1]
out_size = dataset.num_classes
model = HeteroGAT(graph.etypes, in_size, 256, out_size).to(device)
num_classes = dataset.num_classes
model = HeteroGAT(graph.etypes, in_size, 256, num_classes).to(device)
# dataloader + model training + testing
train_sampler = NeighborSampler(
......@@ -186,4 +188,4 @@ if __name__ == "__main__":
use_uva=torch.cuda.is_available(),
)
train(train_dataloader, val_dataloader, test_dataloader, model)
train(train_dataloader, val_dataloader, test_dataloader, num_classes, model)
......@@ -6,12 +6,12 @@
### Dependencies
- rdflib
- torchmetrics
- torchmetrics 0.11.4
Install as follows:
```bash
pip install rdflib
pip install torchmetrics
pip install torchmetrics==0.11.4
```
How to run
......
......@@ -38,16 +38,21 @@ class RGCN(nn.Module):
return h
def evaluate(g, target_idx, labels, test_mask, model):
def evaluate(g, target_idx, labels, num_classes, test_mask, model):
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()
model.eval()
with torch.no_grad():
logits = model(g)
logits = logits[target_idx]
return accuracy(logits[test_idx].argmax(dim=1), labels[test_idx]).item()
return accuracy(
logits[test_idx].argmax(dim=1),
labels[test_idx],
task="multiclass",
num_classes=num_classes,
).item()
def train(g, target_idx, labels, train_mask, model):
def train(g, target_idx, labels, num_classes, train_mask, model):
# define train idx, loss function and optimizer
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
loss_fcn = nn.CrossEntropyLoss()
......@@ -62,7 +67,10 @@ def train(g, target_idx, labels, train_mask, model):
loss.backward()
optimizer.step()
acc = accuracy(
logits[train_idx].argmax(dim=1), labels[train_idx]
logits[train_idx].argmax(dim=1),
labels[train_idx],
task="multiclass",
num_classes=num_classes,
).item()
print(
"Epoch {:05d} | Loss {:.4f} | Train Accuracy {:.4f} ".format(
......@@ -112,9 +120,9 @@ if __name__ == "__main__":
target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id]
# create RGCN model
in_size = g.num_nodes() # featureless with one-hot encoding
out_size = data.num_classes
model = RGCN(in_size, 16, out_size, num_rels).to(device)
num_classes = data.num_classes
model = RGCN(in_size, 16, num_classes, num_rels).to(device)
train(g, target_idx, labels, train_mask, model)
acc = evaluate(g, target_idx, labels, test_mask, model)
train(g, target_idx, labels, num_classes, train_mask, model)
acc = evaluate(g, target_idx, labels, num_classes, test_mask, model)
print("Test accuracy {:.4f}".format(acc))
......@@ -41,7 +41,7 @@ class RGCN(nn.Module):
return h
def evaluate(model, label, dataloader, inv_target):
def evaluate(model, labels, num_classes, dataloader, inv_target):
model.eval()
eval_logits = []
eval_seeds = []
......@@ -55,10 +55,15 @@ def evaluate(model, label, dataloader, inv_target):
eval_seeds.append(output_nodes.cpu().detach())
eval_logits = torch.cat(eval_logits)
eval_seeds = torch.cat(eval_seeds)
return accuracy(eval_logits.argmax(dim=1), labels[eval_seeds].cpu()).item()
return accuracy(
eval_logits.argmax(dim=1),
labels[eval_seeds].cpu(),
task="multiclass",
num_classes=num_classes,
).item()
def train(device, g, target_idx, labels, train_mask, model):
def train(device, g, target_idx, labels, train_mask, num_classes, model):
# define train idx, loss function and optimizer
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
loss_fcn = nn.CrossEntropyLoss()
......@@ -95,7 +100,7 @@ def train(device, g, target_idx, labels, train_mask, model):
loss.backward()
optimizer.step()
total_loss += loss.item()
acc = evaluate(model, labels, val_loader, inv_target)
acc = evaluate(model, labels, num_classes, val_loader, inv_target)
print(
"Epoch {:05d} | Loss {:.4f} | Val. Accuracy {:.4f} ".format(
epoch, total_loss / (it + 1), acc
......@@ -150,10 +155,10 @@ if __name__ == "__main__":
# create RGCN model
in_size = g.num_nodes() # featureless with one-hot encoding
out_size = data.num_classes
model = RGCN(in_size, 16, out_size, num_rels).to(device)
num_classes = data.num_classes
model = RGCN(in_size, 16, num_classes, num_rels).to(device)
train(device, g, target_idx, labels, train_mask, model)
train(device, g, target_idx, labels, train_mask, num_classes, model)
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()
test_sampler = MultiLayerNeighborSampler(
[-1, -1]
......@@ -166,5 +171,5 @@ if __name__ == "__main__":
batch_size=32,
shuffle=False,
)
acc = evaluate(model, labels, test_loader, inv_target)
acc = evaluate(model, labels, num_classes, test_loader, inv_target)
print("Test accuracy {:.4f}".format(acc))
......@@ -45,7 +45,7 @@ class RGCN(nn.Module):
return h
def evaluate(model, labels, dataloader, inv_target):
def evaluate(model, labels, num_classes, dataloader, inv_target):
model.eval()
eval_logits = []
eval_seeds = []
......@@ -61,12 +61,25 @@ def evaluate(model, labels, dataloader, inv_target):
eval_seeds = torch.cat(eval_seeds)
num_seeds = len(eval_seeds)
loc_sum = accuracy(
eval_logits.argmax(dim=1), labels[eval_seeds].cpu()
eval_logits.argmax(dim=1),
labels[eval_seeds].cpu(),
task="multiclass",
num_classes=num_classes,
) * float(num_seeds)
return torch.tensor([loc_sum.item(), float(num_seeds)])
def train(proc_id, device, g, target_idx, labels, train_idx, inv_target, model):
def train(
proc_id,
device,
g,
target_idx,
labels,
num_classes,
train_idx,
inv_target,
model,
):
# define loss function and optimizer
loss_fcn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
......@@ -106,9 +119,9 @@ def train(proc_id, device, g, target_idx, labels, train_idx, inv_target, model):
total_loss += loss.item()
# torchmetric accuracy defined as num_correct_labels / num_train_nodes
# loc_acc_split = [loc_accuracy * loc_num_train_nodes, loc_num_train_nodes]
loc_acc_split = evaluate(model, labels, val_loader, inv_target).to(
device
)
loc_acc_split = evaluate(
model, labels, num_classes, val_loader, inv_target
).to(device)
dist.reduce(loc_acc_split, 0)
if proc_id == 0:
acc = loc_acc_split[0] / loc_acc_split[1]
......@@ -143,13 +156,22 @@ def run(proc_id, nprocs, devices, g, data):
inv_target = inv_target.to(device)
# create RGCN model (distributed)
in_size = g.num_nodes()
out_size = num_classes
model = RGCN(in_size, 16, out_size, num_rels).to(device)
model = RGCN(in_size, 16, num_classes, num_rels).to(device)
model = DistributedDataParallel(
model, device_ids=[device], output_device=device
)
# training + testing
train(proc_id, device, g, target_idx, labels, train_idx, inv_target, model)
train(
proc_id,
device,
g,
target_idx,
labels,
num_classes,
train_idx,
inv_target,
model,
)
test_sampler = MultiLayerNeighborSampler(
[-1, -1]
) # -1 for sampling all neighbors
......@@ -162,7 +184,9 @@ def run(proc_id, nprocs, devices, g, data):
shuffle=False,
use_ddp=True,
)
loc_acc_split = evaluate(model, labels, test_loader, inv_target).to(device)
loc_acc_split = evaluate(
model, labels, num_classes, test_loader, inv_target
).to(device)
dist.reduce(loc_acc_split, 0)
if proc_id == 0:
acc = loc_acc_split[0] / loc_acc_split[1]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment