"src/array/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "40b44a43a98b4a5dbc088fd1a6839adbda370fb5"
Unverified commit d64f7c48, authored by Rhett Ying, committed by GitHub
Browse files

[Benchmark] log train time and test accuracy, choose device (#5662)

parent ac681ee3
Graph Attention Networks (GAT)
============
- Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903)
- Author's code repo (tensorflow implementation):
[https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT).
- Popular pytorch implementation:
[https://github.com/Diego999/pyGAT](https://github.com/Diego999/pyGAT).
How to run
-------
Run with the following for multiclass node classification (available datasets: "cora", "citeseer", "pubmed")
```bash
python3 train.py --dataset cora
```
> **_NOTE:_** Users may occasionally run into low accuracy issue (e.g., test accuracy < 0.8) due to overfitting. This can be resolved by adding Early Stopping or reducing maximum number of training epochs.
Summary
-------
* cora: ~0.821
* citeseer: ~0.710
* pubmed: ~0.780
import argparse
import time
import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
class GAT(nn.Module):
    """Two-layer Graph Attention Network (GAT) for node classification.

    The first GATConv layer concatenates its attention heads and applies
    ELU; the second (output) layer averages its heads to produce one
    logit vector per class.
    """

    def __init__(self, in_size, hid_size, out_size, heads):
        super().__init__()
        # Layer specs: (input dim, output dim, num heads, activation).
        # The second layer's input is hid_size * heads[0] because the
        # first layer's head outputs are concatenated in forward().
        layer_specs = [
            (in_size, hid_size, heads[0], F.elu),
            (hid_size * heads[0], out_size, heads[1], None),
        ]
        self.gat_layers = nn.ModuleList(
            dglnn.GATConv(
                dim_in,
                dim_out,
                num_heads,
                feat_drop=0.6,
                attn_drop=0.6,
                activation=act,
            )
            for dim_in, dim_out, num_heads, act in layer_specs
        )

    def forward(self, g, inputs):
        h = inputs
        last = len(self.gat_layers) - 1
        for idx, conv in enumerate(self.gat_layers):
            h = conv(g, h)
            # Intermediate layers concatenate head outputs; the final
            # layer averages them into per-class logits.
            h = h.mean(1) if idx == last else h.flatten(1)
        return h
def evaluate(g, features, labels, mask, model):
    """Return the classification accuracy of ``model`` on the masked nodes.

    Runs a forward pass in eval mode without gradient tracking, then
    compares argmax predictions against ``labels`` restricted to ``mask``.
    """
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    masked_logits = logits[mask]
    masked_labels = labels[mask]
    predictions = masked_logits.argmax(dim=1)
    num_correct = (predictions == masked_labels).sum().item()
    return num_correct / len(masked_labels)
def train(g, features, labels, masks, model, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs of full-graph updates.

    ``masks`` is a sequence whose first two entries are the train and
    validation boolean masks. Each epoch logs the training loss, the
    validation accuracy, and the wall-clock epoch time.
    """
    train_mask, val_mask = masks[0], masks[1]
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=5e-3, weight_decay=5e-4
    )
    for epoch in range(num_epochs):
        start = time.time()
        model.train()
        logits = model(g, features)
        loss = criterion(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Validation accuracy is included in the per-epoch timing.
        val_acc = evaluate(g, features, labels, val_mask, model)
        elapsed = time.time() - start
        print(
            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} | Time {:.4f}".format(
                epoch, loss.item(), val_acc, elapsed
            )
        )
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="cora",
        help="Dataset name ('cora', 'citeseer', 'pubmed').",
    )
    parser.add_argument(
        "--num_epochs",
        type=int,
        default=200,
        help="Number of epochs for train.",
    )
    parser.add_argument(
        "--num_gpus",
        type=int,
        default=0,
        help="Number of GPUs used for train and evaluation.",
    )
    args = parser.parse_args()
    # Fix: this was an f-string with no placeholders (ruff F541);
    # the emitted text is unchanged.
    print("Training with DGL built-in GATConv module.")

    # Load and preprocess dataset. AddSelfLoop first removes existing
    # self-loops to prevent duplication.
    transform = AddSelfLoop()
    dataset_classes = {
        "cora": CoraGraphDataset,
        "citeseer": CiteseerGraphDataset,
        "pubmed": PubmedGraphDataset,
    }
    if args.dataset not in dataset_classes:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    data = dataset_classes[args.dataset](transform=transform)
    g = data[0]

    # Use the GPU only when it was requested AND CUDA is available.
    if args.num_gpus > 0 and torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    g = g.int().to(device)
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]

    # Create the two-layer GAT model: 8 hidden units, 8 attention heads
    # in the first layer, 1 head in the output layer.
    in_size = features.shape[1]
    out_size = data.num_classes
    model = GAT(in_size, 8, out_size, heads=[8, 1]).to(device)

    print("Training...")
    train(g, features, labels, masks, model, args.num_epochs)

    print("Testing...")
    acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(acc))
@@ -10,6 +10,8 @@ Graph Attention Networks (GAT)
How to run
-------
> **_NOTE:_** `train.py` is deprecated; please check the new version in `//examples/core/gat/train.py`.
Run with the following for multiclass node classification (available datasets: "cora", "citeseer", "pubmed")
```bash
python3 train.py --dataset cora
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment