"src/array/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "40b44a43a98b4a5dbc088fd1a6839adbda370fb5"
Unverified commit d64f7c48, authored by Rhett Ying, committed by GitHub
Browse files

[Benchmark] log train time and test accuracy, choose device (#5662)

parent ac681ee3
Graph Attention Networks (GAT)
============
- Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903)
- Author's code repo (tensorflow implementation):
[https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT).
- Popular pytorch implementation:
[https://github.com/Diego999/pyGAT](https://github.com/Diego999/pyGAT).
How to run
-------
Run with the following for multiclass node classification (available datasets: "cora", "citeseer", "pubmed")
```bash
python3 train.py --dataset cora
```
> **_NOTE:_** Users may occasionally run into low accuracy issue (e.g., test accuracy < 0.8) due to overfitting. This can be resolved by adding Early Stopping or reducing maximum number of training epochs.
Summary
-------
* cora: ~0.821
* citeseer: ~0.710
* pubmed: ~0.780
import argparse
import time
import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
class GAT(nn.Module):
    """Two-layer Graph Attention Network (GAT) for node classification.

    The first GATConv layer concatenates its attention heads and applies
    ELU; the second (output) layer averages its heads to produce one
    logit vector per class.
    """

    def __init__(self, in_size, hid_size, out_size, heads):
        super().__init__()
        # Layer specs: (input dim, output dim, num heads, activation).
        # The second layer's input is hid_size * heads[0] because the
        # first layer's head outputs are concatenated in forward().
        layer_specs = [
            (in_size, hid_size, heads[0], F.elu),
            (hid_size * heads[0], out_size, heads[1], None),
        ]
        self.gat_layers = nn.ModuleList(
            dglnn.GATConv(
                dim_in,
                dim_out,
                num_heads,
                feat_drop=0.6,
                attn_drop=0.6,
                activation=act,
            )
            for dim_in, dim_out, num_heads, act in layer_specs
        )

    def forward(self, g, inputs):
        h = inputs
        last = len(self.gat_layers) - 1
        for idx, conv in enumerate(self.gat_layers):
            h = conv(g, h)
            # Intermediate layers concatenate head outputs; the final
            # layer averages them into per-class logits.
            h = h.mean(1) if idx == last else h.flatten(1)
        return h
def evaluate(g, features, labels, mask, model):
    """Return the classification accuracy of ``model`` on the masked nodes.

    Runs a forward pass in eval mode without gradient tracking, then
    compares argmax predictions against ``labels`` restricted to ``mask``.
    """
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
    masked_logits = logits[mask]
    masked_labels = labels[mask]
    predictions = masked_logits.argmax(dim=1)
    num_correct = (predictions == masked_labels).sum().item()
    return num_correct / len(masked_labels)
def train(g, features, labels, masks, model, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs of full-graph updates.

    ``masks`` is a sequence whose first two entries are the train and
    validation boolean masks. Each epoch logs the training loss, the
    validation accuracy, and the wall-clock epoch time.
    """
    train_mask, val_mask = masks[0], masks[1]
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=5e-3, weight_decay=5e-4
    )
    for epoch in range(num_epochs):
        start = time.time()
        model.train()
        logits = model(g, features)
        loss = criterion(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Validation accuracy is included in the per-epoch timing.
        val_acc = evaluate(g, features, labels, val_mask, model)
        elapsed = time.time() - start
        print(
            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} | Time {:.4f}".format(
                epoch, loss.item(), val_acc, elapsed
            )
        )
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="cora",
        help="Dataset name ('cora', 'citeseer', 'pubmed').",
    )
    parser.add_argument(
        "--num_epochs",
        type=int,
        default=200,
        help="Number of epochs for train.",
    )
    parser.add_argument(
        "--num_gpus",
        type=int,
        default=0,
        help="Number of GPUs used for train and evaluation.",
    )
    args = parser.parse_args()
    # Fix: this was an f-string with no placeholders (ruff F541);
    # the emitted text is unchanged.
    print("Training with DGL built-in GATConv module.")

    # Load and preprocess dataset. AddSelfLoop first removes existing
    # self-loops to prevent duplication.
    transform = AddSelfLoop()
    dataset_classes = {
        "cora": CoraGraphDataset,
        "citeseer": CiteseerGraphDataset,
        "pubmed": PubmedGraphDataset,
    }
    if args.dataset not in dataset_classes:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    data = dataset_classes[args.dataset](transform=transform)
    g = data[0]

    # Use the GPU only when it was requested AND CUDA is available.
    if args.num_gpus > 0 and torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    g = g.int().to(device)
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]

    # Create the two-layer GAT model: 8 hidden units, 8 attention heads
    # in the first layer, 1 head in the output layer.
    in_size = features.shape[1]
    out_size = data.num_classes
    model = GAT(in_size, 8, out_size, heads=[8, 1]).to(device)

    print("Training...")
    train(g, features, labels, masks, model, args.num_epochs)

    print("Testing...")
    acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(acc))
@@ -10,6 +10,8 @@ Graph Attention Networks (GAT)
How to run
-------
> **_NOTE:_** `train.py` is deprecated; please check the new version in `//examples/core/gat/train.py`.
Run with the following for multiclass node classification (available datasets: "cora", "citeseer", "pubmed")
```bash
python3 train.py --dataset cora
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment