""" [SIGN: Scalable Inception Graph Neural Networks] (https://arxiv.org/abs/2004.11198) This example shows a simplified version of SIGN: a precomputed 2-hops diffusion operator on top of symmetrically normalized adjacency matrix A_hat. """ import dgl.sparse as dglsp import torch import torch.nn as nn import torch.nn.functional as F from dgl.data import CoraGraphDataset from torch.optim import Adam ################################################################################ # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the feature # diffusion in SIGN laconically. ################################################################################ def sign_diffusion(A, X, r): # Perform the r-hop diffusion operation. X_sign = [X] for _ in range(r): X = A @ X X_sign.append(X) return X_sign class SIGN(nn.Module): def __init__(self, in_size, out_size, r, hidden_size=256): super().__init__() # Note that theta and omega refer to the learnable matrices in the # original paper correspondingly. The variable r refers to subscript to # theta. self.theta = nn.ModuleList( [nn.Linear(in_size, hidden_size) for _ in range(r + 1)] ) self.omega = nn.Linear(hidden_size * (r + 1), out_size) def forward(self, X_sign): results = [] for i in range(len(X_sign)): results.append(self.theta[i](X_sign[i])) Z = F.relu(torch.cat(results, dim=1)) return self.omega(Z) def evaluate(g, pred): label = g.ndata["label"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] # Compute accuracy on validation/test set. val_acc = (pred[val_mask] == label[val_mask]).float().mean() test_acc = (pred[test_mask] == label[test_mask]).float().mean() return val_acc, test_acc def train(model, g, X_sign): label = g.ndata["label"] train_mask = g.ndata["train_mask"] optimizer = Adam(model.parameters(), lr=3e-3) for epoch in range(10): # Switch the model to training mode. model.train() # Forward. logits = model(X_sign) # Compute loss with nodes in training set. loss = F.cross_entropy(logits[train_mask], label[train_mask]) # Backward. optimizer.zero_grad() loss.backward() optimizer.step() # Switch the model to evaluating mode. model.eval() # Compute prediction. logits = model(X_sign) pred = logits.argmax(1) # Evaluate the prediction. val_acc, test_acc = evaluate(g, pred) print( f"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}, test" f" acc: {test_acc:.3f}" ) if __name__ == "__main__": # If CUDA is available, use GPU to accelerate the training, use CPU # otherwise. dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Load graph from the existing dataset. dataset = CoraGraphDataset() g = dataset[0].to(dev) # Create the sparse adjacency matrix A (note that W was used as the notation # for adjacency matrix in the original paper). indices = torch.stack(g.edges()) N = g.num_nodes() A = dglsp.spmatrix(indices, shape=(N, N)) # Calculate the symmetrically normalized adjacency matrix. I = dglsp.identity(A.shape, device=dev) A_hat = A + I D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5 A_hat = D_hat @ A_hat @ D_hat # 2-hop diffusion. r = 2 X = g.ndata["feat"] X_sign = sign_diffusion(A_hat, X, r) # Create SIGN model. in_size = X.shape[1] out_size = dataset.num_classes model = SIGN(in_size, out_size, r).to(dev) # Kick off training. train(model, g, X_sign)