Unverified Commit e9b624fe authored by Minjie Wang, committed by GitHub

Merge branch 'master' into dist_part

parents 8086d1ed a88e7f7e
import numpy as np
import torch
class EarlyStopping:
def __init__(self, patience=10):
self.patience = patience
self.counter = 0
self.best_score = None
self.early_stop = False
def step(self, acc, model):
score = acc
if self.best_score is None:
self.best_score = score
self.save_checkpoint(model)
elif score < self.best_score:
self.counter += 1
print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
if self.counter >= self.patience:
self.early_stop = True
else:
self.best_score = score
self.save_checkpoint(model)
self.counter = 0
return self.early_stop
def save_checkpoint(self, model):
        '''Save the model when the validation metric improves.'''
torch.save(model.state_dict(), 'es_checkpoint.pt')
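For context, a minimal, self-contained sketch of how this helper is typically driven from a training loop (the linear model and random accuracies below are stand-ins, not part of the original utility):

```python
import numpy as np
import torch
import torch.nn as nn

model = nn.Linear(4, 2)                    # stand-in for a real GNN
stopper = EarlyStopping(patience=10)
for epoch in range(200):
    val_acc = float(np.random.rand())      # stand-in for real validation accuracy
    if stopper.step(val_acc, model):       # True once no improvement for `patience` epochs
        print('Early stopping at epoch', epoch)
        break
# step() saved the best weights to 'es_checkpoint.pt'; restore them afterwards
model.load_state_dict(torch.load('es_checkpoint.pt'))
```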
Graph Convolutional Networks (GCN)
============

- Paper link: [https://arxiv.org/abs/1609.02907](https://arxiv.org/abs/1609.02907)
- Author's code repo: [https://github.com/tkipf/gcn](https://github.com/tkipf/gcn).

How to run
-------

### DGL built-in GraphConv module

Run with the following (available datasets: "cora", "citeseer", "pubmed"):

```bash
python3 train.py --dataset cora
```

Summary
-------
* cora: ~0.810 (paper: 0.815)
* citeseer: ~0.707 (paper: 0.703)
* pubmed: ~0.792 (paper: 0.790)
"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import torch
import torch.nn as nn
from dgl.nn.pytorch import GraphConv
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout):
super(GCN, self).__init__()
self.g = g
self.layers = nn.ModuleList()
# input layer
self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
# output layer
self.layers.append(GraphConv(n_hidden, n_classes))
self.dropout = nn.Dropout(p=dropout)
def forward(self, features):
h = features
for i, layer in enumerate(self.layers):
if i != 0:
h = self.dropout(h)
h = layer(self.g, h)
return h
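For reference, a minimal sketch (not part of the original file) of wiring this module up on Cora, mirroring the defaults the accompanying training script uses (16 hidden units, one hidden layer, dropout 0.5):

```python
import dgl
import torch.nn.functional as F
from dgl.data import CoraGraphDataset

data = CoraGraphDataset()
g = dgl.add_self_loop(dgl.remove_self_loop(data[0]))   # GraphConv expects no 0-in-degree nodes
features = g.ndata['feat']
model = GCN(g, in_feats=features.shape[1], n_hidden=16, n_classes=data.num_classes,
            n_layers=1, activation=F.relu, dropout=0.5)
logits = model(features)    # one logit vector per node
```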
"""GCN using basic message passing
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import argparse, time, math
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.data import register_data_args
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
def gcn_msg(edge):
msg = edge.src['h'] * edge.src['norm']
return {'m': msg}
def gcn_reduce(node):
accum = torch.sum(node.mailbox['m'], 1) * node.data['norm']
return {'h': accum}
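For intuition, `gcn_msg` and `gcn_reduce` together compute a symmetrically normalized neighborhood sum; a dense sketch of the same update on a toy graph (illustration only; the `norm` field is set up by the training code below) is:

```python
import torch

A = torch.tensor([[0., 1., 1.],
                  [1., 0., 0.],
                  [1., 0., 0.]])                      # toy symmetric adjacency
H = torch.rand(3, 4)                                  # toy node features
norm = torch.pow(A.sum(dim=1), -0.5).unsqueeze(1)     # per-node D^{-1/2}, like g.ndata['norm']
H_out = norm * (A @ (norm * H))                       # H_out = D^{-1/2} A D^{-1/2} H
```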
class NodeApplyModule(nn.Module):
def __init__(self, out_feats, activation=None, bias=True):
super(NodeApplyModule, self).__init__()
if bias:
self.bias = nn.Parameter(torch.Tensor(out_feats))
else:
self.bias = None
self.activation = activation
self.reset_parameters()
def reset_parameters(self):
if self.bias is not None:
stdv = 1. / math.sqrt(self.bias.size(0))
self.bias.data.uniform_(-stdv, stdv)
def forward(self, nodes):
h = nodes.data['h']
if self.bias is not None:
h = h + self.bias
if self.activation:
h = self.activation(h)
return {'h': h}
class GCNLayer(nn.Module):
def __init__(self,
g,
in_feats,
out_feats,
activation,
dropout,
bias=True):
super(GCNLayer, self).__init__()
self.g = g
self.weight = nn.Parameter(torch.Tensor(in_feats, out_feats))
if dropout:
self.dropout = nn.Dropout(p=dropout)
else:
self.dropout = 0.
self.node_update = NodeApplyModule(out_feats, activation, bias)
self.reset_parameters()
def reset_parameters(self):
stdv = 1. / math.sqrt(self.weight.size(1))
self.weight.data.uniform_(-stdv, stdv)
def forward(self, h):
if self.dropout:
h = self.dropout(h)
self.g.ndata['h'] = torch.mm(h, self.weight)
self.g.update_all(gcn_msg, gcn_reduce, self.node_update)
h = self.g.ndata.pop('h')
return h
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout):
super(GCN, self).__init__()
self.layers = nn.ModuleList()
# input layer
self.layers.append(GCNLayer(g, in_feats, n_hidden, activation, dropout))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(GCNLayer(g, n_hidden, n_hidden, activation, dropout))
# output layer
self.layers.append(GCNLayer(g, n_hidden, n_classes, None, dropout))
def forward(self, features):
h = features
for layer in self.layers:
h = layer(h)
return h
def evaluate(model, features, labels, mask):
model.eval()
with torch.no_grad():
logits = model(features)
logits = logits[mask]
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def main(args):
# load and preprocess dataset
if args.dataset == 'cora':
data = CoraGraphDataset()
elif args.dataset == 'citeseer':
data = CiteseerGraphDataset()
elif args.dataset == 'pubmed':
data = PubmedGraphDataset()
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
g = data[0]
if args.gpu < 0:
cuda = False
else:
cuda = True
g = g.to(args.gpu)
features = g.ndata['feat']
labels = g.ndata['label']
train_mask = g.ndata['train_mask']
val_mask = g.ndata['val_mask']
test_mask = g.ndata['test_mask']
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = g.number_of_edges()
print("""----Data statistics------'
#Edges %d
#Classes %d
#Train samples %d
#Val samples %d
#Test samples %d""" %
(n_edges, n_classes,
train_mask.int().sum().item(),
val_mask.int().sum().item(),
test_mask.int().sum().item()))
# add self loop
g = dgl.remove_self_loop(g)
g = dgl.add_self_loop(g)
n_edges = g.number_of_edges()
# normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
if cuda:
norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)
# create GCN model
model = GCN(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
F.relu,
args.dropout)
if cuda:
model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
lr=args.lr,
weight_decay=args.weight_decay)
# initialize graph
dur = []
for epoch in range(args.n_epochs):
model.train()
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features)
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch >= 3:
dur.append(time.time() - t0)
acc = evaluate(model, features, labels, val_mask)
print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
"ETputs(KTEPS) {:.2f}". format(epoch, np.mean(dur), loss.item(),
acc, n_edges / np.mean(dur) / 1000))
print()
acc = evaluate(model, features, labels, test_mask)
print("Test Accuracy {:.4f}".format(acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=200,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
help="Weight for L2 loss")
args = parser.parse_args()
print(args)
main(args)
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
import dgl.nn as dglnn
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop


class GCN(nn.Module):
    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # two-layer GCN
        self.layers.append(dglnn.GraphConv(in_size, hid_size, activation=F.relu))
        self.layers.append(dglnn.GraphConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)

    def forward(self, g, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        return h


def evaluate(g, features, labels, mask, model):
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(g, features, labels, masks, model):
    # define train/val samples, loss function and optimizer
    train_mask = masks[0]
    val_mask = masks[1]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # training loop
    for epoch in range(200):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(g, features, labels, val_mask, model)
        print("Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} "
              .format(epoch, loss.item(), acc))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, default="cora",
                        help="Dataset name ('cora', 'citeseer', 'pubmed').")
    args = parser.parse_args()
    print(f'Training with DGL built-in GraphConv module.')

    # load and preprocess dataset
    transform = AddSelfLoop()  # by default, it will first remove self-loops to prevent duplication
    if args.dataset == 'cora':
        data = CoraGraphDataset(transform=transform)
    elif args.dataset == 'citeseer':
        data = CiteseerGraphDataset(transform=transform)
    elif args.dataset == 'pubmed':
        data = PubmedGraphDataset(transform=transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(args.dataset))
    g = data[0]
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    g = g.int().to(device)
    features = g.ndata['feat']
    labels = g.ndata['label']
    masks = g.ndata['train_mask'], g.ndata['val_mask'], g.ndata['test_mask']

    # normalization
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5).to(device)
    norm[torch.isinf(norm)] = 0
    g.ndata['norm'] = norm.unsqueeze(1)

    # create GCN model
    in_size = features.shape[1]
    out_size = data.num_classes
    model = GCN(in_size, 16, out_size).to(device)

    # model training
    print('Training...')
    train(g, features, labels, masks, model)

    # test the model
    print('Testing...')
    acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(acc))
Graph Isomorphism Network (GIN)
============

Dependencies
------------
- sklearn

Install as follows:

```bash
pip install sklearn
```

How to run
-------

Run with the following for bioinformatics graph classification (available datasets: MUTAG (default), PTC, NCI1, and PROTEINS):

```bash
python3 train.py --dataset MUTAG
```

> **_NOTE:_** Results may fluctuate due to randomness and the relatively small dataset sizes. Consistent with the original [paper](https://arxiv.org/abs/1810.00826), five social-network datasets ('COLLAB', 'IMDBBINARY', 'IMDBMULTI', 'REDDITBINARY' and 'REDDITMULTI5K') are also supported as input. For those datasets, update the script slightly: replace the sum readout used for the bioinformatics datasets with a mean readout, and use one-hot encodings of node degrees as node features by setting `degree_as_nlabel=True` in `GINDataset` (see the sketch after the summary table below).

Summary (10-fold cross-validation)
-------

| Dataset | Result
| ------------- | -------
| MUTAG | ~89.4
| PTC | ~68.5
| NCI1 | ~82.9
| PROTEINS | ~74.1
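A minimal sketch (assuming the simplified `train.py` that ships with this example) of the two changes suggested above for social-network datasets; the dataset name here is just an example:

```python
from dgl.data import GINDataset
from dgl.nn.pytorch.glob import AvgPooling

# use one-hot encodings of node degrees as node features
dataset = GINDataset('IMDBBINARY', self_loop=True, degree_as_nlabel=True)
# ... build the GIN model as train.py does, then swap the readout:
# model.pool = AvgPooling()   # mean readout instead of SumPooling
```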
"""
PyTorch compatible dataloader
"""
import math
import numpy as np
import torch
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import StratifiedKFold
import dgl
from dgl.dataloading import GraphDataLoader
class GINDataLoader():
def __init__(self,
dataset,
batch_size,
device,
collate_fn=None,
seed=0,
shuffle=True,
split_name='fold10',
fold_idx=0,
split_ratio=0.7):
self.shuffle = shuffle
self.seed = seed
self.kwargs = {'pin_memory': True} if 'cuda' in device.type else {}
labels = [l for _, l in dataset]
if split_name == 'fold10':
train_idx, valid_idx = self._split_fold10(
labels, fold_idx, seed, shuffle)
elif split_name == 'rand':
train_idx, valid_idx = self._split_rand(
labels, split_ratio, seed, shuffle)
else:
raise NotImplementedError()
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
self.train_loader = GraphDataLoader(
dataset, sampler=train_sampler,
batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
self.valid_loader = GraphDataLoader(
dataset, sampler=valid_sampler,
batch_size=batch_size, collate_fn=collate_fn, **self.kwargs)
def train_valid_loader(self):
return self.train_loader, self.valid_loader
def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        '''10-fold split.'''
        assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."
skf = StratifiedKFold(n_splits=10, shuffle=shuffle, random_state=seed)
idx_list = []
for idx in skf.split(np.zeros(len(labels)), labels): # split(x, y)
idx_list.append(idx)
train_idx, valid_idx = idx_list[fold_idx]
        print("train_set : test_set = %d : %d" %
              (len(train_idx), len(valid_idx)))
return train_idx, valid_idx
def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
num_entries = len(labels)
indices = list(range(num_entries))
np.random.seed(seed)
np.random.shuffle(indices)
split = int(math.floor(split_ratio * num_entries))
train_idx, valid_idx = indices[:split], indices[split:]
        print("train_set : test_set = %d : %d" %
              (len(train_idx), len(valid_idx)))
return train_idx, valid_idx
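For reference, a minimal sketch (mirroring how `main.py` below consumes this class) of building fold-0 train/validation loaders on CPU:

```python
import torch
from dgl.data import GINDataset

dataset = GINDataset('MUTAG', self_loop=True)
loaders = GINDataLoader(dataset, batch_size=32, device=torch.device('cpu'),
                        seed=0, shuffle=True, split_name='fold10', fold_idx=0)
train_loader, valid_loader = loaders.train_valid_loader()
```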
"""
How Powerful are Graph Neural Networks
https://arxiv.org/abs/1810.00826
https://openreview.net/forum?id=ryGs6iA5Km
Author's implementation: https://github.com/weihua916/powerful-gnns
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GINConv
from dgl.nn.pytorch.glob import SumPooling, AvgPooling, MaxPooling
class ApplyNodeFunc(nn.Module):
"""Update the node feature hv with MLP, BN and ReLU."""
def __init__(self, mlp):
super(ApplyNodeFunc, self).__init__()
self.mlp = mlp
self.bn = nn.BatchNorm1d(self.mlp.output_dim)
def forward(self, h):
h = self.mlp(h)
h = self.bn(h)
h = F.relu(h)
return h
class MLP(nn.Module):
"""MLP with linear output"""
def __init__(self, num_layers, input_dim, hidden_dim, output_dim):
"""MLP layers construction
        Parameters
        ----------
num_layers: int
The number of linear layers
input_dim: int
The dimensionality of input features
hidden_dim: int
The dimensionality of hidden units at ALL layers
output_dim: int
The number of classes for prediction
"""
super(MLP, self).__init__()
self.linear_or_not = True # default is linear model
self.num_layers = num_layers
self.output_dim = output_dim
if num_layers < 1:
raise ValueError("number of layers should be positive!")
elif num_layers == 1:
# Linear model
self.linear = nn.Linear(input_dim, output_dim)
else:
# Multi-layer model
self.linear_or_not = False
self.linears = torch.nn.ModuleList()
self.batch_norms = torch.nn.ModuleList()
self.linears.append(nn.Linear(input_dim, hidden_dim, bias=False))
for layer in range(num_layers - 2):
self.linears.append(nn.Linear(hidden_dim, hidden_dim, bias=False))
self.linears.append(nn.Linear(hidden_dim, output_dim, bias=False))
for layer in range(num_layers - 1):
self.batch_norms.append(nn.BatchNorm1d((hidden_dim)))
def forward(self, x):
if self.linear_or_not:
# If linear model
return self.linear(x)
else:
# If MLP
h = x
for i in range(self.num_layers - 1):
h = F.relu(self.batch_norms[i](self.linears[i](h)))
return self.linears[-1](h)
class GIN(nn.Module):
"""GIN model"""
def __init__(self, num_layers, num_mlp_layers, input_dim, hidden_dim,
output_dim, final_dropout, learn_eps, graph_pooling_type,
neighbor_pooling_type):
"""model parameters setting
        Parameters
        ----------
num_layers: int
The number of linear layers in the neural network
num_mlp_layers: int
The number of linear layers in mlps
input_dim: int
The dimensionality of input features
hidden_dim: int
The dimensionality of hidden units at ALL layers
output_dim: int
The number of classes for prediction
final_dropout: float
dropout ratio on the final linear layer
learn_eps: boolean
If True, learn epsilon to distinguish center nodes from neighbors
If False, aggregate neighbors and center nodes altogether.
neighbor_pooling_type: str
how to aggregate neighbors (sum, mean, or max)
graph_pooling_type: str
how to aggregate entire nodes in a graph (sum, mean or max)
"""
super(GIN, self).__init__()
self.num_layers = num_layers
self.learn_eps = learn_eps
# List of MLPs
self.ginlayers = torch.nn.ModuleList()
self.batch_norms = torch.nn.ModuleList()
for layer in range(self.num_layers - 1):
if layer == 0:
mlp = MLP(num_mlp_layers, input_dim, hidden_dim, hidden_dim)
else:
mlp = MLP(num_mlp_layers, hidden_dim, hidden_dim, hidden_dim)
self.ginlayers.append(
GINConv(ApplyNodeFunc(mlp), neighbor_pooling_type, 0, self.learn_eps))
self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
# Linear function for graph poolings of output of each layer
# which maps the output of different layers into a prediction score
self.linears_prediction = torch.nn.ModuleList()
for layer in range(num_layers):
if layer == 0:
self.linears_prediction.append(
nn.Linear(input_dim, output_dim))
else:
self.linears_prediction.append(
nn.Linear(hidden_dim, output_dim))
self.drop = nn.Dropout(final_dropout)
if graph_pooling_type == 'sum':
self.pool = SumPooling()
elif graph_pooling_type == 'mean':
self.pool = AvgPooling()
elif graph_pooling_type == 'max':
self.pool = MaxPooling()
else:
raise NotImplementedError
def forward(self, g, h):
# list of hidden representation at each layer (including input)
hidden_rep = [h]
for i in range(self.num_layers - 1):
h = self.ginlayers[i](g, h)
h = self.batch_norms[i](h)
h = F.relu(h)
hidden_rep.append(h)
score_over_layer = 0
# perform pooling over all nodes in each graph in every layer
for i, h in enumerate(hidden_rep):
pooled_h = self.pool(g, h)
score_over_layer += self.drop(self.linears_prediction[i](pooled_h))
return score_over_layer
"""Parser for arguments
Put all arguments in one file and group similar arguments
"""
import argparse
class Parser():
def __init__(self, description):
'''
arguments parser
'''
self.parser = argparse.ArgumentParser(description=description)
self.args = None
self._parse()
def _parse(self):
# dataset
self.parser.add_argument(
'--dataset', type=str, default="MUTAG",
choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI', 'NCI1', 'PROTEINS', 'PTC', 'REDDITBINARY', 'REDDITMULTI5K'],
help='name of dataset (default: MUTAG)')
self.parser.add_argument(
'--batch_size', type=int, default=32,
help='batch size for training and validation (default: 32)')
self.parser.add_argument(
'--fold_idx', type=int, default=0,
help='the index(<10) of fold in 10-fold validation.')
self.parser.add_argument(
'--filename', type=str, default="",
help='output file')
self.parser.add_argument(
'--degree_as_nlabel', action="store_true",
help='use one-hot encodings of node degrees as node feature vectors')
# device
self.parser.add_argument(
'--disable-cuda', action='store_true',
help='Disable CUDA')
self.parser.add_argument(
'--device', type=int, default=0,
help='which gpu device to use (default: 0)')
# net
self.parser.add_argument(
'--num_layers', type=int, default=5,
help='number of layers (default: 5)')
self.parser.add_argument(
'--num_mlp_layers', type=int, default=2,
            help='number of MLP layers (default: 2). 1 means linear model.')
self.parser.add_argument(
'--hidden_dim', type=int, default=64,
help='number of hidden units (default: 64)')
# graph
self.parser.add_argument(
'--graph_pooling_type', type=str,
default="sum", choices=["sum", "mean", "max"],
help='type of graph pooling: sum, mean or max')
self.parser.add_argument(
'--neighbor_pooling_type', type=str,
default="sum", choices=["sum", "mean", "max"],
            help='type of neighbor pooling: sum, mean or max')
self.parser.add_argument(
'--learn_eps', action="store_true",
help='learn the epsilon weighting')
# learning
self.parser.add_argument(
'--seed', type=int, default=0,
help='random seed (default: 0)')
self.parser.add_argument(
'--epochs', type=int, default=350,
help='number of epochs to train (default: 350)')
self.parser.add_argument(
'--lr', type=float, default=0.01,
help='learning rate (default: 0.01)')
self.parser.add_argument(
'--final_dropout', type=float, default=0.5,
help='final layer dropout (default: 0.5)')
# done
self.args = self.parser.parse_args()
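Usage is a single call, exactly as `main.py` below does:

```python
from ginparser import Parser

args = Parser(description='GIN').args        # parses sys.argv
print(args.dataset, args.hidden_dim, args.epochs)
```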
import sys
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from dgl.data import GINDataset
from dataloader import GINDataLoader
from ginparser import Parser
from gin import GIN
def train(args, net, trainloader, optimizer, criterion, epoch):
net.train()
running_loss = 0
total_iters = len(trainloader)
# setup the offset to avoid the overlap with mouse cursor
bar = tqdm(range(total_iters), unit='batch', position=2, file=sys.stdout)
for pos, (graphs, labels) in zip(bar, trainloader):
# batch graphs will be shipped to device in forward part of model
labels = labels.to(args.device)
graphs = graphs.to(args.device)
feat = graphs.ndata.pop('attr')
outputs = net(graphs, feat)
loss = criterion(outputs, labels)
running_loss += loss.item()
# backprop
optimizer.zero_grad()
loss.backward()
optimizer.step()
# report
bar.set_description('epoch-{}'.format(epoch))
bar.close()
# the final batch will be aligned
running_loss = running_loss / total_iters
return running_loss
def eval_net(args, net, dataloader, criterion):
net.eval()
total = 0
total_loss = 0
total_correct = 0
for data in dataloader:
graphs, labels = data
graphs = graphs.to(args.device)
labels = labels.to(args.device)
feat = graphs.ndata.pop('attr')
total += len(labels)
outputs = net(graphs, feat)
_, predicted = torch.max(outputs.data, 1)
total_correct += (predicted == labels.data).sum().item()
loss = criterion(outputs, labels)
        # CrossEntropyLoss reduces to the batch mean by default
total_loss += loss.item() * len(labels)
loss, acc = 1.0*total_loss / total, 1.0*total_correct / total
net.train()
return loss, acc
def main(args):
# set up seeds, args.seed supported
torch.manual_seed(seed=args.seed)
np.random.seed(seed=args.seed)
is_cuda = not args.disable_cuda and torch.cuda.is_available()
if is_cuda:
args.device = torch.device("cuda:" + str(args.device))
torch.cuda.manual_seed_all(seed=args.seed)
else:
args.device = torch.device("cpu")
dataset = GINDataset(args.dataset, not args.learn_eps, args.degree_as_nlabel)
trainloader, validloader = GINDataLoader(
dataset, batch_size=args.batch_size, device=args.device,
seed=args.seed, shuffle=True,
split_name='fold10', fold_idx=args.fold_idx).train_valid_loader()
# or split_name='rand', split_ratio=0.7
model = GIN(
args.num_layers, args.num_mlp_layers,
dataset.dim_nfeats, args.hidden_dim, dataset.gclasses,
args.final_dropout, args.learn_eps,
args.graph_pooling_type, args.neighbor_pooling_type).to(args.device)
    criterion = nn.CrossEntropyLoss()  # default reduction is 'mean'
optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    # it's not cost-effective to handle the cursor and init 0
# https://stackoverflow.com/a/23121189
tbar = tqdm(range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout)
vbar = tqdm(range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout)
lrbar = tqdm(range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout)
for epoch, _, _ in zip(tbar, vbar, lrbar):
train(args, model, trainloader, optimizer, criterion, epoch)
scheduler.step()
train_loss, train_acc = eval_net(
args, model, trainloader, criterion)
tbar.set_description(
'train set - average loss: {:.4f}, accuracy: {:.0f}%'
.format(train_loss, 100. * train_acc))
valid_loss, valid_acc = eval_net(
args, model, validloader, criterion)
vbar.set_description(
'valid set - average loss: {:.4f}, accuracy: {:.0f}%'
.format(valid_loss, 100. * valid_acc))
if not args.filename == "":
with open(args.filename, 'a') as f:
f.write('%s %s %s %s %s' % (
args.dataset,
args.learn_eps,
args.neighbor_pooling_type,
args.graph_pooling_type,
epoch
))
f.write("\n")
f.write("%f %f %f %f" % (
train_loss,
train_acc,
valid_loss,
valid_acc
))
f.write("\n")
lrbar.set_description(
"Learning eps with learn_eps={}: {}".format(
args.learn_eps, [layer.eps.data.item() for layer in model.ginlayers]))
tbar.close()
vbar.close()
lrbar.close()
if __name__ == '__main__':
args = Parser(description='GIN').args
print('show all arguments configuration...')
print(args)
main(args)
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.model_selection import StratifiedKFold
from dgl.data import GINDataset
from dgl.dataloading import GraphDataLoader
from dgl.nn.pytorch.conv import GINConv
from dgl.nn.pytorch.glob import SumPooling
import argparse
class MLP(nn.Module):
"""Construct two-layer MLP-type aggreator for GIN model"""
def __init__(self, input_dim, hidden_dim, output_dim):
super().__init__()
self.linears = nn.ModuleList()
# two-layer MLP
self.linears.append(nn.Linear(input_dim, hidden_dim, bias=False))
self.linears.append(nn.Linear(hidden_dim, output_dim, bias=False))
self.batch_norm = nn.BatchNorm1d((hidden_dim))
def forward(self, x):
h = x
h = F.relu(self.batch_norm(self.linears[0](h)))
return self.linears[1](h)
class GIN(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim):
super().__init__()
self.ginlayers = nn.ModuleList()
self.batch_norms = nn.ModuleList()
num_layers = 5
        # five-layer GIN with a two-layer MLP aggregator and sum-neighbor-pooling scheme
for layer in range(num_layers - 1): # excluding the input layer
if layer == 0:
mlp = MLP(input_dim, hidden_dim, hidden_dim)
else:
mlp = MLP(hidden_dim, hidden_dim, hidden_dim)
self.ginlayers.append(GINConv(mlp, learn_eps=False)) # set to True if learning epsilon
self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
# linear functions for graph sum poolings of output of each layer
self.linear_prediction = nn.ModuleList()
for layer in range(num_layers):
if layer == 0:
self.linear_prediction.append(nn.Linear(input_dim, output_dim))
else:
self.linear_prediction.append(nn.Linear(hidden_dim, output_dim))
self.drop = nn.Dropout(0.5)
self.pool = SumPooling() # change to mean readout (AvgPooling) on social network datasets
def forward(self, g, h):
# list of hidden representation at each layer (including the input layer)
hidden_rep = [h]
for i, layer in enumerate(self.ginlayers):
h = layer(g, h)
h = self.batch_norms[i](h)
h = F.relu(h)
hidden_rep.append(h)
score_over_layer = 0
# perform graph sum pooling over all nodes in each layer
for i, h in enumerate(hidden_rep):
pooled_h = self.pool(g, h)
score_over_layer += self.drop(self.linear_prediction[i](pooled_h))
return score_over_layer
def split_fold10(labels, fold_idx=0):
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
idx_list = []
for idx in skf.split(np.zeros(len(labels)), labels):
idx_list.append(idx)
train_idx, valid_idx = idx_list[fold_idx]
return train_idx, valid_idx
def evaluate(dataloader, device, model):
model.eval()
total = 0
total_correct = 0
for batched_graph, labels in dataloader:
batched_graph = batched_graph.to(device)
labels = labels.to(device)
feat = batched_graph.ndata.pop('attr')
total += len(labels)
logits = model(batched_graph, feat)
_, predicted = torch.max(logits, 1)
total_correct += (predicted == labels).sum().item()
acc = 1.0 * total_correct / total
return acc
def train(train_loader, val_loader, device, model):
# loss function, optimizer and scheduler
loss_fcn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
# training loop
for epoch in range(350):
model.train()
total_loss = 0
for batch, (batched_graph, labels) in enumerate(train_loader):
batched_graph = batched_graph.to(device)
labels = labels.to(device)
feat = batched_graph.ndata.pop('attr')
logits = model(batched_graph, feat)
loss = loss_fcn(logits, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
total_loss += loss.item()
scheduler.step()
train_acc = evaluate(train_loader, device, model)
valid_acc = evaluate(val_loader, device, model)
print("Epoch {:05d} | Loss {:.4f} | Train Acc. {:.4f} | Validation Acc. {:.4f} "
. format(epoch, total_loss / (batch + 1), train_acc, valid_acc))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default="MUTAG",
choices=['MUTAG', 'PTC', 'NCI1', 'PROTEINS'],
help='name of dataset (default: MUTAG)')
args = parser.parse_args()
print(f'Training with DGL built-in GINConv module with a fixed epsilon = 0')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# load and split dataset
dataset = GINDataset(args.dataset, self_loop=True, degree_as_nlabel=False) # add self_loop and disable one-hot encoding for input features
labels = [l for _, l in dataset]
train_idx, val_idx = split_fold10(labels)
# create dataloader
train_loader = GraphDataLoader(dataset, sampler=SubsetRandomSampler(train_idx),
batch_size=128, pin_memory=torch.cuda.is_available())
val_loader = GraphDataLoader(dataset, sampler=SubsetRandomSampler(val_idx),
batch_size=128, pin_memory=torch.cuda.is_available())
# create GIN model
in_size = dataset.dim_nfeats
out_size = dataset.gclasses
model = GIN(in_size, 16, out_size).to(device)
# model training/validating
print('Training...')
train(train_loader, val_loader, device, model)
@@ -133,10 +133,9 @@ def run(proc_id, n_gpus, args, devices, data):
        # blocks.
        tic_step = time.time()
        for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(dataloader):
            input_nodes = input_nodes.to(nfeat.device)
            batch_inputs = nfeat[input_nodes].to(device)
            blocks = [block.int() for block in blocks]
            d_step = time.time()

            # Compute loss and prediction
@@ -106,7 +106,7 @@ def compute_mrr(model, node_emb, src, dst, neg_dst, device, batch_size=500):
        h_src = node_emb[src[start:end]][:, None, :].to(device)
        h_dst = node_emb[all_dst.view(-1)].view(*all_dst.shape, -1).to(device)
        pred = model.predict(h_src, h_dst).squeeze(-1)
        relevance = torch.zeros(*pred.shape, dtype=torch.bool).to(pred.device)
        relevance[:, 0] = True
        rr[start:end] = MF.retrieval_reciprocal_rank(pred, relevance)
    return rr.mean()

@@ -117,9 +117,9 @@ def evaluate(model, edge_split, device, num_workers):
    node_emb = model.inference(graph, device, 4096, num_workers, 'cpu')
    results = []
    for split in ['valid', 'test']:
        src = edge_split[split]['source_node'].to(node_emb.device)
        dst = edge_split[split]['target_node'].to(node_emb.device)
        neg_dst = edge_split[split]['target_node_neg'].to(node_emb.device)
        results.append(compute_mrr(model, node_emb, src, dst, neg_dst, device))
    return results
@@ -104,7 +104,6 @@ def train(rank, world_size, graph, num_classes, split_idx):
    # move ids to GPU
    train_idx = train_idx.to('cuda')
    valid_idx = valid_idx.to('cuda')

    # For training, each process/GPU will get a subset of the
    # train_idx/valid_idx, and generate mini-batches independently. This allows
@@ -39,7 +39,7 @@ class SAGE(nn.Module):
            g, torch.arange(g.num_nodes()).to(g.device), sampler, device=device,
            batch_size=batch_size, shuffle=False, drop_last=False,
            num_workers=0)
        buffer_device = torch.device('cpu')
        pin_memory = (buffer_device != device)
        for l, layer in enumerate(self.layers):

@@ -72,9 +72,9 @@ def evaluate(model, graph, dataloader):
def layerwise_infer(device, graph, nid, model, batch_size):
    model.eval()
    with torch.no_grad():
        pred = model.inference(graph, device, batch_size)  # pred in buffer_device
        pred = pred[nid]
        label = graph.ndata['label'][nid].to(pred.device)
        return MF.accuracy(pred, label)

def train(args, device, g, dataset, model):

@@ -141,5 +141,5 @@ if __name__ == '__main__':
    # test the model
    print('Testing...')
    acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096)
    print("Test Accuracy {:.4f}".format(acc.item()))
# PSS
Code for the ECCV '22 submission "PSS: Progressive Sample Selection for Open-World Visual Representation Learning".
## Dependencies
We use Python 3.7 and CUDA 10.2. Besides DGL==0.6.1, we depend on several other packages. To install the dependencies with conda:
```commandline
conda create -n pss python=3.7 # create env
conda activate pss # activate env
conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch # install pytorch 1.7 version
conda install -y cudatoolkit=10.2 faiss-gpu=1.6.5 -c pytorch # install faiss gpu version matching cuda 10.2
pip install dgl-cu102 # install dgl for cuda 10.2
pip install tqdm # install tqdm
pip install matplotlib # install matplotlib
pip install pandas # install pandas
pip install pretrainedmodels # install pretrainedmodels
pip install tensorboardX # install tensorboardX
pip install seaborn # install seaborn
pip install scikit-learn
cd ..
git clone https://github.com/yjxiong/clustering-benchmark.git # install clustering-benchmark for evaluation
cd clustering-benchmark
python setup.py install
cd ../PSS
```
## Data
We use the iNaturalist 2018 dataset.
- download link: https://www.kaggle.com/c/inaturalist-2018/data;
- annotations are in `Smooth_AP/data/Inaturalist`;
- annotation txt files for different data splits are in [S3 link]|[[Google Drive](https://drive.google.com/drive/folders/1xrWogJGef4Ex5OGjiImgA06bAnk2MDrK?usp=sharing)]|[[Baidu Netdisk](https://pan.baidu.com/s/14S0Fns29a4o7kFDlNyyPjA?pwd=uwsg)] (password:uwsg).
Download `train_val2018.tar.gz` and the data split txt files to the `data/Inaturalist/` folder, then extract the `tar.gz` files.
The data folder has the following structure:
```bash
PSS
|- data
| |- Inaturalist
| |- train2018.json.tar.gz
| |- train_val2018.tar.gz
| |- val2018.json.tar.gz
| |- train_val2018
| | |- Actinopterygii
| | |- ...
| |- lin_train_set1.txt
| |- train_set1.txt
| |- uin_train_set1.txt
| |- uout_train_set1.txt
| |- in_train_set1.txt
| |- Inaturalist_test_set1.txt
|-...
```
## Training
Run `bash train.sh` to train the model.
## Test
Run `bash test.sh` to evaluate on the test set.
# Smooth_AP
Adapted from the ECCV '20 paper ["Smooth-AP: Smoothing the Path Towards Large-Scale Image Retrieval"](https://www.robots.ox.ac.uk/~vgg/research/smooth-ap/); the reference code is from https://github.com/Andrew-Brown1/Smooth_AP.
![teaser](https://github.com/Andrew-Brown1/Smooth_AP/blob/master/ims/teaser.png)
# repo originally forked from https://github.com/Confusezius/Deep-Metric-Learning-Baselines
##################################### LIBRARIES ###########################################
import warnings
warnings.filterwarnings("ignore")
import numpy as np, time, pickle as pkl, csv
import matplotlib.pyplot as plt
from scipy.spatial import distance
from sklearn.preprocessing import normalize
from tqdm import tqdm
import torch, torch.nn as nn
import auxiliaries as aux
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')
"""=================================================================================================================="""
"""=================================================================================================================="""
"""========================================================="""
def evaluate(dataset, LOG, **kwargs):
"""
Given a dataset name, applies the correct evaluation function.
Args:
dataset: str, name of dataset.
LOG: aux.LOGGER instance, main logging class.
**kwargs: Input Argument Dict, depends on dataset.
Returns:
(optional) Computed metrics. Are normally written directly to LOG and printed.
"""
if dataset in ['Inaturalist', 'semi_fungi']:
ret = evaluate_one_dataset(LOG, **kwargs)
elif dataset in ['vehicle_id']:
ret = evaluate_multiple_datasets(LOG, **kwargs)
else:
        raise Exception('No implementation for dataset {} available!'.format(dataset))
return ret
"""========================================================="""
class DistanceMeasure():
"""
Container class to run and log the change of distance ratios
between intra-class distances and inter-class distances.
"""
def __init__(self, checkdata, opt, name='Train', update_epochs=1):
"""
Args:
checkdata: PyTorch DataLoader, data to check distance progression.
opt: argparse.Namespace, contains all training-specific parameters.
name: str, Name of instance. Important for savenames.
update_epochs: int, Only compute distance ratios every said epoch.
Returns:
Nothing!
"""
self.update_epochs = update_epochs
self.pars = opt
self.save_path = opt.save_path
self.name = name
self.csv_file = opt.save_path+'/distance_measures_{}.csv'.format(self.name)
with open(self.csv_file,'a') as csv_file:
writer = csv.writer(csv_file, delimiter=',')
writer.writerow(['Rel. Intra/Inter Distance'])
self.checkdata = checkdata
self.mean_class_dists = []
self.epochs = []
def measure(self, model, epoch):
"""
Compute distance ratios of intra- and interclass distance.
Args:
model: PyTorch Network, network that produces the resp. embeddings.
epoch: Current epoch.
Returns:
Nothing!
"""
if epoch%self.update_epochs: return
self.epochs.append(epoch)
torch.cuda.empty_cache()
_ = model.eval()
#Compute Embeddings
with torch.no_grad():
feature_coll, target_coll = [],[]
data_iter = tqdm(self.checkdata, desc='Estimating Data Distances...')
for idx, data in enumerate(data_iter):
input_img, target = data[1], data[0]
features = model(input_img.to(self.pars.device))
feature_coll.extend(features.cpu().detach().numpy().tolist())
target_coll.extend(target.numpy().tolist())
feature_coll = np.vstack(feature_coll).astype('float32')
target_coll = np.hstack(target_coll).reshape(-1)
avail_labels = np.unique(target_coll)
        #Compute indices of embeddings for each class.
class_positions = []
for lab in avail_labels:
class_positions.append(np.where(target_coll==lab)[0])
#Compute average intra-class distance and center of mass.
com_class, dists_class = [],[]
for class_pos in class_positions:
dists = distance.cdist(feature_coll[class_pos],feature_coll[class_pos],'cosine')
dists = np.sum(dists)/(len(dists)**2-len(dists))
# dists = np.linalg.norm(np.std(feature_coll_aux[class_pos],axis=0).reshape(1,-1)).reshape(-1)
com = normalize(np.mean(feature_coll[class_pos],axis=0).reshape(1,-1)).reshape(-1)
dists_class.append(dists)
com_class.append(com)
#Compute mean inter-class distances by the class-coms.
mean_inter_dist = distance.cdist(np.array(com_class), np.array(com_class), 'cosine')
mean_inter_dist = np.sum(mean_inter_dist)/(len(mean_inter_dist)**2-len(mean_inter_dist))
#Compute distance ratio
mean_class_dist = np.mean(np.array(dists_class)/mean_inter_dist)
self.mean_class_dists.append(mean_class_dist)
self.update(mean_class_dist)
def update(self, mean_class_dist):
"""
Update Loggers.
Args:
mean_class_dist: float, Distance Ratio
Returns:
Nothing!
"""
self.update_csv(mean_class_dist)
self.update_plot()
def update_csv(self, mean_class_dist):
"""
Update CSV.
Args:
mean_class_dist: float, Distance Ratio
Returns:
Nothing!
"""
with open(self.csv_file, 'a') as csv_file:
writer = csv.writer(csv_file, delimiter=',')
writer.writerow([mean_class_dist])
def update_plot(self):
"""
Update progression plot.
Args:
None.
Returns:
Nothing!
"""
plt.style.use('ggplot')
f,ax = plt.subplots(1)
ax.set_title('Mean Intra- over Interclassdistances')
ax.plot(self.epochs, self.mean_class_dists, label='Class')
f.legend()
f.set_size_inches(15,8)
f.savefig(self.save_path+'/distance_measures_{}.svg'.format(self.name))
class GradientMeasure():
"""
Container for gradient measure functionalities.
Measure the gradients coming from the embedding layer to the final conv. layer
to examine learning signal.
"""
def __init__(self, opt, name='class-it'):
"""
Args:
opt: argparse.Namespace, contains all training-specific parameters.
name: Name of class instance. Important for the savename.
Returns:
Nothing!
"""
self.pars = opt
self.name = name
self.saver = {'grad_normal_mean':[], 'grad_normal_std':[], 'grad_abs_mean':[], 'grad_abs_std':[]}
def include(self, params):
"""
Include the gradients for a set of parameters, normally the final embedding layer.
Args:
params: PyTorch Network layer after .backward() was called.
Returns:
Nothing!
"""
gradients = [params.weight.grad.detach().cpu().numpy()]
for grad in gradients:
### Shape: 128 x 2048
self.saver['grad_normal_mean'].append(np.mean(grad,axis=0))
self.saver['grad_normal_std'].append(np.std(grad,axis=0))
self.saver['grad_abs_mean'].append(np.mean(np.abs(grad),axis=0))
self.saver['grad_abs_std'].append(np.std(np.abs(grad),axis=0))
def dump(self, epoch):
"""
Append all gradients to a pickle file.
Args:
epoch: Current epoch
Returns:
Nothing!
"""
with open(self.pars.save_path+'/grad_dict_{}.pkl'.format(self.name),'ab') as f:
pkl.dump([self.saver], f)
self.saver = {'grad_normal_mean':[], 'grad_normal_std':[], 'grad_abs_mean':[], 'grad_abs_std':[]}
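A minimal, self-contained sketch of driving `GradientMeasure` for one backward pass (the `opt` namespace and the linear layer below are stand-ins; only `opt.save_path` is needed by `dump`):

```python
import argparse
import torch
import torch.nn as nn

opt = argparse.Namespace(save_path='.')            # stand-in options object
layer = nn.Linear(8, 4)                            # stand-in for the final embedding layer
loss = layer(torch.randn(2, 8)).sum()
loss.backward()

gm = GradientMeasure(opt, name='demo')
gm.include(layer)        # records mean/std of raw and absolute gradients of layer.weight
gm.dump(epoch=0)         # appends the recorded statistics to ./grad_dict_demo.pkl
```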
"""========================================================="""
def evaluate_one_dataset(LOG, dataloader, model, opt, save=True, give_return=True, epoch=0):
"""
Compute evaluation metrics, update LOGGER and print results.
Args:
LOG: aux.LOGGER-instance. Main Logging Functionality.
dataloader: PyTorch Dataloader, Testdata to be evaluated.
model: PyTorch Network, Network to evaluate.
opt: argparse.Namespace, contains all training-specific parameters.
save: bool, if True, Checkpoints are saved when testing metrics (specifically Recall @ 1) improve.
give_return: bool, if True, return computed metrics.
epoch: int, current epoch, required for logger.
Returns:
(optional) Computed metrics. Are normally written directly to LOG and printed.
"""
start = time.time()
image_paths = np.array(dataloader.dataset.image_list)
with torch.no_grad():
#Compute Metrics
F1, NMI, recall_at_ks, feature_matrix_all = aux.eval_metrics_one_dataset(model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt)
#Make printable summary string.
result_str = ', '.join('@{0}: {1:.4f}'.format(k,rec) for k,rec in zip(opt.k_vals, recall_at_ks))
result_str = 'Epoch (Test) {0}: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]'.format(epoch, NMI, F1, result_str)
if LOG is not None:
if save:
if not len(LOG.progress_saver['val']['Recall @ 1']) or recall_at_ks[0]>np.max(LOG.progress_saver['val']['Recall @ 1']):
#Save Checkpoint
print("Set checkpoint at {}.".format(LOG.prop.save_path+'/checkpoint_{}.pth.tar'.format(opt.iter)))
aux.set_checkpoint(model, opt, LOG.progress_saver, LOG.prop.save_path+'/checkpoint_{}.pth.tar'.format(opt.iter))
# aux.recover_closest_one_dataset(feature_matrix_all, image_paths, LOG.prop.save_path+'/sample_recoveries.png')
#Update logs.
LOG.log('val', LOG.metrics_to_log['val'], [epoch, np.round(time.time()-start), NMI, F1]+recall_at_ks)
print(result_str)
    if give_return:
        return recall_at_ks, NMI, F1
    else:
        return None
"""========================================================="""
def evaluate_query_and_gallery_dataset(LOG, query_dataloader, gallery_dataloader, model, opt, save=True, give_return=True, epoch=0):
"""
Compute evaluation metrics, update LOGGER and print results, specifically for In-Shop Clothes.
Args:
LOG: aux.LOGGER-instance. Main Logging Functionality.
query_dataloader: PyTorch Dataloader, Query-testdata to be evaluated.
gallery_dataloader: PyTorch Dataloader, Gallery-testdata to be evaluated.
model: PyTorch Network, Network to evaluate.
opt: argparse.Namespace, contains all training-specific parameters.
save: bool, if True, Checkpoints are saved when testing metrics (specifically Recall @ 1) improve.
give_return: bool, if True, return computed metrics.
epoch: int, current epoch, required for logger.
Returns:
(optional) Computed metrics. Are normally written directly to LOG and printed.
"""
start = time.time()
query_image_paths = np.array([x[0] for x in query_dataloader.dataset.image_list])
gallery_image_paths = np.array([x[0] for x in gallery_dataloader.dataset.image_list])
with torch.no_grad():
        #Compute Metrics.
F1, NMI, recall_at_ks, query_feature_matrix_all, gallery_feature_matrix_all = aux.eval_metrics_query_and_gallery_dataset(model, query_dataloader, gallery_dataloader, device=opt.device, k_vals = opt.k_vals, opt=opt)
#Generate printable summary string.
result_str = ', '.join('@{0}: {1:.4f}'.format(k,rec) for k,rec in zip(opt.k_vals, recall_at_ks))
result_str = 'Epoch (Test) {0}: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]'.format(epoch, NMI, F1, result_str)
if LOG is not None:
if save:
if not len(LOG.progress_saver['val']['Recall @ 1']) or recall_at_ks[0]>np.max(LOG.progress_saver['val']['Recall @ 1']):
#Save Checkpoint
aux.set_checkpoint(model, opt, LOG.progress_saver, LOG.prop.save_path+'/checkpoint.pth.tar')
aux.recover_closest_inshop(query_feature_matrix_all, gallery_feature_matrix_all, query_image_paths, gallery_image_paths, LOG.prop.save_path+'/sample_recoveries.png')
#Update logs.
LOG.log('val', LOG.metrics_to_log['val'], [epoch, np.round(time.time()-start), NMI, F1]+recall_at_ks)
print(result_str)
    if give_return:
        return recall_at_ks, NMI, F1
    else:
        return None
"""========================================================="""
def evaluate_multiple_datasets(LOG, dataloaders, model, opt, save=True, give_return=True, epoch=0):
"""
Compute evaluation metrics, update LOGGER and print results, specifically for Multi-test datasets s.a. PKU Vehicle ID.
Args:
LOG: aux.LOGGER-instance. Main Logging Functionality.
dataloaders: List of PyTorch Dataloaders, test-dataloaders to evaluate.
model: PyTorch Network, Network to evaluate.
opt: argparse.Namespace, contains all training-specific parameters.
        save: bool, if True, Checkpoints are saved when testing metrics (specifically Recall @ 1) improve.
        give_return: bool, if True, return computed metrics.
        epoch: int, current epoch, required for logger.
    Returns:
(optional) Computed metrics. Are normally written directly to LOG and printed.
"""
start = time.time()
csv_data = [epoch]
with torch.no_grad():
for i,dataloader in enumerate(dataloaders):
print('Working on Set {}/{}'.format(i+1, len(dataloaders)))
image_paths = np.array(dataloader.dataset.image_list)
#Compute Metrics for specific testset.
F1, NMI, recall_at_ks, feature_matrix_all = aux.eval_metrics_one_dataset(model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt)
#Generate printable summary string.
result_str = ', '.join('@{0}: {1:.4f}'.format(k,rec) for k,rec in zip(opt.k_vals, recall_at_ks))
            result_str = 'SET {0}: Epoch (Test) {1}: NMI [{2:.4f}] | F1 [{3:.4f}] | Recall [{4}]'.format(i+1, epoch, NMI, F1, result_str)
if LOG is not None:
if save:
if not len(LOG.progress_saver['val']['Set {} Recall @ 1'.format(i)]) or recall_at_ks[0]>np.max(LOG.progress_saver['val']['Set {} Recall @ 1'.format(i)]):
#Save Checkpoint for specific test set.
aux.set_checkpoint(model, opt, LOG.progress_saver, LOG.prop.save_path+'/checkpoint_set{}.pth.tar'.format(i+1))
aux.recover_closest_one_dataset(feature_matrix_all, image_paths, LOG.prop.save_path+'/sample_recoveries_set{}.png'.format(i+1))
csv_data += [NMI, F1]+recall_at_ks
print(result_str)
csv_data.insert(0, np.round(time.time()-start))
#Update logs.
LOG.log('val', LOG.metrics_to_log['val'], csv_data)
#if give_return:
return csv_data[2:]
#else:
# None