Unverified Commit d78a3a4b authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4640)



* auto fix

* add more

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 23d09057
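The commit itself is mechanical: Black re-wraps long signatures and calls, normalizes string literals to double quotes, and the "sort" bullet corresponds to regrouped import blocks. A hedged sketch of reproducing the result locally (the target directory, the isort step, and default options are assumptions; the repository's own lint configuration, if any, takes precedence):

# Assumed commands; Black (and isort, if that is what "sort" refers to)
# must be installed, and exact options may be pinned in the repo config.
python -m black benchmarks/
python -m isort benchmarks/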
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl
from dgl.nn.pytorch import GATConv

from .. import utils


class GAT(nn.Module):
    def __init__(
        self,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super(GAT, self).__init__()
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation
        # input projection (no residual)
        self.gat_layers.append(
            GATConv(
                in_dim,
                num_hidden,
                heads[0],
                feat_drop,
                attn_drop,
                negative_slope,
                False,
                self.activation,
            )
        )
        # hidden layers
        for l in range(1, num_layers):
            # due to multi-head, the in_dim = num_hidden * num_heads
            self.gat_layers.append(
                GATConv(
                    num_hidden * heads[l - 1],
                    num_hidden,
                    heads[l],
                    feat_drop,
                    attn_drop,
                    negative_slope,
                    residual,
                    self.activation,
                )
            )
        # output projection
        self.gat_layers.append(
            GATConv(
                num_hidden * heads[-2],
                num_classes,
                heads[-1],
                feat_drop,
                attn_drop,
                negative_slope,
                residual,
                None,
            )
        )

    def forward(self, g, inputs):
        h = inputs

@@ -46,8 +78,9 @@ class GAT(nn.Module):
        logits = self.gat_layers[-1](g, h).mean(1)
        return logits


@utils.benchmark("time")
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()

@@ -55,11 +88,11 @@ def track_time(data):
    g = data[0].to(device)

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes

@@ -68,17 +101,14 @@ def track_time(data):
    g = dgl.add_self_loop(g)

    # create model
    model = GAT(1, in_feats, 8, n_classes, [8, 1], F.elu, 0.6, 0.6, 0.2, False)
    loss_fcn = torch.nn.CrossEntropyLoss()
    model = model.to(device)
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # dry run
    for epoch in range(10):
...
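For context, a minimal sketch of one training step for the GAT benchmark above, using only names defined in that file; the epoch-loop body is truncated in this diff, so the exact step is an assumption:

# Hedged sketch of a single training step; mirrors the visible setup code.
logits = model(g, features)  # (num_nodes, n_classes) after averaging heads
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()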
import time

import torch
import torch.nn as nn
import torch.nn.functional as F

import dgl

from .. import utils


class GraphConv(nn.Module):
    def __init__(self, in_dim, out_dim, activation=None):
        super(GraphConv, self).__init__()

@@ -19,39 +22,42 @@ class GraphConv(nn.Module):
    def forward(self, graph, feat):
        with graph.local_scope():
            graph.ndata["ci"] = torch.pow(
                graph.out_degrees().float().clamp(min=1), -0.5
            )
            graph.ndata["cj"] = torch.pow(
                graph.in_degrees().float().clamp(min=1), -0.5
            )
            graph.ndata["h"] = feat
            graph.update_all(self.mfunc, self.rfunc)
            h = graph.ndata["h"]
            h = torch.matmul(h, self.weight) + self.bias
            if self.activation is not None:
                h = self.activation(h)
            return h

    def mfunc(self, edges):
        return {"m": edges.src["h"], "ci": edges.src["ci"]}

    def rfunc(self, nodes):
        ci = nodes.mailbox["ci"].unsqueeze(2)
        newh = (nodes.mailbox["m"] * ci).sum(1) * nodes.data["cj"].unsqueeze(1)
        return {"h": newh}


class GCN(nn.Module):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super(GCN, self).__init__()
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(
                GraphConv(n_hidden, n_hidden, activation=activation)
            )
        # output layer
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = nn.Dropout(p=dropout)

@@ -64,19 +70,20 @@ class GCN(nn.Module):
            h = layer(g, h)
        return h


@utils.benchmark("time", timeout=300)
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0].to(device).int()

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes

@@ -88,7 +95,7 @@ def track_time(data):
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata["norm"] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(in_feats, 16, n_classes, 1, F.relu, 0.5)

@@ -98,9 +105,7 @@ def track_time(data):
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # dry run
    for epoch in range(5):
        logits = model(g, features)
...
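The handwritten GraphConv above implements the symmetrically normalized GCN update, scaling each message by c_i^{-1/2} * c_j^{-1/2} of the clamped out-/in-degrees. As an illustration only (not part of this benchmark), a sketch of the same computation with DGL's built-in layer, assuming dgl.nn.pytorch.GraphConv with norm="both":

# Sketch: the built-in layer with norm="both" applies the same
# degree-normalized aggregation as the custom GraphConv above.
from dgl.nn.pytorch import GraphConv as DGLGraphConv

layer = DGLGraphConv(in_feats, 16, norm="both", activation=F.relu).to(device)
h = layer(g, features)  # same output contract as the custom layer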
import argparse
import pickle
import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, IterableDataset

import dgl
import dgl.function as fn

from .. import utils


def _init_input_modules(g, ntype, textset, hidden_dims):
    # We initialize the linear projections of each input feature ``x`` as
    # follows:

@@ -31,8 +35,7 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
            module_dict[column] = m
        elif data.dtype == torch.int64:
            assert data.ndim == 1
            m = nn.Embedding(data.max() + 2, hidden_dims, padding_idx=-1)
            nn.init.xavier_uniform_(m.weight)
            module_dict[column] = m

@@ -45,14 +48,17 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
    return module_dict


class BagOfWordsPretrained(nn.Module):
    def __init__(self, field, hidden_dims):
        super().__init__()

        input_dims = field.vocab.vectors.shape[1]
        self.emb = nn.Embedding(
            len(field.vocab.itos),
            input_dims,
            padding_idx=field.vocab.stoi[field.pad_token],
        )
        self.emb.weight[:] = field.vocab.vectors
        self.proj = nn.Linear(input_dims, hidden_dims)
        nn.init.xavier_uniform_(self.proj.weight)

@@ -68,18 +74,22 @@ class BagOfWordsPretrained(nn.Module):
        x = self.emb(x).sum(1) / length.unsqueeze(1).float()
        return self.proj(x)


class BagOfWords(nn.Module):
    def __init__(self, field, hidden_dims):
        super().__init__()

        self.emb = nn.Embedding(
            len(field.vocab.itos),
            hidden_dims,
            padding_idx=field.vocab.stoi[field.pad_token],
        )
        nn.init.xavier_uniform_(self.emb.weight)

    def forward(self, x, length):
        return self.emb(x).sum(1) / length.unsqueeze(1).float()


class WeightedSAGEConv(nn.Module):
    def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu):
        super().__init__()

@@ -91,7 +101,7 @@ class WeightedSAGEConv(nn.Module):
        self.dropout = nn.Dropout(0.5)

    def reset_parameters(self):
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_uniform_(self.Q.weight, gain=gain)
        nn.init.xavier_uniform_(self.W.weight, gain=gain)
        nn.init.constant_(self.Q.bias, 0)

@@ -105,18 +115,21 @@ class WeightedSAGEConv(nn.Module):
        """
        h_src, h_dst = h
        with g.local_scope():
            g.srcdata["n"] = self.act(self.Q(self.dropout(h_src)))
            g.edata["w"] = weights.float()
            g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n"))
            g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws"))
            n = g.dstdata["n"]
            ws = g.dstdata["ws"].unsqueeze(1).clamp(min=1)
            z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1))))
            z_norm = z.norm(2, 1, keepdim=True)
            z_norm = torch.where(
                z_norm == 0, torch.tensor(1.0).to(z_norm), z_norm
            )
            z = z / z_norm
            return z


class SAGENet(nn.Module):
    def __init__(self, hidden_dims, n_layers):
        """

@@ -130,28 +143,34 @@ class SAGENet(nn.Module):
        self.convs = nn.ModuleList()
        for _ in range(n_layers):
            self.convs.append(
                WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims)
            )

    def forward(self, blocks, h):
        for layer, block in zip(self.convs, blocks):
            h_dst = h[: block.number_of_nodes("DST/" + block.ntypes[0])]
            h = layer(block, (h, h_dst), block.edata["weights"])
        return h


class LinearProjector(nn.Module):
    """
    Projects each input feature of the graph linearly and sums them up
    """

    def __init__(self, full_graph, ntype, textset, hidden_dims):
        super().__init__()

        self.ntype = ntype
        self.inputs = _init_input_modules(
            full_graph, ntype, textset, hidden_dims
        )

    def forward(self, ndata):
        projections = []
        for feature, data in ndata.items():
            if feature == dgl.NID or feature.endswith("__len"):
                # This is an additional feature indicating the length of the ``feature``
                # column; we shouldn't process this.
                continue

@@ -159,7 +178,7 @@ class LinearProjector(nn.Module):
            module = self.inputs[feature]
            if isinstance(module, (BagOfWords, BagOfWordsPretrained)):
                # Textual feature; find the length and pass it to the textual module.
                length = ndata[feature + "__len"]
                result = module(data, length)
            else:
                result = module(data)

@@ -167,6 +186,7 @@ class LinearProjector(nn.Module):
        return torch.stack(projections, 1).sum(1)


class ItemToItemScorer(nn.Module):
    def __init__(self, full_graph, ntype):
        super().__init__()

@@ -177,7 +197,7 @@ class ItemToItemScorer(nn.Module):
    def _add_bias(self, edges):
        bias_src = self.bias[edges.src[dgl.NID]]
        bias_dst = self.bias[edges.dst[dgl.NID]]
        return {"s": edges.data["s"] + bias_src + bias_dst}

    def forward(self, item_item_graph, h):
        """

@@ -185,12 +205,13 @@ class ItemToItemScorer(nn.Module):
        h : hidden state of every node
        """
        with item_item_graph.local_scope():
            item_item_graph.ndata["h"] = h
            item_item_graph.apply_edges(fn.u_dot_v("h", "h", "s"))
            item_item_graph.apply_edges(self._add_bias)
            pair_score = item_item_graph.edata["s"]
        return pair_score


class PinSAGEModel(nn.Module):
    def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers):
        super().__init__()

@@ -210,6 +231,7 @@ class PinSAGEModel(nn.Module):
        h_item_dst = self.proj(blocks[-1].dstdata)
        return h_item_dst + self.sage(blocks, h_item)


def compact_and_copy(frontier, seeds):
    block = dgl.to_block(frontier, seeds)
    for col, data in frontier.edata.items():

@@ -218,6 +240,7 @@ def compact_and_copy(frontier, seeds):
        block.edata[col] = data[block.edata[dgl.EID]]
    return block


class ItemToItemBatchSampler(IterableDataset):
    def __init__(self, g, user_type, item_type, batch_size):
        self.g = g

@@ -229,42 +252,69 @@ class ItemToItemBatchSampler(IterableDataset):
    def __iter__(self):
        while True:
            heads = torch.randint(
                0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
            )
            tails = dgl.sampling.random_walk(
                self.g,
                heads,
                metapath=[self.item_to_user_etype, self.user_to_item_etype],
            )[0][:, 2]
            neg_tails = torch.randint(
                0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
            )

            mask = tails != -1
            yield heads[mask], tails[mask], neg_tails[mask]


class NeighborSampler(object):
    def __init__(
        self,
        g,
        user_type,
        item_type,
        random_walk_length,
        random_walk_restart_prob,
        num_random_walks,
        num_neighbors,
        num_layers,
    ):
        self.g = g
        self.user_type = user_type
        self.item_type = item_type
        self.user_to_item_etype = list(g.metagraph()[user_type][item_type])[0]
        self.item_to_user_etype = list(g.metagraph()[item_type][user_type])[0]
        self.samplers = [
            dgl.sampling.PinSAGESampler(
                g,
                item_type,
                user_type,
                random_walk_length,
                random_walk_restart_prob,
                num_random_walks,
                num_neighbors,
            )
            for _ in range(num_layers)
        ]

    def sample_blocks(self, seeds, heads=None, tails=None, neg_tails=None):
        blocks = []
        for sampler in self.samplers:
            frontier = sampler(seeds)
            if heads is not None:
                eids = frontier.edge_ids(
                    torch.cat([heads, heads]),
                    torch.cat([tails, neg_tails]),
                    return_uv=True,
                )[2]
                if len(eids) > 0:
                    old_frontier = frontier
                    frontier = dgl.remove_edges(old_frontier, eids)
                    # print(old_frontier)
                    # print(frontier)
                    # print(frontier.edata['weights'])
                    # frontier.edata['weights'] = old_frontier.edata['weights'][frontier.edata[dgl.EID]]
            block = compact_and_copy(frontier, seeds)
            seeds = block.srcdata[dgl.NID]
            blocks.insert(0, block)

@@ -274,17 +324,18 @@ class NeighborSampler(object):
        # Create a graph with positive connections only and another graph with negative
        # connections only.
        pos_graph = dgl.graph(
            (heads, tails), num_nodes=self.g.number_of_nodes(self.item_type)
        )
        neg_graph = dgl.graph(
            (heads, neg_tails), num_nodes=self.g.number_of_nodes(self.item_type)
        )
        pos_graph, neg_graph = dgl.compact_graphs([pos_graph, neg_graph])
        seeds = pos_graph.ndata[dgl.NID]

        blocks = self.sample_blocks(seeds, heads, tails, neg_tails)
        return pos_graph, neg_graph, blocks


def assign_simple_node_features(ndata, g, ntype, assign_id=False):
    """
    Copies data to the given block from the corresponding nodes in the original graph.

@@ -295,6 +346,7 @@ def assign_simple_node_features(ndata, g, ntype, assign_id=False):
        induced_nodes = ndata[dgl.NID]
        ndata[col] = g.nodes[ntype].data[col][induced_nodes]


def assign_textual_node_features(ndata, textset, ntype):
    """
    Assigns numericalized tokens from a torchtext dataset to given block.

@@ -327,7 +379,8 @@ def assign_textual_node_features(ndata, textset, ntype):
        tokens = tokens.t()

        ndata[field_name] = tokens
        ndata[field_name + "__len"] = lengths


def assign_features_to_blocks(blocks, g, textset, ntype):
    # For the first block (which is closest to the input), copy the features from

@@ -337,6 +390,7 @@ def assign_features_to_blocks(blocks, g, textset, ntype):
    assign_simple_node_features(blocks[-1].dstdata, g, ntype)
    assign_textual_node_features(blocks[-1].dstdata, textset, ntype)


class PinSAGECollator(object):
    def __init__(self, sampler, g, ntype, textset):
        self.sampler = sampler

@@ -347,7 +401,9 @@ class PinSAGECollator(object):
    def collate_train(self, batches):
        heads, tails, neg_tails = batches[0]
        # Construct multilayer neighborhood via PinSAGE...
        pos_graph, neg_graph, blocks = self.sampler.sample_from_item_pairs(
            heads, tails, neg_tails
        )
        assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
        return pos_graph, neg_graph, blocks

@@ -358,8 +414,9 @@ class PinSAGECollator(object):
        assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
        return blocks


@utils.benchmark("time", 600)
@utils.parametrize("data", ["nowplaying_rs"])
def track_time(data):
    dataset = utils.process_data(data)
    device = utils.get_bench_device()

@@ -383,24 +440,35 @@ def track_time(data):
    g = dataset[0]
    # Sampler
    batch_sampler = ItemToItemBatchSampler(
        g, user_ntype, item_ntype, batch_size
    )
    neighbor_sampler = NeighborSampler(
        g,
        user_ntype,
        item_ntype,
        random_walk_length,
        random_walk_restart_prob,
        num_random_walks,
        num_neighbors,
        num_layers,
    )
    collator = PinSAGECollator(neighbor_sampler, g, item_ntype, textset)
    dataloader = DataLoader(
        batch_sampler,
        collate_fn=collator.collate_train,
        num_workers=num_workers,
    )
    dataloader_test = DataLoader(
        torch.arange(g.number_of_nodes(item_ntype)),
        batch_size=batch_size,
        collate_fn=collator.collate_test,
        num_workers=num_workers,
    )

    # Model
    model = PinSAGEModel(g, item_ntype, textset, hidden_dims, num_layers).to(
        device
    )
    # Optimizer
    opt = torch.optim.Adam(model.parameters(), lr=lr)

@@ -423,7 +491,9 @@ def track_time(data):
        # start timer at before iter_start
        if batch_id == iter_start - 1:
            t0 = time.time()
        elif (
            batch_id == iter_count + iter_start - 1
        ):  # time iter_count iterations
            break
    t1 = time.time()
...
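For orientation, a hedged sketch of how the training dataloader above is consumed; the loop body is truncated in this diff, and the forward signature (pos_graph, neg_graph, blocks) returning a per-pair margin loss follows DGL's PinSAGE example and is an assumption here:

# Hedged sketch of the truncated training loop; the model call is assumed.
for batch_id, (pos_graph, neg_graph, blocks) in enumerate(dataloader):
    pos_graph = pos_graph.to(device)
    neg_graph = neg_graph.to(device)
    blocks = [b.to(device) for b in blocks]
    loss = model(pos_graph, neg_graph, blocks).mean()
    opt.zero_grad()
    loss.backward()
    opt.step()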
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy

from .. import rgcn, utils


@utils.benchmark("time", 1200)
@utils.parametrize("data", ["aifb", "am"])
def track_time(data):
    # args
    if data == "aifb":
        num_bases = -1
        l2norm = 0.0
    elif data == "am":
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError()

    (
        g,
        num_rels,
        num_classes,
        labels,
        train_idx,
        test_idx,
        target_idx,
    ) = rgcn.load_data(data, get_norm=True)

    num_hidden = 16
    model = rgcn.RGCN(
        g.num_nodes(), num_hidden, num_classes, num_rels, num_bases=num_bases
    )
    device = utils.get_bench_device()

    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)

    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=l2norm
    )

    model.train()
    num_epochs = 30
...
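A hedged sketch of the truncated epoch loop; rgcn.RGCN's forward signature is not shown in this diff, so model(g) returning per-node logits and target_idx mapping graph nodes to the labeled entities (as in DGL's RGCN entity-classification example) are assumptions:

# Hedged sketch of one epoch of the benchmark's training loop.
for epoch in range(num_epochs):
    logits = model(g)[target_idx]
    loss = F.cross_entropy(logits[train_idx], labels[train_idx])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()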
import time

import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn

from .. import utils


class NegativeSampler(object):
    def __init__(self, g, k, neg_share=False):
        self.weights = g.in_degrees().float() ** 0.75

@@ -23,34 +26,32 @@ class NegativeSampler(object):
            dst = self.weights.multinomial(n, replacement=True)
            dst = dst.view(-1, 1, self.k).expand(-1, self.k, -1).flatten()
        else:
            dst = self.weights.multinomial(n * self.k, replacement=True)
        src = src.repeat_interleave(self.k)
        return src, dst


def load_subtensor(g, input_nodes, device):
    """
    Copies features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    return batch_inputs


class SAGE(nn.Module):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        for i in range(1, n_layers - 1):
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

@@ -63,40 +64,45 @@ class SAGE(nn.Module):
            h = self.dropout(h)
        return h


def load_subtensor(g, input_nodes, device):
    """
    Copies features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    return batch_inputs


class CrossEntropyLoss(nn.Module):
    def forward(self, block_outputs, pos_graph, neg_graph):
        with pos_graph.local_scope():
            pos_graph.ndata["h"] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            pos_score = pos_graph.edata["score"]
        with neg_graph.local_scope():
            neg_graph.ndata["h"] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
            neg_score = neg_graph.edata["score"]

        score = th.cat([pos_score, neg_score])
        label = th.cat(
            [th.ones_like(pos_score), th.zeros_like(neg_score)]
        ).long()
        loss = F.binary_cross_entropy_with_logits(score, label.float())
        return loss


@utils.benchmark("time", 600)
@utils.parametrize("data", ["reddit"])
@utils.parametrize("num_negs", [2, 8, 32])
@utils.parametrize("batch_size", [1024, 2048, 8192])
def track_time(data, num_negs, batch_size):
    data = utils.process_data(data)
    device = utils.get_bench_device()
    g = data[0]
    g.ndata["features"] = g.ndata["feat"]
    g.ndata["labels"] = g.ndata["label"]
    in_feats = g.ndata["features"].shape[1]
    n_classes = data.num_classes

    # Create csr/coo/csc formats before launching training processes with multi-gpu.

@@ -106,7 +112,7 @@ def track_time(data, num_negs, batch_size):
    num_epochs = 2
    num_hidden = 16
    num_layers = 2
    fan_out = "10,25"
    lr = 0.003
    dropout = 0.5
    num_workers = 4

@@ -119,21 +125,26 @@ def track_time(data, num_negs, batch_size):
    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(",")]
    )
    sampler = dgl.dataloading.as_edge_prediction_sampler(
        sampler,
        exclude="reverse_id",
        # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
        reverse_eids=th.cat(
            [th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)]
        ),
        negative_sampler=NegativeSampler(g, num_negs),
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_seeds,
        sampler,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers,
    )

    # Define model and optimizer
    model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)

@@ -145,7 +156,9 @@ def track_time(data, num_negs, batch_size):
    # Training loop
    avg = 0
    iter_tput = []
    for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
        dataloader
    ):
        # Load the input features as well as output labels
        batch_inputs = load_subtensor(g, input_nodes, device)
...
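The step body after load_subtensor is truncated; a hedged sketch of the remainder, assuming SAGE.forward(blocks, x) as in DGL's GraphSAGE examples and an optimizer named optimizer (note that CrossEntropyLoss here is the pairwise class defined above, not torch.nn.CrossEntropyLoss):

# Hedged sketch; device moves, the forward signature, and the optimizer
# name are assumptions.
loss_fcn = CrossEntropyLoss()
pos_graph, neg_graph = pos_graph.to(device), neg_graph.to(device)
blocks = [block.to(device) for block in blocks]
batch_pred = model(blocks, batch_inputs)
loss = loss_fcn(batch_pred, pos_graph, neg_graph)
optimizer.zero_grad()
loss.backward()
optimizer.step()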