"src/turbomind/models/vscode:/vscode.git/clone" did not exist on "b8354dae03e3b942ddef98c3447545f78b84f902"
Unverified Commit d78a3a4b authored by Hongzhi (Steve) Chen, committed by GitHub

[Misc] Black auto fix. (#4640)



* auto fix

* add more

* sort
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 23d09057
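The hunks below are mechanical: each one replaces hand-wrapped code with the output of the Black formatter, with imports sorted. As a rough illustration of what "auto fix" means here, the sketch below checks whether a source string already matches Black's output. It assumes Black's Python API with its default Mode (the exact configuration used for this commit is not recorded), and is_black_clean is a hypothetical helper, not part of the change.

import black

def is_black_clean(source: str) -> bool:
    # Reformat with Black's default settings; if the output equals the
    # input, the source is already Black-clean.
    return black.format_str(source, mode=black.Mode()) == source

# Example: a line that Black's default settings would leave untouched.
print(is_black_clean("model = GAT(1, in_feats, 8, n_classes, [8, 1], F.elu, 0.6, 0.6, 0.2, False)\n"))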
import time
import dgl
from dgl.nn.pytorch import GATConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class GAT(nn.Module):
def __init__(
self,
num_layers,
in_dim,
num_hidden,
num_classes,
heads,
activation,
feat_drop,
attn_drop,
negative_slope,
residual,
):
super(GAT, self).__init__()
self.num_layers = num_layers
self.gat_layers = nn.ModuleList()
self.activation = activation
# input projection (no residual)
self.gat_layers.append(
GATConv(
in_dim,
num_hidden,
heads[0],
feat_drop,
attn_drop,
negative_slope,
False,
self.activation,
)
)
# hidden layers
for l in range(1, num_layers):
# due to multi-head, the in_dim = num_hidden * num_heads
self.gat_layers.append(
GATConv(
num_hidden * heads[l - 1],
num_hidden,
heads[l],
feat_drop,
attn_drop,
negative_slope,
residual,
self.activation,
)
)
# output projection
self.gat_layers.append(
GATConv(
num_hidden * heads[-2],
num_classes,
heads[-1],
feat_drop,
attn_drop,
negative_slope,
residual,
None,
)
)
def forward(self, g, inputs):
h = inputs
......@@ -46,8 +78,9 @@ class GAT(nn.Module):
logits = self.gat_layers[-1](g, h).mean(1)
return logits
@utils.benchmark("time")
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
data = utils.process_data(data)
device = utils.get_bench_device()
......@@ -55,11 +88,11 @@ def track_time(data):
g = data[0].to(device)
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_classes
......@@ -68,17 +101,14 @@ def track_time(data):
g = dgl.add_self_loop(g)
# create model
model = GAT(1, in_feats, 8, n_classes, [8, 1], F.elu, 0.6, 0.6, 0.2, False)
loss_fcn = torch.nn.CrossEntropyLoss()
model = model.to(device)
model.train()
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
# dry run
for epoch in range(10):
......
import time
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class GraphConv(nn.Module):
def __init__(self, in_dim, out_dim, activation=None):
super(GraphConv, self).__init__()
......@@ -19,39 +22,42 @@ class GraphConv(nn.Module):
def forward(self, graph, feat):
with graph.local_scope():
graph.ndata["ci"] = torch.pow(
graph.out_degrees().float().clamp(min=1), -0.5
)
graph.ndata["cj"] = torch.pow(
graph.in_degrees().float().clamp(min=1), -0.5
)
graph.ndata["h"] = feat
graph.update_all(self.mfunc, self.rfunc)
h = graph.ndata["h"]
h = torch.matmul(h, self.weight) + self.bias
if self.activation is not None:
h = self.activation(h)
return h
def mfunc(self, edges):
return {"m": edges.src["h"], "ci": edges.src["ci"]}
def rfunc(self, nodes):
ci = nodes.mailbox["ci"].unsqueeze(2)
newh = (nodes.mailbox["m"] * ci).sum(1) * nodes.data["cj"].unsqueeze(1)
return {"h": newh}
class GCN(nn.Module):
def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super(GCN, self).__init__()
self.layers = nn.ModuleList()
# input layer
self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
# hidden layers
for i in range(n_layers - 1):
self.layers.append(
GraphConv(n_hidden, n_hidden, activation=activation)
)
# output layer
self.layers.append(GraphConv(n_hidden, n_classes))
self.dropout = nn.Dropout(p=dropout)
......@@ -64,19 +70,20 @@ class GCN(nn.Module):
h = layer(g, h)
return h
@utils.benchmark("time", timeout=300)
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
data = utils.process_data(data)
device = utils.get_bench_device()
g = data[0].to(device).int()
features = g.ndata["feat"]
labels = g.ndata["label"]
train_mask = g.ndata["train_mask"]
val_mask = g.ndata["val_mask"]
test_mask = g.ndata["test_mask"]
in_feats = features.shape[1]
n_classes = data.num_classes
......@@ -88,7 +95,7 @@ def track_time(data):
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
g.ndata["norm"] = norm.unsqueeze(1)
# create GCN model
model = GCN(in_feats, 16, n_classes, 1, F.relu, 0.5)
......@@ -98,9 +105,7 @@ def track_time(data):
model.train()
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
# dry run
for epoch in range(5):
logits = model(g, features)
......
import argparse
import pickle
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, IterableDataset
import dgl
import dgl.function as fn
from .. import utils
def _init_input_modules(g, ntype, textset, hidden_dims):
# We initialize the linear projections of each input feature ``x`` as
# follows:
......@@ -31,8 +35,7 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
module_dict[column] = m
elif data.dtype == torch.int64:
assert data.ndim == 1
m = nn.Embedding(data.max() + 2, hidden_dims, padding_idx=-1)
nn.init.xavier_uniform_(m.weight)
module_dict[column] = m
......@@ -45,14 +48,17 @@ def _init_input_modules(g, ntype, textset, hidden_dims):
return module_dict
class BagOfWordsPretrained(nn.Module):
def __init__(self, field, hidden_dims):
super().__init__()
input_dims = field.vocab.vectors.shape[1]
self.emb = nn.Embedding(
len(field.vocab.itos),
input_dims,
padding_idx=field.vocab.stoi[field.pad_token],
)
self.emb.weight[:] = field.vocab.vectors
self.proj = nn.Linear(input_dims, hidden_dims)
nn.init.xavier_uniform_(self.proj.weight)
......@@ -68,18 +74,22 @@ class BagOfWordsPretrained(nn.Module):
x = self.emb(x).sum(1) / length.unsqueeze(1).float()
return self.proj(x)
class BagOfWords(nn.Module):
def __init__(self, field, hidden_dims):
super().__init__()
self.emb = nn.Embedding(
len(field.vocab.itos),
hidden_dims,
padding_idx=field.vocab.stoi[field.pad_token],
)
nn.init.xavier_uniform_(self.emb.weight)
def forward(self, x, length):
return self.emb(x).sum(1) / length.unsqueeze(1).float()
class WeightedSAGEConv(nn.Module):
def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu):
super().__init__()
......@@ -91,7 +101,7 @@ class WeightedSAGEConv(nn.Module):
self.dropout = nn.Dropout(0.5)
def reset_parameters(self):
gain = nn.init.calculate_gain("relu")
nn.init.xavier_uniform_(self.Q.weight, gain=gain)
nn.init.xavier_uniform_(self.W.weight, gain=gain)
nn.init.constant_(self.Q.bias, 0)
......@@ -105,18 +115,21 @@ class WeightedSAGEConv(nn.Module):
"""
h_src, h_dst = h
with g.local_scope():
g.srcdata["n"] = self.act(self.Q(self.dropout(h_src)))
g.edata["w"] = weights.float()
g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n"))
g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws"))
n = g.dstdata["n"]
ws = g.dstdata["ws"].unsqueeze(1).clamp(min=1)
z = self.act(self.W(self.dropout(torch.cat([n / ws, h_dst], 1))))
z_norm = z.norm(2, 1, keepdim=True)
z_norm = torch.where(
z_norm == 0, torch.tensor(1.0).to(z_norm), z_norm
)
z = z / z_norm
return z
class SAGENet(nn.Module):
def __init__(self, hidden_dims, n_layers):
"""
......@@ -130,28 +143,34 @@ class SAGENet(nn.Module):
self.convs = nn.ModuleList()
for _ in range(n_layers):
self.convs.append(
WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims)
)
def forward(self, blocks, h):
for layer, block in zip(self.convs, blocks):
h_dst = h[: block.number_of_nodes("DST/" + block.ntypes[0])]
h = layer(block, (h, h_dst), block.edata["weights"])
return h
class LinearProjector(nn.Module):
"""
Projects each input feature of the graph linearly and sums them up
"""
def __init__(self, full_graph, ntype, textset, hidden_dims):
super().__init__()
self.ntype = ntype
self.inputs = _init_input_modules(
full_graph, ntype, textset, hidden_dims
)
def forward(self, ndata):
projections = []
for feature, data in ndata.items():
if feature == dgl.NID or feature.endswith("__len"):
# This is an additional feature indicating the length of the ``feature``
# column; we shouldn't process this.
continue
......@@ -159,7 +178,7 @@ class LinearProjector(nn.Module):
module = self.inputs[feature]
if isinstance(module, (BagOfWords, BagOfWordsPretrained)):
# Textual feature; find the length and pass it to the textual module.
length = ndata[feature + "__len"]
result = module(data, length)
else:
result = module(data)
......@@ -167,6 +186,7 @@ class LinearProjector(nn.Module):
return torch.stack(projections, 1).sum(1)
class ItemToItemScorer(nn.Module):
def __init__(self, full_graph, ntype):
super().__init__()
......@@ -177,7 +197,7 @@ class ItemToItemScorer(nn.Module):
def _add_bias(self, edges):
bias_src = self.bias[edges.src[dgl.NID]]
bias_dst = self.bias[edges.dst[dgl.NID]]
return {"s": edges.data["s"] + bias_src + bias_dst}
def forward(self, item_item_graph, h):
"""
......@@ -185,12 +205,13 @@ class ItemToItemScorer(nn.Module):
h : hidden state of every node
"""
with item_item_graph.local_scope():
item_item_graph.ndata["h"] = h
item_item_graph.apply_edges(fn.u_dot_v("h", "h", "s"))
item_item_graph.apply_edges(self._add_bias)
pair_score = item_item_graph.edata["s"]
return pair_score
class PinSAGEModel(nn.Module):
def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers):
super().__init__()
......@@ -210,6 +231,7 @@ class PinSAGEModel(nn.Module):
h_item_dst = self.proj(blocks[-1].dstdata)
return h_item_dst + self.sage(blocks, h_item)
def compact_and_copy(frontier, seeds):
block = dgl.to_block(frontier, seeds)
for col, data in frontier.edata.items():
......@@ -218,6 +240,7 @@ def compact_and_copy(frontier, seeds):
block.edata[col] = data[block.edata[dgl.EID]]
return block
class ItemToItemBatchSampler(IterableDataset):
def __init__(self, g, user_type, item_type, batch_size):
self.g = g
......@@ -229,42 +252,69 @@ class ItemToItemBatchSampler(IterableDataset):
def __iter__(self):
while True:
heads = torch.randint(
0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
)
tails = dgl.sampling.random_walk(
self.g,
heads,
metapath=[self.item_to_user_etype, self.user_to_item_etype],
)[0][:, 2]
neg_tails = torch.randint(
0, self.g.number_of_nodes(self.item_type), (self.batch_size,)
)
mask = tails != -1
yield heads[mask], tails[mask], neg_tails[mask]
class NeighborSampler(object):
def __init__(
self,
g,
user_type,
item_type,
random_walk_length,
random_walk_restart_prob,
num_random_walks,
num_neighbors,
num_layers,
):
self.g = g
self.user_type = user_type
self.item_type = item_type
self.user_to_item_etype = list(g.metagraph()[user_type][item_type])[0]
self.item_to_user_etype = list(g.metagraph()[item_type][user_type])[0]
self.samplers = [
dgl.sampling.PinSAGESampler(
g,
item_type,
user_type,
random_walk_length,
random_walk_restart_prob,
num_random_walks,
num_neighbors,
)
for _ in range(num_layers)
]
def sample_blocks(self, seeds, heads=None, tails=None, neg_tails=None):
blocks = []
for sampler in self.samplers:
frontier = sampler(seeds)
if heads is not None:
eids = frontier.edge_ids(
torch.cat([heads, heads]),
torch.cat([tails, neg_tails]),
return_uv=True,
)[2]
if len(eids) > 0:
old_frontier = frontier
frontier = dgl.remove_edges(old_frontier, eids)
# print(old_frontier)
# print(frontier)
# print(frontier.edata['weights'])
# frontier.edata['weights'] = old_frontier.edata['weights'][frontier.edata[dgl.EID]]
block = compact_and_copy(frontier, seeds)
seeds = block.srcdata[dgl.NID]
blocks.insert(0, block)
......@@ -274,17 +324,18 @@ class NeighborSampler(object):
# Create a graph with positive connections only and another graph with negative
# connections only.
pos_graph = dgl.graph(
(heads, tails), num_nodes=self.g.number_of_nodes(self.item_type)
)
neg_graph = dgl.graph(
(heads, neg_tails), num_nodes=self.g.number_of_nodes(self.item_type)
)
pos_graph, neg_graph = dgl.compact_graphs([pos_graph, neg_graph])
seeds = pos_graph.ndata[dgl.NID]
blocks = self.sample_blocks(seeds, heads, tails, neg_tails)
return pos_graph, neg_graph, blocks
def assign_simple_node_features(ndata, g, ntype, assign_id=False):
"""
Copies data to the given block from the corresponding nodes in the original graph.
......@@ -295,6 +346,7 @@ def assign_simple_node_features(ndata, g, ntype, assign_id=False):
induced_nodes = ndata[dgl.NID]
ndata[col] = g.nodes[ntype].data[col][induced_nodes]
def assign_textual_node_features(ndata, textset, ntype):
"""
Assigns numericalized tokens from a torchtext dataset to given block.
......@@ -327,7 +379,8 @@ def assign_textual_node_features(ndata, textset, ntype):
tokens = tokens.t()
ndata[field_name] = tokens
ndata[field_name + "__len"] = lengths
def assign_features_to_blocks(blocks, g, textset, ntype):
# For the first block (which is closest to the input), copy the features from
......@@ -337,6 +390,7 @@ def assign_features_to_blocks(blocks, g, textset, ntype):
assign_simple_node_features(blocks[-1].dstdata, g, ntype)
assign_textual_node_features(blocks[-1].dstdata, textset, ntype)
class PinSAGECollator(object):
def __init__(self, sampler, g, ntype, textset):
self.sampler = sampler
......@@ -347,7 +401,9 @@ class PinSAGECollator(object):
def collate_train(self, batches):
heads, tails, neg_tails = batches[0]
# Construct multilayer neighborhood via PinSAGE...
pos_graph, neg_graph, blocks = self.sampler.sample_from_item_pairs(
heads, tails, neg_tails
)
assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
return pos_graph, neg_graph, blocks
......@@ -358,8 +414,9 @@ class PinSAGECollator(object):
assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
return blocks
@utils.benchmark("time", 600)
@utils.parametrize("data", ["nowplaying_rs"])
def track_time(data):
dataset = utils.process_data(data)
device = utils.get_bench_device()
......@@ -383,24 +440,35 @@ def track_time(data):
g = dataset[0]
# Sampler
batch_sampler = ItemToItemBatchSampler(
g, user_ntype, item_ntype, batch_size
)
neighbor_sampler = NeighborSampler(
g,
user_ntype,
item_ntype,
random_walk_length,
random_walk_restart_prob,
num_random_walks,
num_neighbors,
num_layers,
)
collator = PinSAGECollator(neighbor_sampler, g, item_ntype, textset)
dataloader = DataLoader(
batch_sampler,
collate_fn=collator.collate_train,
num_workers=num_workers,
)
dataloader_test = DataLoader(
torch.arange(g.number_of_nodes(item_ntype)),
batch_size=batch_size,
collate_fn=collator.collate_test,
num_workers=num_workers,
)
# Model
model = PinSAGEModel(g, item_ntype, textset, hidden_dims, num_layers).to(
device
)
# Optimizer
opt = torch.optim.Adam(model.parameters(), lr=lr)
......@@ -423,7 +491,9 @@ def track_time(data):
# start timer at before iter_start
if batch_id == iter_start - 1:
t0 = time.time()
elif (
batch_id == iter_count + iter_start - 1
): # time iter_count iterations
break
t1 = time.time()
......
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy
from .. import rgcn, utils
@utils.benchmark("time", 1200)
@utils.parametrize("data", ["aifb", "am"])
def track_time(data):
# args
if data == "aifb":
num_bases = -1
l2norm = 0.0
elif data == "am":
num_bases = 40
l2norm = 5e-4
else:
raise ValueError()
(
g,
num_rels,
num_classes,
labels,
train_idx,
test_idx,
target_idx,
) = rgcn.load_data(data, get_norm=True)
num_hidden = 16
model = rgcn.RGCN(
g.num_nodes(), num_hidden, num_classes, num_rels, num_bases=num_bases
)
device = utils.get_bench_device()
labels = labels.to(device)
model = model.to(device)
g = g.int().to(device)
optimizer = torch.optim.Adam(
model.parameters(), lr=1e-2, weight_decay=l2norm
)
model.train()
num_epochs = 30
......
import time
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn
from .. import utils
class NegativeSampler(object):
def __init__(self, g, k, neg_share=False):
self.weights = g.in_degrees().float() ** 0.75
......@@ -23,34 +26,32 @@ class NegativeSampler(object):
dst = self.weights.multinomial(n, replacement=True)
dst = dst.view(-1, 1, self.k).expand(-1, self.k, -1).flatten()
else:
dst = self.weights.multinomial(n * self.k, replacement=True)
src = src.repeat_interleave(self.k)
return src, dst
def load_subtensor(g, input_nodes, device):
"""
Copies features and labels of a set of nodes onto GPU.
"""
batch_inputs = g.ndata["features"][input_nodes].to(device)
return batch_inputs
class SAGE(nn.Module):
def __init__(
self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
):
super().__init__()
self.n_layers = n_layers
self.n_hidden = n_hidden
self.n_classes = n_classes
self.layers = nn.ModuleList()
self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
for i in range(1, n_layers - 1):
self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
self.dropout = nn.Dropout(dropout)
self.activation = activation
......@@ -63,40 +64,45 @@ class SAGE(nn.Module):
h = self.dropout(h)
return h
def load_subtensor(g, input_nodes, device):
"""
Copies features and labels of a set of nodes onto GPU.
"""
batch_inputs = g.ndata["features"][input_nodes].to(device)
return batch_inputs
class CrossEntropyLoss(nn.Module):
def forward(self, block_outputs, pos_graph, neg_graph):
with pos_graph.local_scope():
pos_graph.ndata["h"] = block_outputs
pos_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
pos_score = pos_graph.edata["score"]
with neg_graph.local_scope():
neg_graph.ndata["h"] = block_outputs
neg_graph.apply_edges(fn.u_dot_v("h", "h", "score"))
neg_score = neg_graph.edata["score"]
score = th.cat([pos_score, neg_score])
label = th.cat(
[th.ones_like(pos_score), th.zeros_like(neg_score)]
).long()
loss = F.binary_cross_entropy_with_logits(score, label.float())
return loss
@utils.benchmark("time", 600)
@utils.parametrize("data", ["reddit"])
@utils.parametrize("num_negs", [2, 8, 32])
@utils.parametrize("batch_size", [1024, 2048, 8192])
def track_time(data, num_negs, batch_size):
data = utils.process_data(data)
device = utils.get_bench_device()
g = data[0]
g.ndata["features"] = g.ndata["feat"]
g.ndata["labels"] = g.ndata["label"]
in_feats = g.ndata["features"].shape[1]
n_classes = data.num_classes
# Create csr/coo/csc formats before launching training processes with multi-gpu.
......@@ -106,7 +112,7 @@ def track_time(data, num_negs, batch_size):
num_epochs = 2
num_hidden = 16
num_layers = 2
fan_out = "10,25"
lr = 0.003
dropout = 0.5
num_workers = 4
......@@ -119,21 +125,26 @@ def track_time(data, num_negs, batch_size):
# Create PyTorch DataLoader for constructing blocks
sampler = dgl.dataloading.MultiLayerNeighborSampler(
[int(fanout) for fanout in fan_out.split(",")]
)
sampler = dgl.dataloading.as_edge_prediction_sampler(
sampler,
exclude="reverse_id",
# For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
reverse_eids=th.cat(
[th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)]
),
negative_sampler=NegativeSampler(g, num_negs),
)
dataloader = dgl.dataloading.DataLoader(
g,
train_seeds,
sampler,
batch_size=batch_size,
shuffle=True,
drop_last=False,
num_workers=num_workers,
)
# Define model and optimizer
model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)
......@@ -145,7 +156,9 @@ def track_time(data, num_negs, batch_size):
# Training loop
avg = 0
iter_tput = []
for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
dataloader
):
# Load the input features as well as output labels
batch_inputs = load_subtensor(g, input_nodes, device)
......