Unverified Commit e9b624fe authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

Merge branch 'master' into dist_part

parents 8086d1ed a88e7f7e
...@@ -34,9 +34,6 @@ class EntityClassify(nn.Module): ...@@ -34,9 +34,6 @@ class EntityClassify(nn.Module):
Dropout Dropout
use_self_loop : bool use_self_loop : bool
Use self loop if True, default False. Use self loop if True, default False.
low_mem : bool
True to use low memory implementation of relation message passing function
trade speed with memory consumption
""" """
def __init__(self, def __init__(self,
device, device,
...@@ -48,7 +45,6 @@ class EntityClassify(nn.Module): ...@@ -48,7 +45,6 @@ class EntityClassify(nn.Module):
num_hidden_layers=1, num_hidden_layers=1,
dropout=0, dropout=0,
use_self_loop=False, use_self_loop=False,
low_mem=False,
layer_norm=False): layer_norm=False):
super(EntityClassify, self).__init__() super(EntityClassify, self).__init__()
self.device = device self.device = device
...@@ -60,7 +56,6 @@ class EntityClassify(nn.Module): ...@@ -60,7 +56,6 @@ class EntityClassify(nn.Module):
self.num_hidden_layers = num_hidden_layers self.num_hidden_layers = num_hidden_layers
self.dropout = dropout self.dropout = dropout
self.use_self_loop = use_self_loop self.use_self_loop = use_self_loop
self.low_mem = low_mem
self.layer_norm = layer_norm self.layer_norm = layer_norm
self.layers = nn.ModuleList() self.layers = nn.ModuleList()
...@@ -68,19 +63,19 @@ class EntityClassify(nn.Module): ...@@ -68,19 +63,19 @@ class EntityClassify(nn.Module):
self.layers.append(RelGraphConv( self.layers.append(RelGraphConv(
self.h_dim, self.h_dim, self.num_rels, "basis", self.h_dim, self.h_dim, self.num_rels, "basis",
self.num_bases, activation=F.relu, self_loop=self.use_self_loop, self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm)) dropout=self.dropout, layer_norm = layer_norm))
# h2h # h2h
for idx in range(self.num_hidden_layers): for idx in range(self.num_hidden_layers):
self.layers.append(RelGraphConv( self.layers.append(RelGraphConv(
self.h_dim, self.h_dim, self.num_rels, "basis", self.h_dim, self.h_dim, self.num_rels, "basis",
self.num_bases, activation=F.relu, self_loop=self.use_self_loop, self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm)) dropout=self.dropout, layer_norm = layer_norm))
# h2o # h2o
self.layers.append(RelGraphConv( self.layers.append(RelGraphConv(
self.h_dim, self.out_dim, self.num_rels, "basis", self.h_dim, self.out_dim, self.num_rels, "basis",
self.num_bases, activation=None, self.num_bases, activation=None,
self_loop=self.use_self_loop, self_loop=self.use_self_loop,
low_mem=self.low_mem, layer_norm = layer_norm)) layer_norm = layer_norm))
def forward(self, blocks, feats, norm=None): def forward(self, blocks, feats, norm=None):
if blocks is None: if blocks is None:
...@@ -196,7 +191,6 @@ def track_time(data): ...@@ -196,7 +191,6 @@ def track_time(data):
dropout = 0.5 dropout = 0.5
use_self_loop = True use_self_loop = True
lr = 0.01 lr = 0.01
low_mem = True
num_workers = 4 num_workers = 4
iter_start = 3 iter_start = 3
iter_count = 10 iter_count = 10
...@@ -276,7 +270,6 @@ def track_time(data): ...@@ -276,7 +270,6 @@ def track_time(data):
num_hidden_layers=n_layers - 2, num_hidden_layers=n_layers - 2,
dropout=dropout, dropout=dropout,
use_self_loop=use_self_loop, use_self_loop=use_self_loop,
low_mem=low_mem,
layer_norm=False) layer_norm=False)
embed_layer = embed_layer.to(device) embed_layer = embed_layer.to(device)
......
...@@ -4,12 +4,9 @@ import torch as th ...@@ -4,12 +4,9 @@ import torch as th
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import torch.optim as optim import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn import dgl.nn.pytorch as dglnn
import dgl.function as fn import dgl.function as fn
import time import time
import traceback
from .. import utils from .. import utils
...@@ -123,17 +120,19 @@ def track_time(data, num_negs, batch_size): ...@@ -123,17 +120,19 @@ def track_time(data, num_negs, batch_size):
# Create PyTorch DataLoader for constructing blocks # Create PyTorch DataLoader for constructing blocks
sampler = dgl.dataloading.MultiLayerNeighborSampler( sampler = dgl.dataloading.MultiLayerNeighborSampler(
[int(fanout) for fanout in fan_out.split(',')]) [int(fanout) for fanout in fan_out.split(',')])
dataloader = dgl.dataloading.EdgeDataLoader( sampler = dgl.dataloading.as_edge_prediction_sampler(
g, train_seeds, sampler, exclude='reverse_id', sampler, exclude='reverse_id',
# For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2. # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2.
reverse_eids=th.cat([ reverse_eids=th.cat([
th.arange(n_edges // 2, n_edges), th.arange(
th.arange(0, n_edges // 2)]), n_edges // 2, n_edges),
negative_sampler=NegativeSampler(g, num_negs), th.arange(0, n_edges // 2)]),
negative_sampler=NegativeSampler(g, num_negs))
dataloader = dgl.dataloading.DataLoader(
g, train_seeds, sampler,
batch_size=batch_size, batch_size=batch_size,
shuffle=True, shuffle=True,
drop_last=False, drop_last=False,
pin_memory=True,
num_workers=num_workers) num_workers=num_workers)
# Define model and optimizer # Define model and optimizer
......
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from . import utils
class RGCN(nn.Module):
    """Two-layer R-GCN entity classifier.

    Supports both full-graph forward computation and block-based
    neighbor-sampling mini-batches, selected by ``ns_mode``.
    """

    def __init__(self, num_nodes, h_dim, out_dim, num_rels,
                 regularizer="basis", num_bases=-1, dropout=0.,
                 self_loop=False,
                 ns_mode=False):
        super().__init__()

        # -1 means "no basis decomposition": use one basis per relation.
        if num_bases == -1:
            num_bases = num_rels
        self.emb = nn.Embedding(num_nodes, h_dim)
        self.conv1 = RelGraphConv(h_dim, h_dim, num_rels, regularizer,
                                  num_bases, self_loop=self_loop)
        self.conv2 = RelGraphConv(
            h_dim, out_dim, num_rels, regularizer, num_bases, self_loop=self_loop)
        self.dropout = nn.Dropout(dropout)
        self.ns_mode = ns_mode

    def forward(self, g, nids=None):
        """Compute node logits.

        In neighbor-sampling mode ``g`` is a pair of blocks produced by a
        two-layer sampler; otherwise ``g`` is the full homogeneous graph and
        ``nids`` optionally restricts the input embeddings to those nodes.
        """
        if self.ns_mode:
            # Mini-batch path: one block per convolution layer.
            blk0, blk1 = g[0], g[1]
            feat = self.emb(blk0.srcdata[dgl.NID])
            hidden = self.conv1(blk0, feat, blk0.edata[dgl.ETYPE],
                                blk0.edata['norm'])
            hidden = self.dropout(F.relu(hidden))
            return self.conv2(blk1, hidden, blk1.edata[dgl.ETYPE],
                              blk1.edata['norm'])

        # Full-graph path.
        feat = self.emb.weight if nids is None else self.emb(nids)
        hidden = self.conv1(g, feat, g.edata[dgl.ETYPE], g.edata['norm'])
        hidden = self.dropout(F.relu(hidden))
        return self.conv2(g, hidden, g.edata[dgl.ETYPE], g.edata['norm'])
def load_data(data_name, get_norm=False, inv_target=False):
    """Load a heterogeneous dataset and flatten it to a homogeneous graph.

    Parameters
    ----------
    data_name : str
        Dataset name understood by ``utils.process_data``.
    get_norm : bool
        If True, attach a per-edge 'norm' feature (1/in-degree of the
        destination node) and carry it into the homogeneous graph.
    inv_target : bool
        If True, additionally return a tensor mapping global node IDs to
        type-specific node IDs of the target category.

    Returns
    -------
    tuple
        ``(g, num_rels, num_classes, labels, train_idx, test_idx,
        target_idx)`` plus ``inv_target`` as an eighth element when
        requested.
    """
    dataset = utils.process_data(data_name)

    # The hetero-graph and its prediction task metadata.
    hg = dataset[0]
    num_rels = len(hg.canonical_etypes)
    category = dataset.predict_category
    num_classes = dataset.num_classes
    labels = hg.nodes[category].data.pop('labels')
    train_mask = hg.nodes[category].data.pop('train_mask')
    test_mask = hg.nodes[category].data.pop('test_mask')
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()

    if get_norm:
        # Normalization weight for each edge: 1/d, with d the degree of
        # the destination node.
        for cetype in hg.canonical_etypes:
            hg.edges[cetype].data['norm'] = dgl.norm_by_dst(
                hg, cetype).unsqueeze(1)
        edata = ['norm']
    else:
        edata = None

    # Flatten to a homogeneous graph; remember the target node type's id.
    category_id = hg.ntypes.index(category)
    g = dgl.to_homogeneous(hg, edata=edata)
    # Rename the fields as they can be changed by e.g. a DataLoader.
    g.ndata['ntype'] = g.ndata.pop(dgl.NTYPE)
    g.ndata['type_id'] = g.ndata.pop(dgl.NID)

    # Global IDs (within g) of the nodes that belong to the target category.
    node_ids = torch.arange(g.num_nodes())
    target_idx = node_ids[g.ndata['ntype'] == category_id]

    if not inv_target:
        return (g, num_rels, num_classes, labels, train_idx, test_idx,
                target_idx)

    # Map global node IDs to type-specific node IDs. This is required for
    # looking up type-specific labels in a minibatch.
    inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64)
    inv_target[target_idx] = torch.arange(0, target_idx.shape[0],
                                          dtype=inv_target.dtype)
    return (g, num_rels, num_classes, labels, train_idx, test_idx,
            target_idx, inv_target)
--find-links https://download.pytorch.org/whl/lts/1.8/torch_lts.html --find-links https://download.pytorch.org/whl/torch
torch==1.8.1+cu111 torch==1.9.0+cu111
torchvision==0.9.1+cu111 torchvision
pytest pytest
nose nose
numpy numpy
...@@ -12,7 +12,8 @@ nltk ...@@ -12,7 +12,8 @@ nltk
requests[security] requests[security]
tqdm tqdm
awscli awscli
torchtext==0.9.1 torchtext
pandas pandas
rdflib rdflib
ogb==1.3.1 ogb==1.3.1
\ No newline at end of file torchmetrics
\ No newline at end of file
...@@ -61,7 +61,7 @@ Let's use one of the most classical setups -- training a GraphSAGE model for nod ...@@ -61,7 +61,7 @@ Let's use one of the most classical setups -- training a GraphSAGE model for nod
classification on the Cora citation graph dataset as an classification on the Cora citation graph dataset as an
example. example.
### Step one: `dgl configure` ### Step 1: `dgl configure`
First step, use `dgl configure` to generate a YAML configuration file. First step, use `dgl configure` to generate a YAML configuration file.
...@@ -85,7 +85,7 @@ At this point you can also change options to explore optimization potentials. ...@@ -85,7 +85,7 @@ At this point you can also change options to explore optimization potentials.
The snippet below shows the configuration file generated by the command above. The snippet below shows the configuration file generated by the command above.
```yaml ```yaml
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cpu device: cpu
...@@ -181,7 +181,7 @@ That's all! Basically you only need two commands to train a graph neural network ...@@ -181,7 +181,7 @@ That's all! Basically you only need two commands to train a graph neural network
### Step 3: `dgl export` for more advanced customization ### Step 3: `dgl export` for more advanced customization
That's not everything yet. You may want to open the hood and invoke deeper That's not everything yet. You may want to open the hood and invoke deeper
customization. DGL-Go can export a **self-contained, reproducible** Python customization. DGL-Go can export a **self-contained, reproducible** Python
script for you to do anything you like. script for you to do anything you like.
......
...@@ -23,7 +23,7 @@ class PipelineConfig(DGLBaseModel): ...@@ -23,7 +23,7 @@ class PipelineConfig(DGLBaseModel):
loss: str = "CrossEntropyLoss" loss: str = "CrossEntropyLoss"
class UserConfig(DGLBaseModel): class UserConfig(DGLBaseModel):
version: Optional[str] = "0.0.1" version: Optional[str] = "0.0.2"
pipeline_name: PipelineFactory.get_pipeline_enum() pipeline_name: PipelineFactory.get_pipeline_enum()
pipeline_mode: str pipeline_mode: str
device: str = "cpu" device: str = "cpu"
version: 0.0.1 version: 0.0.2
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
version: 0.0.1 version: 0.0.2
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
version: 0.0.1 version: 0.0.2
pipeline_name: graphpred pipeline_name: graphpred
pipeline_mode: train pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0 device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
version: 0.0.1 version: 0.0.2
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train pipeline_mode: train
device: cpu device: cpu
......
version: 0.0.1 version: 0.0.2
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train pipeline_mode: train
device: cpu device: cpu
......
version: 0.0.1 version: 0.0.2
pipeline_name: linkpred pipeline_name: linkpred
pipeline_mode: train pipeline_mode: train
device: cuda device: cuda
......
# Accuracy across 5 runs: 0.593288 ± 0.006103 # Accuracy across 5 runs: 0.593288 ± 0.006103
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred-ns pipeline_name: nodepred-ns
pipeline_mode: train pipeline_mode: train
device: 'cuda:0' device: 'cuda:0'
......
# Accuracy across 1 runs: 0.796911 # Accuracy across 1 runs: 0.796911
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred-ns pipeline_name: nodepred-ns
pipeline_mode: train pipeline_mode: train
device: cuda device: cuda
......
# Accuracy across 10 runs: 0.7097 ± 0.006914 # Accuracy across 10 runs: 0.7097 ± 0.006914
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
# Accuracy across 10 runs: 0.6852 ± 0.008875 # Accuracy across 10 runs: 0.6852 ± 0.008875
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
# Accuracy across 10 runs: 0.6994 ± 0.004005 # Accuracy across 10 runs: 0.6994 ± 0.004005
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
# Accuracy across 10 runs: 0.8208 ± 0.00663 # Accuracy across 10 runs: 0.8208 ± 0.00663
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
# Accuracy across 10 runs: 0.802 ± 0.005329 # Accuracy across 10 runs: 0.802 ± 0.005329
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
# Accuracy across 10 runs: 0.8163 ± 0.006856 # Accuracy across 10 runs: 0.8163 ± 0.006856
version: 0.0.1 version: 0.0.2
pipeline_name: nodepred pipeline_name: nodepred
pipeline_mode: train pipeline_mode: train
device: cuda:0 device: cuda:0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment