Unverified commit e9b624fe authored by Minjie Wang, committed by GitHub

Merge branch 'master' into dist_part

parents 8086d1ed a88e7f7e
@@ -34,9 +34,6 @@ class EntityClassify(nn.Module):
        Dropout
    use_self_loop : bool
        Use self loop if True, default False.
-    low_mem : bool
-        True to use low memory implementation of relation message passing function
-        trade speed with memory consumption
    """
    def __init__(self,
                 device,
@@ -48,7 +45,6 @@ class EntityClassify(nn.Module):
                 num_hidden_layers=1,
                 dropout=0,
                 use_self_loop=False,
-                 low_mem=False,
                 layer_norm=False):
        super(EntityClassify, self).__init__()
        self.device = device
@@ -60,7 +56,6 @@ class EntityClassify(nn.Module):
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_self_loop = use_self_loop
-        self.low_mem = low_mem
        self.layer_norm = layer_norm
        self.layers = nn.ModuleList()
@@ -68,19 +63,19 @@
        self.layers.append(RelGraphConv(
            self.h_dim, self.h_dim, self.num_rels, "basis",
            self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-            low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm))
+            dropout=self.dropout, layer_norm=layer_norm))
        # h2h
        for idx in range(self.num_hidden_layers):
            self.layers.append(RelGraphConv(
                self.h_dim, self.h_dim, self.num_rels, "basis",
                self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-                low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm))
+                dropout=self.dropout, layer_norm=layer_norm))
        # h2o
        self.layers.append(RelGraphConv(
            self.h_dim, self.out_dim, self.num_rels, "basis",
            self.num_bases, activation=None,
            self_loop=self.use_self_loop,
-            low_mem=self.low_mem, layer_norm=layer_norm))
+            layer_norm=layer_norm))

    def forward(self, blocks, feats, norm=None):
        if blocks is None:
@@ -196,7 +191,6 @@ def track_time(data):
    dropout = 0.5
    use_self_loop = True
    lr = 0.01
-    low_mem = True
    num_workers = 4
    iter_start = 3
    iter_count = 10
@@ -276,7 +270,6 @@ def track_time(data):
        num_hidden_layers=n_layers - 2,
        dropout=dropout,
        use_self_loop=use_self_loop,
-        low_mem=low_mem,
        layer_norm=False)
    embed_layer = embed_layer.to(device)
......
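For context, this is what a single layer construction looks like after the `low_mem` removal. A minimal sketch with made-up dimensions (everything else mirrors the diff above):

```python
import torch.nn.functional as F
from dgl.nn.pytorch import RelGraphConv

# Hypothetical sizes for illustration; the benchmark derives these from the data.
h_dim, num_rels, num_bases = 16, 4, 2
layer = RelGraphConv(
    h_dim, h_dim, num_rels, "basis", num_bases,
    activation=F.relu, self_loop=True, dropout=0.5, layer_norm=False)
```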
@@ -4,12 +4,9 @@ import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
-import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
-import dgl.function as fn
import time
-import traceback
from .. import utils
@@ -123,17 +120,19 @@ def track_time(data, num_negs, batch_size):
    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(',')])
-    dataloader = dgl.dataloading.EdgeDataLoader(
-        g, train_seeds, sampler, exclude='reverse_id',
+    sampler = dgl.dataloading.as_edge_prediction_sampler(
+        sampler, exclude='reverse_id',
        # For each edge with ID e in the Reddit dataset, the reverse edge is e ± |E|/2.
        reverse_eids=th.cat([
-            th.arange(n_edges // 2, n_edges),
-            th.arange(0, n_edges // 2)]),
-        negative_sampler=NegativeSampler(g, num_negs),
+            th.arange(
+                n_edges // 2, n_edges),
+            th.arange(0, n_edges // 2)]),
+        negative_sampler=NegativeSampler(g, num_negs))
+    dataloader = dgl.dataloading.DataLoader(
+        g, train_seeds, sampler,
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        pin_memory=True,
        num_workers=num_workers)
    # Define model and optimizer
......
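Condensed, the migration above replaces `EdgeDataLoader` with a neighbor sampler wrapped by `as_edge_prediction_sampler`, fed to the unified `DataLoader`. A minimal sketch under assumed inputs (`g`, `train_seeds`, and `n_edges` as defined in the benchmark; DGL's built-in uniform negative sampler stands in for the benchmark's custom `NegativeSampler`):

```python
import dgl
import torch as th

# Assumed to exist as in the benchmark script: g (the Reddit graph),
# train_seeds (training edge IDs), n_edges = g.num_edges().
sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 25])
sampler = dgl.dataloading.as_edge_prediction_sampler(
    sampler, exclude='reverse_id',
    # The reverse of edge e is e ± |E|/2 in this dataset.
    reverse_eids=th.cat([th.arange(n_edges // 2, n_edges),
                         th.arange(0, n_edges // 2)]),
    negative_sampler=dgl.dataloading.negative_sampler.Uniform(5))
dataloader = dgl.dataloading.DataLoader(
    g, train_seeds, sampler, batch_size=1024,
    shuffle=True, drop_last=False, num_workers=4)

for input_nodes, pos_graph, neg_graph, blocks in dataloader:
    pass  # forward/backward pass over the positive and negative pair graphs
```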
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F

from . import utils


class RGCN(nn.Module):
    def __init__(self, num_nodes, h_dim, out_dim, num_rels,
                 regularizer="basis", num_bases=-1, dropout=0.,
                 self_loop=False, ns_mode=False):
        super(RGCN, self).__init__()

        if num_bases == -1:
            num_bases = num_rels
        self.emb = nn.Embedding(num_nodes, h_dim)
        self.conv1 = RelGraphConv(h_dim, h_dim, num_rels, regularizer,
                                  num_bases, self_loop=self_loop)
        self.conv2 = RelGraphConv(
            h_dim, out_dim, num_rels, regularizer, num_bases, self_loop=self_loop)
        self.dropout = nn.Dropout(dropout)
        self.ns_mode = ns_mode

    def forward(self, g, nids=None):
        if self.ns_mode:
            # forward for neighbor sampling
            x = self.emb(g[0].srcdata[dgl.NID])
            h = self.conv1(g[0], x, g[0].edata[dgl.ETYPE], g[0].edata['norm'])
            h = self.dropout(F.relu(h))
            h = self.conv2(g[1], h, g[1].edata[dgl.ETYPE], g[1].edata['norm'])
            return h
        else:
            x = self.emb.weight if nids is None else self.emb(nids)
            h = self.conv1(g, x, g.edata[dgl.ETYPE], g.edata['norm'])
            h = self.dropout(F.relu(h))
            h = self.conv2(g, h, g.edata[dgl.ETYPE], g.edata['norm'])
            return h
def load_data(data_name, get_norm=False, inv_target=False):
    dataset = utils.process_data(data_name)

    # Load hetero-graph
    hg = dataset[0]
    num_rels = len(hg.canonical_etypes)
    category = dataset.predict_category
    num_classes = dataset.num_classes
    labels = hg.nodes[category].data.pop('labels')
    train_mask = hg.nodes[category].data.pop('train_mask')
    test_mask = hg.nodes[category].data.pop('test_mask')
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()

    if get_norm:
        # Calculate the normalization weight for each edge:
        # 1. / d, where d is the degree of the destination node
        for cetype in hg.canonical_etypes:
            hg.edges[cetype].data['norm'] = dgl.norm_by_dst(
                hg, cetype).unsqueeze(1)
        edata = ['norm']
    else:
        edata = None

    # get target category id
    category_id = hg.ntypes.index(category)
    g = dgl.to_homogeneous(hg, edata=edata)
    # Rename the fields as they can be changed by, for example, DataLoader
    g.ndata['ntype'] = g.ndata.pop(dgl.NTYPE)
    g.ndata['type_id'] = g.ndata.pop(dgl.NID)
    node_ids = torch.arange(g.num_nodes())

    # find out the target node ids in g
    loc = (g.ndata['ntype'] == category_id)
    target_idx = node_ids[loc]

    if inv_target:
        # Map global node IDs to type-specific node IDs. This is required for
        # looking up type-specific labels in a minibatch.
        inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64)
        inv_target[target_idx] = torch.arange(0, target_idx.shape[0],
                                              dtype=inv_target.dtype)
        return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target
    else:
        return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx
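A hedged end-to-end usage sketch of the two pieces above; 'aifb' is an assumed dataset name and the hidden size is a placeholder. It exercises the full-graph branch of `RGCN.forward` (the `'norm'` edge data comes from `get_norm=True`) and the `inv_target` mapping for minibatch-style label lookup:

```python
g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target = \
    load_data('aifb', get_norm=True, inv_target=True)

model = RGCN(num_nodes=g.num_nodes(), h_dim=16, out_dim=num_classes,
             num_rels=num_rels)
logits = model(g)[target_idx]      # keep only nodes of the target type
loss = F.cross_entropy(logits[train_idx], labels[train_idx])

# inv_target maps global node IDs back to 0..N-1 positions within the
# target type, which is how the per-type label tensor is indexed.
seeds = target_idx[:8]             # global IDs a sampler might return
seed_labels = labels[inv_target[seeds]]
```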
---find-links https://download.pytorch.org/whl/lts/1.8/torch_lts.html
-torch==1.8.1+cu111
-torchvision==0.9.1+cu111
+--find-links https://download.pytorch.org/whl/torch
+torch==1.9.0+cu111
+torchvision
pytest
nose
numpy
@@ -12,7 +12,8 @@ nltk
requests[security]
tqdm
awscli
-torchtext==0.9.1
+torchtext
pandas
rdflib
-ogb==1.3.1
\ No newline at end of file
+ogb==1.3.1
+torchmetrics
\ No newline at end of file
@@ -61,7 +61,7 @@ Let's use one of the most classical setups -- training a GraphSAGE model for node
classification on the Cora citation graph dataset as an
example.

-### Step one: `dgl configure`
+### Step 1: `dgl configure`

First step, use `dgl configure` to generate a YAML configuration file.
@@ -85,7 +85,7 @@ At this point you can also change options to explore optimization potentials.
The snippet below shows the configuration file generated by the command above.

```yaml
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cpu
@@ -181,7 +181,7 @@ That's all! Basically you only need two commands to train a graph neural network
### Step 3: `dgl export` for more advanced customization

-That's not everything yet. You may want to open the hood and and invoke deeper
+That's not everything yet. You may want to open the hood and invoke deeper
customization. DGL-Go can export a **self-contained, reproducible** Python
script for you to do anything you like.
......
@@ -23,7 +23,7 @@ class PipelineConfig(DGLBaseModel):
    loss: str = "CrossEntropyLoss"

class UserConfig(DGLBaseModel):
-    version: Optional[str] = "0.0.1"
+    version: Optional[str] = "0.0.2"
    pipeline_name: PipelineFactory.get_pipeline_enum()
    pipeline_mode: str
    device: str = "cpu"
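To illustrate how that bumped default propagates, here is a minimal pydantic-style sketch (assumptions: DGLBaseModel behaves like a plain pydantic BaseModel, and the pipeline enum field is simplified to str; field names are copied from the hunk above):

```python
from typing import Optional
from pydantic import BaseModel

class UserConfigSketch(BaseModel):
    version: Optional[str] = "0.0.2"
    pipeline_name: str          # simplified; the real field is a pipeline enum
    pipeline_mode: str
    device: str = "cpu"

# Configs that omit `version` now validate to the bumped default.
cfg = UserConfigSketch(pipeline_name="nodepred", pipeline_mode="train")
assert cfg.version == "0.0.2"
```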
-version: 0.0.1
+version: 0.0.2
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
-version: 0.0.1
+version: 0.0.2
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
-version: 0.0.1
+version: 0.0.2
pipeline_name: graphpred
pipeline_mode: train
device: cuda:0 # Torch device name, e.g. cpu or cuda or cuda:0
......
-version: 0.0.1
+version: 0.0.2
pipeline_name: linkpred
pipeline_mode: train
device: cpu
......
-version: 0.0.1
+version: 0.0.2
pipeline_name: linkpred
pipeline_mode: train
device: cpu
......
-version: 0.0.1
+version: 0.0.2
pipeline_name: linkpred
pipeline_mode: train
device: cuda
......
# Accuracy across 5 runs: 0.593288 ± 0.006103
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred-ns
pipeline_mode: train
device: 'cuda:0'
......
# Accuracy across 1 run: 0.796911
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred-ns
pipeline_mode: train
device: cuda
......
# Accuracy across 10 runs: 0.7097 ± 0.006914
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......
# Accuracy across 10 runs: 0.6852 ± 0.008875
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......
# Accuracy across 10 runs: 0.6994 ± 0.004005
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......
# Accuracy across 10 runs: 0.8208 ± 0.00663
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......
# Accuracy across 10 runs: 0.802 ± 0.005329
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......
# Accuracy across 10 runs: 0.8163 ± 0.006856
-version: 0.0.1
+version: 0.0.2
pipeline_name: nodepred
pipeline_mode: train
device: cuda:0
......