Unverified commit 56ffb650 authored by peizhou001, committed by GitHub

[API Deprecation] Deprecate contrib module (#5114)

parent 436de3d1
"""Package for DGL scheduler and runtime."""
from __future__ import absolute_import
from . import scheduler
from .runtime import Runtime
from .adapter import GraphAdapter
"""Temporary adapter to unify DGLGraph and HeteroGraph for scheduler.
NOTE(minjie): remove once all scheduler codes are migrated to heterograph
"""
from __future__ import absolute_import
from abc import ABC, abstractmethod
class GraphAdapter(ABC):
"""Temporary adapter class to unify DGLGraph and DGLHeteroGraph for schedulers."""
@property
@abstractmethod
def gidx(self):
"""Get graph index object."""
@abstractmethod
def num_src(self):
"""Number of source nodes."""
@abstractmethod
def num_dst(self):
"""Number of destination nodes."""
@abstractmethod
def num_edges(self):
"""Number of edges."""
@property
@abstractmethod
def srcframe(self):
"""Frame to store source node features."""
@property
@abstractmethod
def dstframe(self):
"""Frame to store source node features."""
@property
@abstractmethod
def edgeframe(self):
"""Frame to store edge features."""
@property
@abstractmethod
def msgframe(self):
"""Frame to store messages."""
@property
@abstractmethod
def msgindicator(self):
"""Message indicator tensor."""
@msgindicator.setter
@abstractmethod
def msgindicator(self, val):
"""Set new message indicator tensor."""
@abstractmethod
def in_edges(self, nodes):
"""Get in edges
Parameters
----------
nodes : utils.Index
Nodes
Returns
-------
tuple of utils.Index
(src, dst, eid)
"""
@abstractmethod
def out_edges(self, nodes):
"""Get out edges
Parameters
----------
nodes : utils.Index
Nodes
Returns
-------
tuple of utils.Index
(src, dst, eid)
"""
@abstractmethod
def edges(self, form):
"""Get all edges
Parameters
----------
form : str
"eid", "uv", etc.
Returns
-------
tuple of utils.Index
(src, dst, eid)
"""
@abstractmethod
def get_immutable_gidx(self, ctx):
"""Get immutable graph index for kernel computation.
Parameters
----------
ctx : DGLContext
The context of the returned graph.
Returns
-------
GraphIndex
"""
@abstractmethod
def bits_needed(self):
"""Return the number of integer bits needed to represent the graph
Returns
-------
int
The number of bits needed
"""
"""Module for degree bucketing schedulers."""
from __future__ import absolute_import
from functools import partial
from ..._ffi.function import _init_api
from ... import backend as F
from ...base import DGLError
from ..udf import NodeBatch, EdgeBatch
from ... import utils
from . import ir
from .ir import var
def gen_degree_bucketing_schedule(
reduce_udf,
message_ids,
dst_nodes,
recv_nodes,
var_nf,
var_mf,
var_out,
ntype=None):
"""Create degree bucketing schedule.
The messages will be divided by their receivers into buckets. Each bucket
contains nodes that have the same in-degree. The reduce UDF will be applied
on each bucket. The per-bucket result will be merged according to the
    *unique-ascending order* of the recv node ids. The order is important for
    compatibility with other reduce schedulers such as v2v_spmv.
Parameters
----------
reduce_udf : callable
The UDF to reduce messages.
message_ids : utils.Index
The variable for message ids.
Invariant: len(message_ids) == len(dst_nodes)
dst_nodes : utils.Index
The variable for dst node of each message.
Invariant: len(message_ids) == len(dst_nodes)
recv_nodes : utils.Index
The unique nodes that perform recv.
Invariant: recv_nodes = sort(unique(dst_nodes))
var_nf : var.FEAT_DICT
The variable for node feature frame.
var_mf : var.FEAT_DICT
The variable for message frame.
var_out : var.FEAT_DICT
The variable for output feature dicts.
ntype : str, optional
The node type, if running on a heterograph.
If None, assuming it's running on a homogeneous graph.
"""
buckets = _degree_bucketing_schedule(message_ids, dst_nodes, recv_nodes)
# generate schedule
_, degs, buckets, msg_ids, zero_deg_nodes = buckets
# loop over each bucket
idx_list = []
fd_list = []
for deg, vbkt, mid in zip(degs, buckets, msg_ids):
# create per-bkt rfunc
rfunc = _create_per_bkt_rfunc(reduce_udf, deg, vbkt, ntype=ntype)
# vars
vbkt = var.IDX(vbkt)
mid = var.IDX(mid)
rfunc = var.FUNC(rfunc)
# recv on each bucket
fdvb = ir.READ_ROW(var_nf, vbkt)
fdmail = ir.READ_ROW(var_mf, mid)
fdvb = ir.NODE_UDF(rfunc, fdvb, fdmail, ret=fdvb) # reuse var
# save for merge
idx_list.append(vbkt)
fd_list.append(fdvb)
if zero_deg_nodes is not None:
# NOTE: there must be at least one non-zero-deg node; otherwise,
# degree bucketing should not be called.
var_0deg = var.IDX(zero_deg_nodes)
zero_feat = ir.NEW_DICT(var_out, var_0deg, fd_list[0])
idx_list.append(var_0deg)
fd_list.append(zero_feat)
# merge buckets according to the ascending order of the node ids.
all_idx = F.cat([idx.data.tousertensor() for idx in idx_list], dim=0)
_, order = F.sort_1d(all_idx)
var_order = var.IDX(utils.toindex(order))
reduced_feat = ir.MERGE_ROW(var_order, fd_list)
ir.WRITE_DICT_(var_out, reduced_feat)
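# A self-contained toy (plain Python, no DGL tensors) illustrating the policy
# documented above: messages are grouped by the in-degree of their receiver,
# each bucket is reduced on its own, and the per-bucket results are merged
# back following the unique-ascending order of the receiver ids. The function
# name and the sum-reducer are made up for this sketch.
def _toy_degree_bucketing(dst_nodes, messages):
    recv_nodes = sorted(set(dst_nodes))            # unique-ascending receivers
    per_node = {u: [m for d, m in zip(dst_nodes, messages) if d == u]
                for u in recv_nodes}
    buckets = {}                                   # in-degree -> [(node, msgs)]
    for u, msgs in per_node.items():
        buckets.setdefault(len(msgs), []).append((u, msgs))
    reduced = {}                                   # per-bucket "reduce" (sum here)
    for _deg, items in buckets.items():
        for u, msgs in items:
            reduced[u] = sum(msgs)
    return [reduced[u] for u in recv_nodes]        # merged in ascending id order
# e.g. _toy_degree_bucketing([2, 0, 2, 5], [1, 10, 2, 7]) buckets nodes 0 and 5
# (degree 1) together and node 2 (degree 2) alone, and returns [10, 3, 7].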
def _degree_bucketing_schedule(mids, dsts, v):
"""Return the bucketing by degree scheduling for destination nodes of
messages
Parameters
----------
mids: utils.Index
edge id for each message
dsts: utils.Index
destination node for each message
v: utils.Index
all receiving nodes (for checking zero degree nodes)
"""
buckets = _CAPI_DGLDegreeBucketing(mids.todgltensor(), dsts.todgltensor(),
v.todgltensor())
return _process_node_buckets(buckets)
def _process_node_buckets(buckets):
"""read bucketing auxiliary data
Returns
-------
unique_v: utils.Index
        unique destination nodes
degrees: numpy.ndarray
        A list of degrees, one per bucket
    v_bkt: list of utils.Index
        A list of node id buckets; nodes in each bucket have the same degree
msg_ids: list of utils.Index
        A list of message id buckets; each node in the i-th node id bucket has
        degree[i] messages in the i-th message id bucket
zero_deg_nodes : utils.Index
The zero-degree nodes
"""
# get back results
dtype = buckets(0).dtype
degs = utils.toindex(buckets(0), dtype)
v = utils.toindex(buckets(1), dtype)
    # XXX: convert directly from ndarray to python list?
v_section = buckets(2).asnumpy().tolist()
msg_ids = utils.toindex(buckets(3), dtype)
msg_section = buckets(4).asnumpy().tolist()
# split buckets
msg_ids = msg_ids.tousertensor()
dsts = F.split(v.tousertensor(), v_section, 0)
msg_ids = F.split(msg_ids, msg_section, 0)
# convert to utils.Index
dsts = [utils.toindex(dst, dtype) for dst in dsts]
msg_ids = [utils.toindex(msg_id, dtype) for msg_id in msg_ids]
# handle zero deg
degs = degs.tonumpy()
if degs[-1] == 0:
degs = degs[:-1]
zero_deg_nodes = dsts[-1]
dsts = dsts[:-1]
else:
zero_deg_nodes = None
return v, degs, dsts, msg_ids, zero_deg_nodes
def _create_per_bkt_rfunc(reduce_udf, deg, vbkt, ntype=None):
"""Internal function to generate the per degree bucket node UDF."""
def _rfunc_wrapper(node_data, mail_data):
def _reshaped_getter(key):
msg = mail_data[key]
new_shape = (len(vbkt), deg) + F.shape(msg)[1:]
return F.reshape(msg, new_shape)
reshaped_mail_data = utils.LazyDict(_reshaped_getter, mail_data.keys())
nbatch = NodeBatch(vbkt, node_data, reshaped_mail_data, ntype=ntype)
return reduce_udf(nbatch)
return _rfunc_wrapper
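# A toy sketch (plain Python lists instead of tensors) of the reshape performed
# in _create_per_bkt_rfunc above: for a bucket of B nodes that all have degree
# D, the B*D messages are viewed as B rows of D messages each, so the reduce
# UDF can reduce along the "degree" dimension row by row. The helper name is
# made up for this sketch.
def _toy_reshape_bucket(flat_msgs, deg):
    assert len(flat_msgs) % deg == 0
    return [flat_msgs[i:i + deg] for i in range(0, len(flat_msgs), deg)]
# _toy_reshape_bucket([1, 2, 3, 4, 5, 6], deg=3) -> [[1, 2, 3], [4, 5, 6]];
# a per-row sum then yields one reduced value per node in the bucket.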
def gen_group_apply_edge_schedule(
apply_func,
u, v, eid,
group_by,
var_src_nf,
var_dst_nf,
var_ef,
var_out,
canonical_etype=(None, None, None)):
"""Create degree bucketing schedule for group_apply_edge
Edges will be grouped by either its source node or destination node
specified by 'group_by', and will be divided into buckets in which
'group_by' nodes have the same degree. The apply_func UDF will be applied
to each bucket. The per-bucket result will be merged according to the
*unique-ascending order* of the edge ids.
Parameters
----------
apply_func: callable
The edge_apply_func UDF
u: utils.Index
Source nodes of edges to apply
v: utils.Index
Destination nodes of edges to apply
eid: utils.Index
Edges to apply
group_by: str
If "src", group by u. If "dst", group by v
var_src_nf : var.FEAT_DICT
The variable for source feature frame.
var_dst_nf : var.FEAT_DICT
The variable for destination feature frame.
var_ef : var.FEAT_DICT
The variable for edge frame.
var_out : var.FEAT_DICT
The variable for output feature dicts.
canonical_etype : tuple[str, str, str], optional
Canonical edge type if running on a heterograph.
Default: (None, None, None), if running on a homogeneous graph.
"""
if group_by == "src":
buckets = _degree_bucketing_for_edge_grouping(u, v, eid)
degs, uids, vids, eids = buckets
elif group_by == "dst":
buckets = _degree_bucketing_for_edge_grouping(v, u, eid)
degs, vids, uids, eids = buckets
else:
raise DGLError("group_apply_edge must be grouped by either src or dst")
idx_list = []
fd_list = []
for deg, u_bkt, v_bkt, eid_bkt in zip(degs, uids, vids, eids):
# create per-bkt efunc
_efunc = var.FUNC(_create_per_bkt_efunc(apply_func, deg,
u_bkt, v_bkt, eid_bkt,
canonical_etype=canonical_etype))
# vars
var_u = var.IDX(u_bkt)
var_v = var.IDX(v_bkt)
var_eid = var.IDX(eid_bkt)
# apply edge UDF on each bucket
fdsrc = ir.READ_ROW(var_src_nf, var_u)
fddst = ir.READ_ROW(var_dst_nf, var_v)
fdedge = ir.READ_ROW(var_ef, var_eid)
fdedge = ir.EDGE_UDF(_efunc, fdsrc, fdedge, fddst, ret=fdedge) # reuse var
# save for merge
idx_list.append(var_eid)
fd_list.append(fdedge)
# merge buckets according to the ascending order of the edge ids.
all_idx = F.cat([idx.data.tousertensor() for idx in idx_list], dim=0)
_, order = F.sort_1d(all_idx)
var_order = var.IDX(utils.toindex(order))
ir.MERGE_ROW(var_order, fd_list, ret=var_out)
def _degree_bucketing_for_edge_grouping(uids, vids, eids):
"""Return the edge buckets by degree and grouped nodes for group_apply_edge
Parameters
----------
uids: utils.Index
node id of one end of eids, based on which edges are grouped
vids: utils.Index
node id of the other end of eids
eids: utils.Index
edge id for each edge
"""
buckets = _CAPI_DGLGroupEdgeByNodeDegree(uids.todgltensor(),
vids.todgltensor(),
eids.todgltensor())
return _process_edge_buckets(buckets)
def _process_edge_buckets(buckets):
"""read bucketing auxiliary data for group_apply_edge buckets
Returns
-------
degrees: numpy.ndarray
        A list of degrees, one per bucket
    uids: list of utils.Index
        A list of node id buckets; nodes in each bucket have the same degree
vids: list of utils.Index
A list of node id buckets
eids: list of utils.Index
A list of edge id buckets
"""
# get back results
dtype = buckets(0).dtype
degs = buckets(0).asnumpy()
uids = utils.toindex(buckets(1), dtype)
vids = utils.toindex(buckets(2), dtype)
eids = utils.toindex(buckets(3), dtype)
    # XXX: convert directly from ndarray to python list?
sections = buckets(4).asnumpy().tolist()
# split buckets and convert to index
def split(to_split):
res = F.split(to_split.tousertensor(), sections, 0)
return map(partial(utils.toindex, dtype=dtype), res)
uids = split(uids)
vids = split(vids)
eids = split(eids)
return degs, uids, vids, eids
def _create_per_bkt_efunc(apply_func, deg, u, v, eid, canonical_etype=(None, None, None)):
"""Internal function to generate the per degree bucket edge UDF."""
batch_size = len(u) // deg
def _efunc_wrapper(src_data, edge_data, dst_data):
def _reshape_func(data):
def _reshaped_getter(key):
feat = data[key]
new_shape = (batch_size, deg) + F.shape(feat)[1:]
return F.reshape(feat, new_shape)
return _reshaped_getter
def _reshape_back(data):
shape = F.shape(data)[2:]
new_shape = (batch_size * deg,) + shape
return F.reshape(data, new_shape)
reshaped_src_data = utils.LazyDict(_reshape_func(src_data),
src_data.keys())
reshaped_edge_data = utils.LazyDict(_reshape_func(edge_data),
edge_data.keys())
reshaped_dst_data = utils.LazyDict(_reshape_func(dst_data),
dst_data.keys())
ebatch = EdgeBatch((u, v, eid), reshaped_src_data,
reshaped_edge_data, reshaped_dst_data,
canonical_etype=canonical_etype)
return {k: _reshape_back(v) for k, v in apply_func(ebatch).items()}
return _efunc_wrapper
_init_api("dgl._deprecate.runtime.degree_bucketing")
"""Package for DGL's internal IR."""
from .executor import *
from .program import get_current_prog, prog
"""Module for executors."""
# pylint: disable=invalid-name
from __future__ import absolute_import
from abc import abstractmethod
from .... import backend as F
from ...frame import FrameRef, Frame
from .... import utils
from .program import get_current_prog
from . import var
from .var import VarType
from .registry import IR_REGISTRY
__all__ = [
'OpCode', 'Executor',
'NodeUDFExecutor', 'NODE_UDF',
'EdgeUDFExecutor', 'EDGE_UDF',
'ReadExecutor', 'READ',
'ReadColExecutor', 'READ_COL',
'ReadRowExecutor', 'READ_ROW',
'MergeRowExecutor', 'MERGE_ROW',
'UpdateDictExecutor', 'UPDATE_DICT',
'NewDictExecutor', 'NEW_DICT',
'Write_Executor', 'WRITE_',
'WriteCol_Executor', 'WRITE_COL_',
'WriteRow_Executor', 'WRITE_ROW_',
'WriteDict_Executor', 'WRITE_DICT_',
'AppendRow_Executor', 'APPEND_ROW_',
'WriteRowInplace_Executor', 'WRITE_ROW_INPLACE_',
'ClearFrame_Executor', 'CLEAR_FRAME_',
'BinaryReduceExecutor', 'BINARY_REDUCE',
'CopyReduceExecutor', 'COPY_REDUCE',
]
class OpCode(object):
"""Opcode for all the executor types."""
# immutable op
NODE_UDF = 0
EDGE_UDF = 1
READ = 4
READ_COL = 5
READ_ROW = 6
MERGE_ROW = 7
UPDATE_DICT = 8
NEW_DICT = 9
# mutable op (no return)
# remember the name is suffixed with "_"
WRITE_ = 21
WRITE_COL_ = 22
WRITE_ROW_ = 23
WRITE_DICT_ = 24
APPEND_ROW_ = 25
WRITE_ROW_INPLACE_ = 26
CLEAR_FRAME_ = 27
# DGL kernels
BINARY_REDUCE = 50
COPY_REDUCE = 51
class Executor(object):
"""Base executor class.
    An executor is similar to a basic operator in a dataflow-based framework.
The executor can be evaluated by the ``run`` function.
"""
@abstractmethod
def opcode(self):
"""Return the opcode of this executor."""
raise NotImplementedError
@abstractmethod
def arg_vars(self):
"""Return the argument variable list of this executor."""
raise NotImplementedError
@abstractmethod
def ret_var(self):
"""Return the result variable of this executor."""
raise NotImplementedError
@abstractmethod
def run(self):
"""Evaluate this executor.
        The function takes no arguments and returns nothing, which means all the
argument and result variables must be pre-bound.
"""
raise NotImplementedError
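# A minimal toy executor (not registered in IR_REGISTRY and unused elsewhere)
# sketching the contract above: every argument and the result are pre-bound
# variable holders, and ``run`` takes no arguments and returns nothing. The
# class name and the add operation are made up for this sketch.
class _ToyAddExecutor(Executor):
    """Toy executor that adds the data of two pre-bound variables."""
    def __init__(self, lhs, rhs, ret):
        self.lhs = lhs
        self.rhs = rhs
        self.ret = ret
    def opcode(self):
        return -1  # toy opcode; real executors use OpCode values
    def arg_vars(self):
        return [self.lhs, self.rhs]
    def ret_var(self):
        return self.ret
    def run(self):
        # All data is read from and written to the bound variables.
        self.ret.data = self.lhs.data + self.rhs.data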
class NodeUDFExecutor(Executor):
"""Executor for Node UDF call.
Parameters
----------
fn : var.Var
The UDF.
fdnode : var.Var
The node feature dict.
fdmail : var.Var
The mailbox data dict.
ret : var.Var
        The return variable for the new node feature dict.
"""
def __init__(self, fn, fdnode, fdmail, ret):
self.fn = fn
self.fdnode = fdnode
self.fdmail = fdmail
self.ret = ret
def opcode(self):
return OpCode.NODE_UDF
def arg_vars(self):
if self.fdmail is None:
return [self.fn, self.fdnode]
else:
return [self.fn, self.fdnode, self.fdmail]
def ret_var(self):
return self.ret
def run(self):
fn_data = self.fn.data
node_data = self.fdnode.data
if self.fdmail is None:
udf_ret = fn_data(node_data)
else:
mail_data = self.fdmail.data
udf_ret = fn_data(node_data, mail_data)
self.ret.data = FrameRef(Frame(udf_ret))
IR_REGISTRY[OpCode.NODE_UDF] = {
'name' : 'NODE_UDF',
'args_type' : [VarType.FUNC, VarType.FEAT_DICT, VarType.FEAT_DICT],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : NodeUDFExecutor,
}
def NODE_UDF(fn, fdnode, fdmail=None, ret=None):
"""Apply the node UDF and get the new node feature symbolically.
Parameters
----------
fn : var.Var
The UDF.
fdnode : var.Var
The node feature dict.
fdmail : var.Var
The mailbox data dict.
ret : var.Var, optional
        The return variable for the new node feature dict. If not given,
        a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.NODE_UDF]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fn, fdnode, fdmail, ret))
return ret
class EdgeUDFExecutor(Executor):
"""Executor for edge UDF call.
Parameters
----------
fn : var.Var
The UDF.
fdsrc : var.Var
The src node feature dict.
fdedge : var.Var
The edge feature dict.
fddst : var.Var
The dst node feature dict.
ret : var.Var
        The return variable for the new edge feature dict.
"""
def __init__(self, fn, fdsrc, fdedge, fddst, ret):
self.fn = fn
self.fdsrc = fdsrc
self.fdedge = fdedge
self.fddst = fddst
self.ret = ret
def opcode(self):
return OpCode.EDGE_UDF
def arg_vars(self):
return [self.fn, self.fdsrc, self.fdedge, self.fddst]
def ret_var(self):
return self.ret
def run(self):
fn_data = self.fn.data
src_data = self.fdsrc.data
edge_data = self.fdedge.data
dst_data = self.fddst.data
udf_ret = fn_data(src_data, edge_data, dst_data)
self.ret.data = FrameRef(Frame(udf_ret))
IR_REGISTRY[OpCode.EDGE_UDF] = {
'name' : 'EDGE_UDF',
    'args_type' : [VarType.FUNC, VarType.FEAT_DICT, VarType.FEAT_DICT, VarType.FEAT_DICT],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : EdgeUDFExecutor,
}
def EDGE_UDF(fn, fdsrc, fdedge, fddst, ret=None):
"""Apply the edge UDF and get the new edge feature symbolically.
Parameters
----------
fn : var.Var
The UDF.
fdsrc : var.Var
The src node feature dict.
fdedge : var.Var
The edge feature dict.
fddst : var.Var
The dst node feature dict.
ret : var.Var, optional
        The return variable for the new edge feature dict. If not given,
        a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.EDGE_UDF]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fn, fdsrc, fdedge, fddst, ret))
return ret
class ReadExecutor(Executor):
"""Executor for read data from feature dict.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
col : var.Var
The column name.
ret : var.Var
The return feature tensor.
"""
def __init__(self, fd, row, col, ret):
self.fd = fd
self.row = row
self.col = col
self.ret = ret
def opcode(self):
return OpCode.READ
def arg_vars(self):
return [self.fd, self.row, self.col]
def ret_var(self):
return self.ret
def run(self):
fd_data = self.fd.data # feature dict
row_data = self.row.data # idx
col_data = self.col.data # key str
self.ret.data = fd_data[row_data][col_data]
IR_REGISTRY[OpCode.READ] = {
'name' : 'READ',
'args_type' : [VarType.FEAT_DICT, VarType.IDX, VarType.STR],
'ret_type' : VarType.FEAT,
'executor_cls' : ReadExecutor,
}
def READ(fd, row, col, ret=None):
"""Read the feature data from the dictionary specified by the row and column symbolically.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
col : var.Var
The column name.
ret : var.Var, optional
        The return feature tensor. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.READ]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fd, row, col, ret))
return ret
class ReadColExecutor(Executor):
"""Executor for read column data from feature dict.
Parameters
----------
fd : var.Var
The feature dict.
col : var.Var
The column name.
ret : var.Var
The return feature tensor.
"""
def __init__(self, fd, col, ret):
self.fd = fd
self.col = col
self.ret = ret
def opcode(self):
return OpCode.READ_COL
def arg_vars(self):
return [self.fd, self.col]
def ret_var(self):
return self.ret
def run(self):
fd_data = self.fd.data
col_data = self.col.data
self.ret.data = fd_data[col_data]
IR_REGISTRY[OpCode.READ_COL] = {
'name' : 'READ_COL',
'args_type' : [VarType.FEAT_DICT, VarType.STR],
'ret_type' : VarType.FEAT,
'executor_cls' : ReadColExecutor,
}
def READ_COL(fd, col, ret=None):
"""Read the column data from the dictionary.
Parameters
----------
fd : var.Var
The feature dict.
col : var.Var
The column name.
ret : var.Var, optional
        The return feature tensor. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.READ_COL]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fd, col, ret))
return ret
class ReadRowExecutor(Executor):
"""Executor for read row data from feature dict.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
ret : var.Var
The return feature tensor.
"""
def __init__(self, fd, row, ret):
self.fd = fd
self.row = row
self.ret = ret
def opcode(self):
return OpCode.READ_ROW
def arg_vars(self):
return [self.fd, self.row]
def ret_var(self):
return self.ret
def run(self):
fd_data = self.fd.data
row_data = self.row.data # idx
self.ret.data = fd_data[row_data]
IR_REGISTRY[OpCode.READ_ROW] = {
'name' : 'READ_ROW',
'args_type' : [VarType.FEAT_DICT, VarType.IDX],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : ReadRowExecutor,
}
def READ_ROW(fd, row, ret=None):
"""Read the row data from the dictionary.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
ret : var.Var, optional
        The return feature tensor. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.READ_ROW]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fd, row, ret))
return ret
class MergeRowExecutor(Executor):
"""Executor for merge row data according to the given order.
Parameters
----------
order : var.Var
The order index.
fd_list : list of var.Var
The list of row data variables. Each represents a feature dict.
ret : var.Var
Variable for the result.
"""
def __init__(self, order, fd_list, ret):
self.order = order
self.fd_list = fd_list
self.ret = ret
def opcode(self):
return OpCode.MERGE_ROW
def arg_vars(self):
return [self.order] + self.fd_list
def ret_var(self):
return self.ret
def run(self):
# merge buckets according to the ascending order of the node ids.
order_data = self.order.data
fd_data = [fd.data for fd in self.fd_list]
keys = fd_data[0].keys()
all_fd = {key : F.cat([fd[key] for fd in fd_data], dim=0)
for key in keys}
ret_fd = utils.reorder(all_fd, order_data)
self.ret.data = ret_fd
IR_REGISTRY[OpCode.MERGE_ROW] = {
'name' : 'MERGE_ROW',
'args_type' : [VarType.IDX, VarType.IDX, '*', VarType.FEAT_DICT, '*'],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : MergeRowExecutor,
}
def MERGE_ROW(idx_list, fd_list, ret=None):
"""Merge row data according to the given order symbolically.
Parameters
----------
    idx_list : var.Var
The order index.
fd_list : list of var.Var
The list of row data variables. Each represents a feature dict.
ret : var.Var, optional
        Variable for the result. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.MERGE_ROW]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](idx_list, fd_list, ret))
return ret
class UpdateDictExecutor(Executor):
"""Executor for update feature dictionary with another one.
Similar to python dict's update but return a new dictionary.
Parameters
----------
fd1 : var.Var
Variable for the feature dict to be updated.
fd2 : var.Var
Variable for the provided feature dict.
ret : var.Var
Variable for the result.
"""
def __init__(self, fd1, fd2, ret):
self.fd1 = fd1
self.fd2 = fd2
self.ret = ret
def opcode(self):
return OpCode.UPDATE_DICT
def arg_vars(self):
return [self.fd1, self.fd2]
def ret_var(self):
return self.ret
def run(self):
fd1_data = self.fd1.data
fd2_data = self.fd2.data
if (isinstance(fd1_data, utils.LazyDict)
or isinstance(fd2_data, utils.LazyDict)):
# NOTE: fd2 has higher priority
ret_data = utils.HybridDict(fd2_data, fd1_data)
else:
ret_data = {k : v for k, v in fd1_data.items()}
ret_data.update(fd2_data)
self.ret.data = ret_data
IR_REGISTRY[OpCode.UPDATE_DICT] = {
'name' : 'UPDATE_DICT',
'args_type' : [VarType.FEAT_DICT, VarType.FEAT_DICT],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : UpdateDictExecutor,
}
def UPDATE_DICT(fd1, fd2, ret=None):
"""Executor for update feature dictionary with another one.
Similar to python dict's update but return a new dictionary.
Parameters
----------
fd1 : var.Var
Variable for the feature dict to be updated.
fd2 : var.Var
Variable for the provided feature dict.
ret : var.Var, optional
        Variable for the result. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.UPDATE_DICT]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fd1, fd2, ret))
return ret
class NewDictExecutor(Executor):
"""Executor for creating new feature dictionary.
Parameters
----------
fd_init : var.Var
The feat dict to borrow initializer.
idx : var.Var
        The index used to determine the number of rows.
fd_scheme : var.Var
The feat dict to look for column scheme.
ret : var.Var
Variable for the result.
"""
def __init__(self, fd_init, idx, fd_scheme, ret):
self.fd_init = fd_init # the feat dict to borrow initializer
        self.idx = idx # the index used to determine the number of rows
self.fd_scheme = fd_scheme # the feat dict to look for column scheme
self.ret = ret # the result
def opcode(self):
return OpCode.NEW_DICT
def arg_vars(self):
return [self.fd_init, self.idx, self.fd_scheme]
def ret_var(self):
return self.ret
def run(self):
fd_init_data = self.fd_init.data
idx_data = self.idx.data
fd_scheme_data = self.fd_scheme.data
schemes = fd_scheme_data.schemes
ret_dict = {}
for key, sch in schemes.items():
initializer = fd_init_data.get_initializer(key)
ctx = F.context(fd_scheme_data[key])
shape = (len(idx_data),) + sch.shape
# FIXME: the last argument here can only be idx; range
# is meaningless. Need to rethink the signature.
ret_dict[key] = initializer(shape, sch.dtype, ctx, idx_data)
self.ret.data = FrameRef(Frame(ret_dict))
IR_REGISTRY[OpCode.NEW_DICT] = {
'name' : 'NEW_DICT',
'args_type' : [VarType.FEAT_DICT, VarType.IDX, VarType.FEAT_DICT],
'ret_type' : VarType.FEAT_DICT,
'executor_cls' : NewDictExecutor,
}
def NEW_DICT(fd_init, idx, fd_scheme, ret=None):
"""Create a new dictionary symbolically.
Parameters
----------
fd_init : var.Var
The feat dict to borrow initializer.
idx : var.Var
        The index used to determine the number of rows.
fd_scheme : var.Var
The feat dict to look for column scheme.
    ret : var.Var, optional
        Variable for the result. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.NEW_DICT]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](fd_init, idx, fd_scheme, ret))
return ret
class Write_Executor(Executor):
"""Executor for writing the given data to the feature dict.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
col : var.Var
The column name.
val : var.Var
The given feature data.
"""
def __init__(self, fd, row, col, val):
self.fd = fd
self.row = row
self.col = col
self.val = val
def opcode(self):
return OpCode.WRITE_
def arg_vars(self):
return [self.fd, self.row, self.col, self.val]
def ret_var(self):
return None
def run(self):
fd_data = self.fd.data # feature dict
row_data = self.row.data # idx
col_data = self.col.data # key str
val_data = self.val.data
fd_data[col_data][row_data] = val_data
IR_REGISTRY[OpCode.WRITE_] = {
'name' : 'WRITE_',
'args_type' : [VarType.FEAT_DICT, VarType.IDX, VarType.STR, VarType.FEAT],
'ret_type' : None,
'executor_cls' : Write_Executor,
}
def WRITE_(fd, row, col, val):
"""Write the given data to the feature dict symbolically.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
col : var.Var
The column name.
val : var.Var
The given feature data.
"""
reg = IR_REGISTRY[OpCode.WRITE_]
get_current_prog().issue(reg['executor_cls'](fd, row, col, val))
class WriteCol_Executor(Executor):
"""Executor for writing the given column data to the feature dict.
Parameters
----------
fd : var.Var
The feature dict.
col : var.Var
The column name.
val : var.Var
The given feature data.
"""
def __init__(self, fd, col, val):
self.fd = fd
self.col = col
self.val = val
def opcode(self):
return OpCode.WRITE_COL_
def arg_vars(self):
return [self.fd, self.col, self.val]
def ret_var(self):
return None
def run(self):
fd_data = self.fd.data # feature dict
col_data = self.col.data # key str
val_data = self.val.data
fd_data[col_data] = val_data
IR_REGISTRY[OpCode.WRITE_COL_] = {
'name' : 'WRITE_COL_',
'args_type' : [VarType.FEAT_DICT, VarType.STR, VarType.FEAT],
'ret_type' : None,
'executor_cls' : WriteCol_Executor,
}
def WRITE_COL_(fd, col, val):
"""Writing the given column data to the feature dict symbolically.
Parameters
----------
fd : var.Var
The feature dict.
col : var.Var
The column name.
val : var.Var
The given feature data.
"""
reg = IR_REGISTRY[OpCode.WRITE_COL_]
get_current_prog().issue(reg['executor_cls'](fd, col, val))
class WriteRow_Executor(Executor):
"""Executor for writing the given row data to the feature dict.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
val : var.Var
The given feature data.
"""
def __init__(self, fd, row, val):
self.fd = fd
self.row = row
self.val = val
def opcode(self):
return OpCode.WRITE_ROW_
def arg_vars(self):
return [self.fd, self.row, self.val]
def ret_var(self):
return None
def run(self):
fd_data = self.fd.data # feature dict
row_data = self.row.data # idx
val_data = self.val.data
fd_data[row_data] = val_data
IR_REGISTRY[OpCode.WRITE_ROW_] = {
'name' : 'WRITE_ROW_',
'args_type' : [VarType.FEAT_DICT, VarType.IDX, VarType.FEAT_DICT],
'ret_type' : None,
'executor_cls' : WriteRow_Executor,
}
def WRITE_ROW_(fd, row, val):
"""Write the given row data to the feature dict symbolically.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
val : var.Var
The given feature data.
"""
reg = IR_REGISTRY[OpCode.WRITE_ROW_]
get_current_prog().issue(reg['executor_cls'](fd, row, val))
class WriteRowInplace_Executor(Executor):
"""Executor for writing the given row data to the feature dict in-place.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
val : var.Var
The given feature data.
"""
def __init__(self, fd, row, val):
self.fd = fd
self.row = row
self.val = val
def opcode(self):
return OpCode.WRITE_ROW_INPLACE_
def arg_vars(self):
return [self.fd, self.row, self.val]
def ret_var(self):
return None
def run(self):
fd_data = self.fd.data # feature dict
row_data = self.row.data # idx
val_data = self.val.data
fd_data.update_data(row_data, val_data, inplace=True)
IR_REGISTRY[OpCode.WRITE_ROW_INPLACE_] = {
'name' : 'WRITE_ROW_INPLACE_',
'args_type' : [VarType.FEAT_DICT, VarType.IDX, VarType.FEAT_DICT],
'ret_type' : None,
'executor_cls' : WriteRowInplace_Executor,
}
def WRITE_ROW_INPLACE_(fd, row, val):
"""Write the given row data to the feature dict in-place symbolically.
Parameters
----------
fd : var.Var
The feature dict.
row : var.Var
The row index.
val : var.Var
The given feature data.
"""
reg = IR_REGISTRY[OpCode.WRITE_ROW_INPLACE_]
get_current_prog().issue(reg['executor_cls'](fd, row, val))
class WriteDict_Executor(Executor):
"""Executor for writing the given feature dict data into the another one.
Parameters
----------
fd1 : var.Var
The feature dict to be mutated.
fd2 : var.Var
The feature dict data.
"""
def __init__(self, fd1, fd2):
self.fd1 = fd1
self.fd2 = fd2
def opcode(self):
return OpCode.WRITE_DICT_
def arg_vars(self):
return [self.fd1, self.fd2]
def ret_var(self):
return None
def run(self):
fd1_data = self.fd1.data
fd2_data = self.fd2.data
for k, v in fd2_data.items():
fd1_data[k] = v
IR_REGISTRY[OpCode.WRITE_DICT_] = {
'name' : 'WRITE_DICT_',
'args_type' : [VarType.FEAT_DICT, VarType.FEAT_DICT],
'ret_type' : None,
'executor_cls' : WriteDict_Executor,
}
def WRITE_DICT_(fd1, fd2):
"""Writing the given feature dict data into the another one symbolically.
Parameters
----------
fd1 : var.Var
The feature dict to be mutated.
fd2 : var.Var
The feature dict data.
"""
reg = IR_REGISTRY[OpCode.WRITE_DICT_]
get_current_prog().issue(reg['executor_cls'](fd1, fd2))
class AppendRow_Executor(Executor):
"""Executor for appending one feature dict to another.
Parameters
----------
fd1 : var.Var
The feature dict in the front.
fd2 : var.Var
The feature dict in the back.
"""
def __init__(self, fd1, fd2):
self.fd1 = fd1
self.fd2 = fd2
def opcode(self):
return OpCode.APPEND_ROW_
def arg_vars(self):
return [self.fd1, self.fd2]
def ret_var(self):
return None
def run(self):
fd1_data = self.fd1.data
fd2_data = self.fd2.data
fd1_data.append(fd2_data)
IR_REGISTRY[OpCode.APPEND_ROW_] = {
'name' : 'APPEND_ROW_',
'args_type' : [VarType.FEAT_DICT, VarType.FEAT_DICT],
'ret_type' : None,
'executor_cls' : AppendRow_Executor,
}
def APPEND_ROW_(fd1, fd2):
"""Append one feature dict to another symbolically.
Parameters
----------
fd1 : var.Var
The feature dict in the front.
fd2 : var.Var
The feature dict in the back.
"""
reg = IR_REGISTRY[OpCode.APPEND_ROW_]
get_current_prog().issue(reg['executor_cls'](fd1, fd2))
class ClearFrame_Executor(Executor):
"""Executor for clear the feature dict.
Parameters
----------
fd : var.Var
The feature dict to be cleared.
"""
def __init__(self, fd):
self.fd = fd
def opcode(self):
return OpCode.CLEAR_FRAME_
def arg_vars(self):
return [self.fd]
def ret_var(self):
return None
def run(self):
frame = self.fd.data
num_rows = frame.num_rows
frame.clear()
frame.add_rows(num_rows)
IR_REGISTRY[OpCode.CLEAR_FRAME_] = {
'name': 'CLEAR_FRAME_',
'args_type': [VarType.FEAT_DICT],
'ret_type': None,
'executor_cls': ClearFrame_Executor,
}
def CLEAR_FRAME_(fd):
"""Clear the feature dict symbolically.
Parameters
----------
fd : var.Var
The feature dict to be cleared.
"""
reg = IR_REGISTRY[OpCode.CLEAR_FRAME_]
get_current_prog().issue(reg['executor_cls'](fd))
class BinaryReduceExecutor(Executor):
"""Executor for BINARY_REDUCE
Parameters
----------
reducer : str
String representing reduction to perform, can be "sum", "max", "min",
"mean", "prod", "none" (no reduction)
binary_op : str
String representing binary operation to perform, can be "add", "mul",
"sub", "div", "dot"
graph : var.Var
Variable for graph index lambda. The lambda returns the immutable graph
index given a context object.
lhs: int
The lhs target (src, dst, edge)
rhs: int
The rhs target (src, dst, edge)
lhs_data : var.Var
Variable for the lhs data
rhs_data : var.Var
Variable for the rhs data
out_size : int
Output size
lhs_map : var.Var
Variable for mapping lambda. The lambda returns the lhs id mapping
array on given context
rhs_map : var.Var
Variable for mapping lambda. The lambda returns the rhs id mapping
array on given context
out_map : var.Var
Variable for mapping lambda. The lambda returns the output id mapping
array on given context
ret : var.Var
Variable for the result.
"""
def __init__(self, reducer, binary_op, graph, lhs, rhs, lhs_data,
rhs_data, out_size, lhs_map, rhs_map, out_map, ret):
self.reducer = reducer
self.binary_op = binary_op
self.graph = graph
self.lhs = lhs
self.rhs = rhs
self.lhs_data = lhs_data
self.rhs_data = rhs_data
self.out_size = out_size
self.lhs_map = lhs_map
self.rhs_map = rhs_map
self.out_map = out_map
self.ret = ret
def opcode(self):
return OpCode.BINARY_REDUCE
def arg_vars(self):
return [self.reducer, self.binary_op, self.graph, self.lhs, self.rhs,
self.lhs_data, self.rhs_data, self.out_size, self.lhs_map,
self.rhs_map, self.out_map]
def ret_var(self):
return self.ret
def run(self):
lhs_data = self.lhs_data.data
rhs_data = self.rhs_data.data
ctx = utils.to_dgl_context(F.context(lhs_data))
graph = self.graph.data(ctx)
lhs_map = self.lhs_map.data(ctx) if self.lhs_map.data else None
rhs_map = self.rhs_map.data(ctx) if self.rhs_map.data else None
out_map = self.out_map.data(ctx) if self.out_map.data else None
if not isinstance(lhs_map, tuple):
lhs_map = (lhs_map, lhs_map)
if not isinstance(rhs_map, tuple):
rhs_map = (rhs_map, rhs_map)
if not isinstance(out_map, tuple):
out_map = (out_map, out_map)
self.ret.data = F.binary_reduce(
self.reducer, self.binary_op, graph, self.lhs, self.rhs,
lhs_data, rhs_data, self.out_size, lhs_map, rhs_map, out_map)
IR_REGISTRY[OpCode.BINARY_REDUCE] = {
'name': 'BINARY_REDUCE',
'args_type': [VarType.STR, VarType.STR, VarType.GRAPH, VarType.INT,
VarType.INT, VarType.FEAT, VarType.FEAT, VarType.INT,
VarType.MAP, VarType.MAP, VarType.MAP],
'ret_type': VarType.FEAT,
'executor_cls': BinaryReduceExecutor,
}
def BINARY_REDUCE(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
out_size, lhs_map, rhs_map, out_map, ret=None):
"""Perform BINARY_REDUCE symbolically.
Parameters
----------
reducer : str
String representing reduction to perform, can be "sum", "max", "min",
"mean", "prod", "none" (no reduction)
binary_op : str
String representing binary operation to perform, can be "add", "mul",
"sub", "div", "dot"
graph : var.Var
Variable for graph index lambda. The lambda returns the immutable graph
index given a context object.
lhs: int
The lhs target (src, dst, edge)
rhs: int
The rhs target (src, dst, edge)
lhs_data : var.Var
Variable for the lhs data
rhs_data : var.Var
Variable for the rhs data
out_size : int
Output size
lhs_map : var.Var
Variable for mapping lambda. The lambda returns the lhs id mapping
array on given context
rhs_map : var.Var
Variable for mapping lambda. The lambda returns the rhs id mapping
array on given context
out_map : var.Var
Variable for mapping lambda. The lambda returns the output id mapping
array on given context
ret : var.Var, optional
        Variable for the result. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.BINARY_REDUCE]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](
reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data, out_size,
lhs_map, rhs_map, out_map, ret))
return ret
class CopyReduceExecutor(Executor):
"""Executor for COPY_REDUCE
Parameters
----------
reducer : str
String representing reduction to perform, can be "sum", "max", "min",
"mean", "prod", "none" (no reduction)
graph : var.Var
Variable for graph index lambda. The lambda returns the immutable graph
index given a context object.
target: int
The input target (src, dst, edge)
in_data : var.Var
Variable for the input data
out_size : int
Output size
in_map : var.Var
Variable for mapping lambda. The lambda returns the input id mapping
array on given context
out_map : var.Var
Variable for mapping lambda. The lambda returns the output id mapping
array on given context
ret : var.Var
Variable for the result.
"""
def __init__(self, reducer, graph, target, in_data, out_size, in_map,
out_map, ret):
self.reducer = reducer
self.graph = graph
self.target = target
self.in_data = in_data
self.out_size = out_size
self.in_map = in_map
self.out_map = out_map
self.ret = ret
def opcode(self):
return OpCode.COPY_REDUCE
def arg_vars(self):
return [self.reducer, self.graph, self.target, self.in_data,
self.out_size, self.in_map, self.out_map]
def ret_var(self):
return self.ret
def run(self):
in_data = self.in_data.data
ctx = utils.to_dgl_context(F.context(in_data))
graph = self.graph.data(ctx)
in_map = self.in_map.data(ctx) if self.in_map.data else None
out_map = self.out_map.data(ctx) if self.out_map.data else None
if not isinstance(in_map, tuple):
in_map = (in_map, in_map)
if not isinstance(out_map, tuple):
out_map = (out_map, out_map)
self.ret.data = F.copy_reduce(
self.reducer, graph, self.target, in_data, self.out_size, in_map,
out_map)
IR_REGISTRY[OpCode.COPY_REDUCE] = {
'name': 'COPY_REDUCE',
'args_type': [VarType.STR, VarType.GRAPH, VarType.INT, VarType.FEAT, VarType.INT,
VarType.MAP, VarType.MAP],
'ret_type': VarType.FEAT,
'executor_cls': CopyReduceExecutor,
}
def COPY_REDUCE(reducer, graph, target, in_data, out_size, in_map, out_map,
ret=None):
"""Perform COPY_REDUCE symbolically.
Parameters
----------
reducer : str
String representing reduction to perform, can be "sum", "max", "min",
"mean", "prod", "none" (no reduction)
graph : var.Var
Variable for graph index lambda. The lambda returns the immutable graph
index given a context object.
target: int
The input target (src, dst, edge)
in_data : var.Var
Variable for the input data
out_size : int
Output size
in_map : var.Var
Variable for mapping lambda. The lambda returns the input id mapping
array on given context
out_map : var.Var
Variable for mapping lambda. The lambda returns the output id mapping
array on given context
ret : var.Var, optional
        Variable for the result. If not given, a new variable will be created.
Returns
-------
var.Var
Variable for the result.
"""
reg = IR_REGISTRY[OpCode.COPY_REDUCE]
ret = var.new(reg['ret_type']) if ret is None else ret
get_current_prog().issue(reg['executor_cls'](
reducer, graph, target, in_data, out_size, in_map, out_map, ret))
return ret
"""Module for program."""
from __future__ import absolute_import
from contextlib import contextmanager
import threading
from .registry import IR_REGISTRY
class Prog(object):
"""The program.
A program is simply a list of executors.
"""
def __init__(self):
self.execs = []
self.varcount = 0
def issue(self, exe):
"""Issue an executor to this program.
Parameters
----------
exe : Executor
The executor.
"""
self.execs.append(exe)
def pprint_exe(self, exe):
"""Internal function to pretty-print the executor."""
argstr = ', '.join([str(av) for av in exe.arg_vars()])
if exe.ret_var() is None:
# stmt
print("%s(%s)" % (
IR_REGISTRY[exe.opcode()]['name'],
argstr))
else:
print("%s %s = %s(%s)" % (
exe.ret_var().typestr(),
exe.ret.name,
IR_REGISTRY[exe.opcode()]['name'],
argstr))
def pprint(self):
"""Pretty-print the program."""
for exe in self.execs:
self.pprint_exe(exe)
class CurrentProgram(threading.local):
"""Thread local storage to keep the reference of current thread's program"""
def __init__(self):
super(CurrentProgram, self).__init__()
self.prog = None
def get_prog(self):
"""Get program"""
return self.prog
def set_prog(self, program):
"""Set program"""
self.prog = program
# current program
CURRENT_PROG = CurrentProgram()
def get_current_prog():
"""Get the current program."""
return CURRENT_PROG.get_prog()
def set_current_prog(program):
"""Set the current program."""
CURRENT_PROG.set_prog(program)
@contextmanager
def prog():
"""A context manager to create a new program."""
set_current_prog(Prog())
yield get_current_prog()
set_current_prog(None)
"""Module for ir registry."""
from __future__ import absolute_import
IR_REGISTRY = {}
"""Module for variables."""
# pylint: disable=invalid-name
from __future__ import absolute_import
from .program import get_current_prog
class VarType(object):
"""Variable types."""
    # Types for symbolic objects (i.e., they might not be
    # concretized before evaluation).
FEAT = 0
FEAT_DICT = 1
    # Types for concrete objects (i.e., they must have values).
GRAPH = 2
IDX = 3
STR = 4
FUNC = 5
MAP = 6
INT = 7
VAR_TYPE_NAME_MAP = [
'Feat',
'FeatDict',
    'Graph',
'Idx',
'Str',
'Func',
'Map',
'Int',
]
class Var(object):
"""Class for variables in IR.
Variables represent data in the IR. A variable can contain concrete values.
    Otherwise, it can act as a "symbol" whose value is not materialized at
    the moment but is filled in later.
Parameters
----------
name : str
The variable name.
    typecode : int
The type code.
data : any, default=None (not concretized)
The data.
"""
__slots__ = ['name', 'typecode', 'data']
def __init__(self, name, typecode, data):
self.name = name
self.typecode = typecode
self.data = data
def __str__(self):
if self.typecode == VarType.STR:
return '"%s"' % self.data
else:
return self.name
def typestr(self):
"""Return the type string of this variable."""
return VAR_TYPE_NAME_MAP[self.typecode]
def new(typecode, data=None, name=None):
"""Create a new variable."""
if name is None:
cur_prog = get_current_prog()
name = '_z%d' % cur_prog.varcount
cur_prog.varcount += 1
return Var(name, typecode, data)
def FEAT(data=None, name=None):
"""Create a variable for feature tensor."""
return new(VarType.FEAT, data, name)
def FEAT_DICT(data=None, name=None):
"""Create a variable for feature dict."""
return new(VarType.FEAT_DICT, data, name)
def GRAPH(data=None, name=None):
"""Create a variable for graph index lambda."""
return new(VarType.GRAPH, data, name)
def IDX(data=None, name=None):
"""Create a variable for index."""
return new(VarType.IDX, data, name)
def STR(data=None, name=None):
"""Create a variable for string value."""
return new(VarType.STR, data, name)
def FUNC(data=None, name=None):
"""Create a variable for function."""
return new(VarType.FUNC, data, name)
def MAP(data=None, name=None):
"""Create a variable for mapping lambda"""
return new(VarType.MAP, data, name)
def INT(data=None, name=None):
"""Create a variable for int value"""
return new(VarType.INT, data, name)
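# Hedged usage sketch (defined here but never called by DGL itself): explicitly
# named variables can be created anywhere, while auto-generated names such as
# "_z0", "_z1" come from the current program's counter and therefore require an
# active ``prog()`` context. The function name is made up for this sketch.
def _example_variable_naming():
    """Return a small tuple demonstrating variable naming."""
    from .program import prog        # local import; program.py does not import var
    named = STR("feat", name="col")  # explicit name, no program required
    with prog():
        a = FEAT()                   # auto-named "_z0"
        b = FEAT_DICT({"h": [1.0]})  # auto-named "_z1"
    return str(named), a.name, b.typestr()   # ('"feat"', '_z0', 'FeatDict')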
"""DGL mini-runtime."""
class Runtime(object):
"""The mini runtime class."""
@staticmethod
def run(prog):
"""Run the given program."""
for exe in prog.execs:
# prog.pprint_exe(exe)
exe.run()
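# Hedged end-to-end sketch (defined here but never called by DGL itself): build
# a tiny symbolic program with the IR helpers defined earlier and evaluate it
# with Runtime.run. A plain dict stands in for a FrameRef only because
# READ_COL/WRITE_COL_ do nothing beyond key lookup and assignment; the function
# name and the "h"/"h2" keys are made up for this sketch.
def _example_run_program():
    """Return the column written back by a two-instruction program."""
    from .ir import READ_COL, WRITE_COL_, prog, var
    feat_dict = {"h": [1.0, 2.0, 3.0]}
    with prog() as p:
        var_fd = var.FEAT_DICT(feat_dict, name="fd")
        col = READ_COL(var_fd, var.STR("h"))    # symbolic read; nothing runs yet
        WRITE_COL_(var_fd, var.STR("h2"), col)  # symbolic write-back
    Runtime.run(p)                              # executors evaluate here
    return feat_dict["h2"]                      # [1.0, 2.0, 3.0]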
"""For different schedulers"""
from __future__ import absolute_import
from ... import utils
from ..._ffi.function import _init_api
from ...base import DGLError
from ... import backend as F
from ..frame import frame_like, FrameRef
from ...function.base import BuiltinFunction
from ..udf import EdgeBatch, NodeBatch
from ... import ndarray as nd
from . import ir
from .ir import var
from . import degree_bucketing as db
from . import spmv
__all__ = [
"schedule_send",
"schedule_recv",
"schedule_update_all",
"schedule_snr",
"schedule_apply_nodes",
"schedule_apply_edges",
"schedule_group_apply_edge",
"schedule_push",
"schedule_pull"
]
def schedule_send(graph,
u, v, eid,
message_func,
msgframe=None):
"""Schedule send
Parameters
----------
    graph: GraphAdapter
Graph
u : utils.Index
Source nodes
v : utils.Index
Destination nodes
eid : utils.Index
Ids of sending edges
message_func: callable or list of callable
The message function
msgframe : FrameRef, optional
The storage to write messages to. If None, use graph.msgframe.
"""
var_mf = var.FEAT_DICT(msgframe if msgframe is not None else graph.msgframe)
var_src_nf = var.FEAT_DICT(graph.srcframe)
var_dst_nf = var.FEAT_DICT(graph.dstframe)
var_ef = var.FEAT_DICT(graph.edgeframe)
var_eid = var.IDX(eid)
var_msg = _gen_send(graph=graph,
u=u,
v=v,
eid=eid,
mfunc=message_func,
var_src_nf=var_src_nf,
var_dst_nf=var_dst_nf,
var_ef=var_ef)
# write tmp msg back
ir.WRITE_ROW_(var_mf, var_eid, var_msg)
# set message indicator to 1
graph.msgindicator = graph.msgindicator.set_items(eid, 1)
def schedule_recv(graph,
recv_nodes,
reduce_func,
apply_func,
inplace,
outframe=None):
"""Schedule recv.
Parameters
----------
    graph: GraphAdapter
Graph
recv_nodes: utils.Index
Nodes to recv.
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use graph.dstframe.
"""
src, dst, eid = graph.in_edges(recv_nodes)
if len(eid) > 0:
nonzero_idx = graph.msgindicator.get_items(eid).nonzero()
eid = eid.get_items(nonzero_idx)
src = src.get_items(nonzero_idx)
dst = dst.get_items(nonzero_idx)
if len(eid) == 0:
# Downgrade to apply nodes if
# 1) all recv nodes are 0-degree nodes
# 2) no send has been called
if apply_func is not None:
schedule_apply_nodes(recv_nodes, apply_func, graph.dstframe,
inplace, outframe, ntype=graph.canonical_etype[-1])
else:
var_dst_nf = var.FEAT_DICT(graph.dstframe, 'dst_nf')
var_out_nf = var_dst_nf if outframe is None else var.FEAT_DICT(outframe, name='out_nf')
# sort and unique the argument
recv_nodes, _ = F.sort_1d(F.unique(recv_nodes.tousertensor()))
recv_nodes = utils.toindex(recv_nodes, graph.gidx.dtype)
var_recv_nodes = var.IDX(recv_nodes, name='recv_nodes')
# reduce
reduced_feat = _gen_reduce(graph, reduce_func, (src, dst, eid),
recv_nodes)
# apply
final_feat = _apply_with_accum(var_recv_nodes, var_dst_nf,
reduced_feat, apply_func,
ntype=graph.canonical_etype[-1])
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_nf, var_recv_nodes, final_feat)
else:
ir.WRITE_ROW_(var_out_nf, var_recv_nodes, final_feat)
# set message indicator to 0
graph.msgindicator = graph.msgindicator.set_items(eid, 0)
if not graph.msgindicator.has_nonzero():
ir.CLEAR_FRAME_(var.FEAT_DICT(graph.msgframe, name='mf'))
def schedule_snr(graph,
edge_tuples,
message_func,
reduce_func,
apply_func,
inplace,
outframe=None):
"""Schedule send_and_recv.
Currently it builds a subgraph from edge_tuples with the same number of
nodes as the original graph, so that routines for whole-graph updates
(e.g. fused kernels) could be reused.
Parameters
----------
    graph: GraphAdapter
Graph
edge_tuples: tuple
A tuple of (src ids, dst ids, edge ids) representing edges to perform
send_and_recv
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use graph.dstframe.
"""
u, v, eid = edge_tuples
recv_nodes, _ = F.sort_1d(F.unique(v.tousertensor()))
recv_nodes = utils.toindex(recv_nodes, graph.gidx.dtype)
# create vars
var_dst_nf = var.FEAT_DICT(graph.dstframe, 'dst_nf')
var_out_nf = var_dst_nf if outframe is None else var.FEAT_DICT(outframe, name='out_nf')
var_u = var.IDX(u)
var_v = var.IDX(v)
var_eid = var.IDX(eid)
var_recv_nodes = var.IDX(recv_nodes, name='recv_nodes')
# generate send and reduce schedule
uv_getter = lambda: (var_u, var_v)
adj_creator = lambda: spmv.build_gidx_and_mapping_uv(
edge_tuples, graph.num_src(), graph.num_dst())
out_map_creator = lambda nbits: _build_idx_map(recv_nodes, nbits)
reduced_feat = _gen_send_reduce(src_node_frame=graph.srcframe,
dst_node_frame=graph.dstframe,
edge_frame=graph.edgeframe,
message_func=message_func,
reduce_func=reduce_func,
var_send_edges=var_eid,
var_reduce_nodes=var_recv_nodes,
uv_getter=uv_getter,
adj_creator=adj_creator,
out_map_creator=out_map_creator,
canonical_etype=graph.canonical_etype)
# generate apply schedule
final_feat = _apply_with_accum(var_recv_nodes, var_dst_nf, reduced_feat,
apply_func, ntype=graph.canonical_etype[-1])
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_nf, var_recv_nodes, final_feat)
else:
ir.WRITE_ROW_(var_out_nf, var_recv_nodes, final_feat)
def schedule_update_all(graph,
message_func,
reduce_func,
apply_func,
outframe=None):
"""Get send and recv schedule
Parameters
----------
    graph: GraphAdapter
Graph
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
outframe : FrameRef, optional
The storage to write output data. If None, use graph.dstframe.
"""
if graph.num_edges() == 0:
# All the nodes are zero degree; downgrade to apply nodes
if apply_func is not None:
nodes = utils.toindex(slice(0, graph.num_dst()), graph.gidx.dtype)
schedule_apply_nodes(nodes, apply_func, graph.dstframe,
inplace=False, outframe=outframe,
ntype=graph.canonical_etype[-1])
else:
eid = utils.toindex(slice(0, graph.num_edges()), graph.gidx.dtype) # ALL
recv_nodes = utils.toindex(slice(0, graph.num_dst()), graph.gidx.dtype) # ALL
# create vars
var_dst_nf = var.FEAT_DICT(graph.dstframe, name='dst_nf')
var_out_nf = var_dst_nf if outframe is None else var.FEAT_DICT(outframe, name='out_nf')
var_recv_nodes = var.IDX(recv_nodes, name='recv_nodes')
var_eid = var.IDX(eid)
# generate send + reduce
def uv_getter():
src, dst, _ = graph.edges('eid')
return var.IDX(src), var.IDX(dst)
adj_creator = lambda: spmv.build_gidx_and_mapping_graph(graph)
out_map_creator = lambda nbits: None
reduced_feat = _gen_send_reduce(src_node_frame=graph.srcframe,
dst_node_frame=graph.dstframe,
edge_frame=graph.edgeframe,
message_func=message_func,
reduce_func=reduce_func,
var_send_edges=var_eid,
var_reduce_nodes=var_recv_nodes,
uv_getter=uv_getter,
adj_creator=adj_creator,
out_map_creator=out_map_creator,
canonical_etype=graph.canonical_etype)
# generate optional apply
final_feat = _apply_with_accum(var_recv_nodes, var_dst_nf,
reduced_feat, apply_func,
ntype=graph.canonical_etype[-1])
ir.WRITE_DICT_(var_out_nf, final_feat)
def schedule_apply_nodes(v,
apply_func,
node_frame,
inplace,
outframe=None,
ntype=None):
"""Get apply nodes schedule
Parameters
----------
v : utils.Index
Nodes to apply
apply_func : callable
The apply node function
node_frame : FrameRef
Node feature frame.
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use the given node_frame.
ntype : str, optional
The node type, if running on a heterograph.
If None, assuming it's running on a homogeneous graph.
Returns
-------
A list of executors for DGL Runtime
"""
var_v = var.IDX(v)
var_nf = var.FEAT_DICT(node_frame, name='nf')
var_out_nf = var_nf if outframe is None else var.FEAT_DICT(outframe, name='out_nf')
v_nf = ir.READ_ROW(var_nf, var_v)
def _afunc_wrapper(node_data):
nbatch = NodeBatch(v, node_data, ntype=ntype)
return apply_func(nbatch)
afunc = var.FUNC(_afunc_wrapper)
applied_feat = ir.NODE_UDF(afunc, v_nf)
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_nf, var_v, applied_feat)
else:
ir.WRITE_ROW_(var_out_nf, var_v, applied_feat)
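# Hedged sketch of the shape of an apply-node UDF consumed by _afunc_wrapper
# above: it receives a NodeBatch and returns a dict of updated features. The
# ``data`` accessor is the standard NodeBatch attribute; the feature name "h"
# and the doubling rule are made up for this sketch.
def _example_apply_func(nodes):
    return {"h": nodes.data["h"] * 2}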
def schedule_nodeflow_apply_nodes(graph,
layer_id,
v,
apply_func,
inplace):
"""Get apply nodes schedule in NodeFlow.
Parameters
----------
graph: NodeFlow
The NodeFlow to use
layer_id : int
        The layer where we apply the node update function.
v : utils.Index
Nodes to apply
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
Returns
-------
A list of executors for DGL Runtime
"""
var_nf = var.FEAT_DICT(graph._get_node_frame(layer_id), name='nf')
var_v = var.IDX(v)
v_nf = ir.READ_ROW(var_nf, var_v)
def _afunc_wrapper(node_data):
nbatch = NodeBatch(v, node_data)
return apply_func(nbatch)
afunc = var.FUNC(_afunc_wrapper)
applied_feat = ir.NODE_UDF(afunc, v_nf)
# TODO we need to avoid index_copy here.
if inplace:
ir.WRITE_ROW_INPLACE_(var_nf, var_v, applied_feat)
else:
ir.WRITE_ROW_(var_nf, var_v, applied_feat)
def schedule_apply_edges(graph,
u, v, eid,
apply_func,
inplace,
outframe=None):
"""Get apply edges schedule
Parameters
----------
    graph: GraphAdapter
Graph
u : utils.Index
Source nodes of edges to apply
v : utils.Index
Destination nodes of edges to apply
eid : utils.Index
Ids of sending edges
apply_func: callable
The apply edge function
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
        The storage to write output data. If None, use graph.edgeframe.
Returns
-------
A list of executors for DGL Runtime
"""
# vars
var_src_nf = var.FEAT_DICT(graph.srcframe, 'uframe')
var_dst_nf = var.FEAT_DICT(graph.dstframe, 'vframe')
var_ef = var.FEAT_DICT(graph.edgeframe, 'eframe')
var_out_ef = var_ef if outframe is None else var.FEAT_DICT(outframe, 'out_ef')
var_out = _gen_send(graph=graph, u=u, v=v, eid=eid, mfunc=apply_func,
var_src_nf=var_src_nf, var_dst_nf=var_dst_nf,
var_ef=var_ef)
var_eid = var.IDX(eid)
# schedule apply edges
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_ef, var_eid, var_out)
else:
        ir.WRITE_ROW_(var_out_ef, var_eid, var_out)
def schedule_nodeflow_apply_edges(graph, block_id,
u, v, eid,
apply_func,
inplace):
"""Get apply edges schedule in NodeFlow.
Parameters
----------
graph: NodeFlow
The NodeFlow to use
block_id : int
        The block whose edges we apply the edge update function to.
u : utils.Index
Source nodes of edges to apply
v : utils.Index
Destination nodes of edges to apply
eid : utils.Index
Ids of sending edges
apply_func: callable
The apply edge function
inplace: bool
If True, the update will be done in place
Returns
-------
A list of executors for DGL Runtime
"""
# vars
in_var_nf = var.FEAT_DICT(graph._get_node_frame(block_id), name='in_nf')
out_var_nf = var.FEAT_DICT(graph._get_node_frame(block_id + 1),
name='out_nf')
var_ef = var.FEAT_DICT(graph._get_edge_frame(block_id), name='ef')
var_out = _gen_send(graph, u, v, eid, apply_func, in_var_nf, out_var_nf,
var_ef, block_id=block_id)
var_eid = var.IDX(eid)
if inplace:
ir.WRITE_ROW_INPLACE_(var_ef, var_eid, var_out)
else:
ir.WRITE_ROW_(var_ef, var_eid, var_out)
def schedule_push(graph,
u,
message_func,
reduce_func,
apply_func,
inplace,
outframe=None):
"""Get push schedule
Parameters
----------
    graph: GraphAdapter
Graph
u : utils.Index
Source nodes for push
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use graph.dstframe.
"""
u, v, eid = graph.out_edges(u)
if len(eid) == 0:
# All the pushing nodes have no out edges. No computation is scheduled.
return
schedule_snr(graph, (u, v, eid),
message_func, reduce_func, apply_func,
inplace, outframe)
def schedule_pull(graph,
pull_nodes,
message_func,
reduce_func,
apply_func,
inplace,
outframe=None):
"""Get pull schedule
Parameters
----------
    graph: GraphAdapter
Graph
pull_nodes : utils.Index
Destination nodes for pull
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use graph.dstframe.
"""
# TODO(minjie): `in_edges` can be omitted if message and reduce func pairs
# can be specialized to SPMV. This needs support for creating adjmat
# directly from pull node frontier.
u, v, eid = graph.in_edges(pull_nodes)
if len(eid) == 0:
# All the nodes are 0deg; downgrades to apply.
if apply_func is not None:
schedule_apply_nodes(pull_nodes, apply_func, graph.dstframe, inplace,
outframe, ntype=graph.canonical_etype[-1])
else:
# TODO(Allen): Change operation to dgl operation
pull_nodes, _ = F.sort_1d(F.unique(pull_nodes.tousertensor()))
pull_nodes = utils.toindex(pull_nodes, graph.gidx.dtype)
# create vars
var_dst_nf = var.FEAT_DICT(graph.dstframe, name='dst_nf')
var_out_nf = var_dst_nf if outframe is None else var.FEAT_DICT(outframe, name='out_nf')
var_pull_nodes = var.IDX(pull_nodes, name='pull_nodes')
var_u = var.IDX(u)
var_v = var.IDX(v)
var_eid = var.IDX(eid)
# generate send and reduce schedule
uv_getter = lambda: (var_u, var_v)
adj_creator = lambda: spmv.build_gidx_and_mapping_uv(
(u, v, eid), graph.num_src(), graph.num_dst())
out_map_creator = lambda nbits: _build_idx_map(pull_nodes, nbits)
reduced_feat = _gen_send_reduce(graph.srcframe,
graph.dstframe, graph.edgeframe,
message_func, reduce_func, var_eid,
var_pull_nodes, uv_getter, adj_creator,
out_map_creator,
canonical_etype=graph.canonical_etype)
# generate optional apply
final_feat = _apply_with_accum(var_pull_nodes, var_dst_nf,
reduced_feat, apply_func,
ntype=graph.canonical_etype[-1])
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_nf, var_pull_nodes, final_feat)
else:
ir.WRITE_ROW_(var_out_nf, var_pull_nodes, final_feat)
def schedule_group_apply_edge(graph,
u, v, eid,
apply_func,
group_by,
inplace,
outframe=None):
"""Group apply edges schedule
Parameters
----------
graph: GraphAdapter
Graph
u : utils.Index
Source nodes of edges to apply
v : utils.Index
Destination nodes of edges to apply
eid : utils.Index
Ids of sending edges
apply_func: callable
The apply edge function
group_by : str
Specify how to group edges. Expected to be either 'src' or 'dst'
inplace: bool
If True, the update will be done in place
outframe : FrameRef, optional
The storage to write output data. If None, use graph.edgeframe.
"""
# vars
var_src_nf = var.FEAT_DICT(graph.srcframe, name='src_nf')
var_dst_nf = var.FEAT_DICT(graph.dstframe, name='dst_nf')
var_ef = var.FEAT_DICT(graph.edgeframe, name='ef')
var_out_ef = var_ef if outframe is None else var.FEAT_DICT(outframe, name='out_ef')
var_out = var.FEAT_DICT(name='new_ef')
db.gen_group_apply_edge_schedule(apply_func, u, v, eid, group_by,
var_src_nf, var_dst_nf, var_ef, var_out,
canonical_etype=graph.canonical_etype)
var_eid = var.IDX(eid)
if inplace:
ir.WRITE_ROW_INPLACE_(var_out_ef, var_eid, var_out)
else:
ir.WRITE_ROW_(var_out_ef, var_eid, var_out)
def schedule_nodeflow_update_all(graph,
block_id,
message_func,
reduce_func,
apply_func):
"""Get update_all schedule in a block.
Parameters
----------
graph: NodeFlow
The NodeFlow to use
block_id : int
The block where we perform computation.
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
"""
# A NodeFlow shouldn't have 0 edges.
assert graph.block_size(block_id) > 0
eid = utils.toindex(slice(0, graph.block_size(block_id))) # ALL
dest_nodes = utils.toindex(slice(0, graph.layer_size(block_id + 1))) # ALL
# create vars
var_nf = var.FEAT_DICT(graph._get_node_frame(block_id + 1), name='out_nf')
var_dest_nodes = var.IDX(dest_nodes, name='dest_nodes')
var_eid = var.IDX(eid)
# generate send + reduce
def uv_getter():
src, dst, _ = graph.block_edges(block_id, remap_local=True)
return var.IDX(utils.toindex(src)), var.IDX(utils.toindex(dst))
adj_creator = lambda: spmv.build_gidx_and_mapping_block(graph, block_id)
out_map_creator = lambda nbits: None
reduced_feat = _gen_send_reduce(src_node_frame=graph._get_node_frame(block_id),
dst_node_frame=graph._get_node_frame(block_id + 1),
edge_frame=graph._get_edge_frame(block_id),
message_func=message_func,
reduce_func=reduce_func,
var_send_edges=var_eid,
var_reduce_nodes=var_dest_nodes,
uv_getter=uv_getter,
adj_creator=adj_creator,
out_map_creator=out_map_creator)
# generate optional apply
final_feat = _apply_with_accum(var_dest_nodes, var_nf, reduced_feat, apply_func)
ir.WRITE_DICT_(var_nf, final_feat)
def schedule_nodeflow_compute(graph,
block_id,
u, v, eid,
dest_nodes,
message_func,
reduce_func,
apply_func,
inplace):
"""Get flow compute schedule in NodeFlow
Parameters
----------
graph: NodeFlow
The NodeFlow to use
block_id : int
The block where we perform computation.
u : utils.Index
Source nodes of edges to apply
v : utils.Index
Destination nodes of edges to apply
eid : utils.Index
Ids of sending edges
dest_nodes : utils.Index
Destination nodes ids
message_func: callable or list of callable
The message function
reduce_func: callable or list of callable
The reduce function
apply_func: callable
The apply node function
inplace: bool
If True, the update will be done in place
"""
# TODO(minjie): `in_edges` can be omitted if message and reduce func pairs
# can be specialized to SPMV. This needs support for creating adjmat
# directly from pull node frontier.
if len(eid) == 0:
# All the nodes are 0deg; downgrades to apply.
if apply_func is not None:
schedule_nodeflow_apply_nodes(graph, block_id + 1, dest_nodes,
apply_func, inplace)
else:
# create vars
var_nf = var.FEAT_DICT(graph._get_node_frame(block_id + 1),
name='out_nf')
var_u = var.IDX(u)
var_v = var.IDX(v)
var_eid = var.IDX(eid)
var_dest_nodes = var.IDX(dest_nodes, name='dest_nodes')
# generate send and reduce schedule
uv_getter = lambda: (var_u, var_v)
adj_creator = lambda: spmv.build_gidx_and_mapping_block(
graph, block_id, (u, v, eid))
out_map_creator = lambda nbits: _build_idx_map(utils.toindex(dest_nodes), nbits)
reduced_feat = _gen_send_reduce(src_node_frame=graph._get_node_frame(block_id),
dst_node_frame=graph._get_node_frame(block_id + 1),
edge_frame=graph._get_edge_frame(block_id),
message_func=message_func,
reduce_func=reduce_func,
var_send_edges=var_eid,
var_reduce_nodes=var_dest_nodes,
uv_getter=uv_getter,
adj_creator=adj_creator,
out_map_creator=out_map_creator)
# generate optional apply
final_feat = _apply_with_accum(var_dest_nodes, var_nf,
reduced_feat, apply_func)
if inplace:
ir.WRITE_ROW_INPLACE_(var_nf, var_dest_nodes, final_feat)
else:
ir.WRITE_ROW_(var_nf, var_dest_nodes, final_feat)
def _check_builtin_func_list(func_list):
"""Check whether func_list only contains builtin functions."""
for fn in func_list:
if not isinstance(fn, BuiltinFunction):
raise DGLError("If specify multiple message/reduce functions, \
all of them must be builtin")
def _standardize_func_usage(func, func_name):
"""Standardize usages of message and reduce functions
Message or reduce function can be:
1. a UDF
2. a dgl builtin function
3. a list of dgl builtin functions
This function checks whether func meets the requirement, and merges the
last two cases by wrapping a single builtin function into a list.
Returns:
A single UDF function or a list of builtin functions
"""
if utils.is_iterable(func):
# func is a list of builtin
_check_builtin_func_list(func)
return func
elif isinstance(func, BuiltinFunction):
# func is a single builtin function
return [func]
else:
# func is one UDF
if not callable(func):
raise DGLError('User-defined %s function must be callable.'
' Got: %s' % (func_name, str(func)))
return func
def _apply_with_accum(var_nodes, var_nf, var_accum, apply_func, ntype=None):
"""Apply with accumulated features.
Parameters
----------
var_nodes : var.IDX
The nodes.
var_nf : var.FEAT_DICT
The node features.
var_accum : var.FEAT_DICT
The accumulated features.
apply_func : callable, None
The apply function.
ntype : str, optional
The node type, if running on a heterograph.
If None, a homogeneous graph is assumed.
"""
if apply_func:
# To avoid writing reduced features back to the node frame and reading
# them again for the apply phase, we first read the node
# features and "merge" them with the reduced features.
v_nf = ir.READ_ROW(var_nf, var_nodes)
v_nf = ir.UPDATE_DICT(v_nf, var_accum)
def _afunc_wrapper(node_data):
nbatch = NodeBatch(var_nodes.data, node_data, ntype=ntype)
return apply_func(nbatch)
afunc = var.FUNC(_afunc_wrapper)
applied_feat = ir.NODE_UDF(afunc, v_nf)
final_feat = ir.UPDATE_DICT(var_accum, applied_feat)
else:
final_feat = var_accum
return final_feat
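# Illustrative sketch (not part of the original module, names are hypothetical):
# the eager-mode analogue of the READ_ROW / UPDATE_DICT / NODE_UDF pattern that
# _apply_with_accum builds symbolically. Plain dicts of numpy arrays stand in for
# feature frames, so the reduced features never round-trip through the node frame.
import numpy as np

def _example_apply_with_accum(node_feats, reduced_feats, rows, apply_udf=None):
    """Merge reduced features into the selected node rows, then optionally apply a UDF."""
    # READ_ROW: slice out the rows that received messages.
    merged = {k: v[rows] for k, v in node_feats.items()}
    # UPDATE_DICT: reduced features override/extend the node features.
    merged.update(reduced_feats)
    if apply_udf is None:
        return reduced_feats
    # NODE_UDF followed by UPDATE_DICT with the UDF output.
    out = dict(reduced_feats)
    out.update(apply_udf(merged))
    return out

def _toy_apply(feats):
    # A hypothetical apply UDF: rescale the accumulated field 'h'.
    return {'h': 0.5 * feats['h']}

# Example: two of four nodes received messages whose reduced value is 3.0.
# _example_apply_with_accum({'h': np.ones((4, 2))}, {'h': np.full((2, 2), 3.0)},
#                           rows=np.array([1, 3]), apply_udf=_toy_apply)['h']
# -> array([[1.5, 1.5], [1.5, 1.5]])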
def _gen_reduce(graph, reduce_func, edge_tuples, recv_nodes):
"""Generate reduce schedule
Parameters
----------
graph : GraphAdapter
reduce_func : callable
edge_tuples : tuple of utils.Index
recv_nodes : utils.Index
Returns
-------
var.FEAT_DICT
The reduced feature dict.
"""
src, dst, eid = edge_tuples
rfunc = _standardize_func_usage(reduce_func, 'reduce')
rfunc_is_list = utils.is_iterable(rfunc)
# Create a tmp frame to hold the feature data.
# The frame has the same size and schemes of the
# node frame.
# TODO(minjie): should replace this with an IR call to make the program
# stateless.
tmpframe = FrameRef(frame_like(graph.dstframe._frame, len(recv_nodes)))
# vars
var_msg = var.FEAT_DICT(graph.msgframe, 'msg')
var_dst_nf = var.FEAT_DICT(graph.dstframe, 'nf')
var_out = var.FEAT_DICT(data=tmpframe)
if rfunc_is_list:
adj, edge_map, nbits = spmv.build_gidx_and_mapping_uv(
(src, dst, eid), graph.num_src(), graph.num_dst())
# using edge map instead of message map because messages are in global
# message frame
var_out_map = _build_idx_map(recv_nodes, nbits)
spmv.gen_e2v_spmv_schedule(graph=adj,
rfunc=rfunc,
message_frame=var_msg,
out=var_out,
out_size=len(recv_nodes),
edge_map=edge_map,
out_map=var_out_map)
return var_out
else:
# gen degree bucketing schedule for UDF recv
db.gen_degree_bucketing_schedule(rfunc, eid, dst, recv_nodes,
var_dst_nf, var_msg, var_out,
ntype=graph.canonical_etype[-1])
return var_out
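# Illustrative sketch (not part of the original module, names are hypothetical):
# the degree-bucketing idea behind db.gen_degree_bucketing_schedule, written
# eagerly with numpy. Receivers with the same in-degree are grouped so that one
# reduce-UDF call sees a dense (num_receivers, degree, feat_dim) mailbox.
import numpy as np

def _example_degree_bucketing(dst, messages, recv_nodes, reduce_udf):
    """dst[i] is the receiver of messages[i]; recv_nodes is sorted and unique."""
    out = np.zeros((len(recv_nodes), messages.shape[1]), dtype=messages.dtype)
    pos = {int(n): i for i, n in enumerate(recv_nodes)}
    degree = {int(n): int((dst == n).sum()) for n in recv_nodes}
    for deg in sorted(set(degree.values())):
        if deg == 0:
            continue  # zero-degree receivers keep their zero-initialized rows
        bucket = [n for n in recv_nodes if degree[int(n)] == deg]
        mailbox = np.stack([messages[dst == n] for n in bucket])  # (bucket, deg, feat)
        out[[pos[int(n)] for n in bucket]] = reduce_udf(mailbox)  # one call per bucket
    return out

# Example with a sum UDF:
# dst = np.array([0, 2, 2, 2]); msg = np.arange(8.0).reshape(4, 2)
# _example_degree_bucketing(dst, msg, np.array([0, 1, 2]), lambda mb: mb.sum(axis=1))
# -> node 0 gets msg[0], node 1 (0-deg) stays zero, node 2 sums msg[1:4].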
def _gen_send_reduce(
src_node_frame,
dst_node_frame,
edge_frame,
message_func,
reduce_func,
var_send_edges,
var_reduce_nodes,
uv_getter,
adj_creator,
out_map_creator,
canonical_etype=(None, None, None)):
"""Generate send and reduce schedule.
The function generates symbolic program for computing
(1) message function on the given edges (var_send_edges).
(2) reduce function on the given nodes (var_reduce_nodes).
If both message_func and reduce_func are DGL builtin functions, the schedule
will invoke fused message passing kernels (e.g. dgl.backend.binary_reduce) to
avoid generating explicit edge messages.
If message_func is UDF while reduce_func is DGL builtin function, the schedule
first invokes UDF to generate explicit edge messages, and then invokes
dgl.backend.copy_reduce to reduce messages on the destination nodes.
If both message_func and reduce_func are UDFs, the schedule first invokes message
UDF to generate explicit edge messages and then use degree-bucketing to invoke
reduce UDF.
Parameters
----------
src_node_frame : NodeFrame
The node frame of the source nodes.
dst_node_frame : NodeFrame
The node frame of the destination nodes.
edge_frame : FrameRef
The frame for the edges between the source and destination nodes.
message_func : callable, list of builtins
The message func(s).
reduce_func : callable, list of builtins
The reduce func(s).
var_send_edges : var.IDX
The edges (ids) to perform send.
var_reduce_nodes : var.IDX
Unique and sorted nodes to perform reduce. This should include
unique(v) + 0deg nodes.
uv_getter : callable
Function that returns a pair of var.IDX (u, v) for the triggered edges.
adj_creator : callable
Function that returns the adjmat, edge order of csr matrix, and
bit-width.
out_map_creator : callable
A function that returns a mapping from reduce_nodes to relabeled
consecutive ids
canonical_etype : tuple[str, str, str], optional
Canonical edge type if running on a heterograph.
Default: (None, None, None), if running on a homogeneous graph.
Returns
-------
var.FEAT_DICT
The reduced feature dict.
Notes
-----
Reduce_nodes are assumed to be in the *unique-ascending* order of the edge
destination node ids. The returned reduced features will be batched
following the order of reduce_nodes.
"""
# NOTE: currently, this function requires all var.IDX to contain concrete
# data.
reduce_nodes = var_reduce_nodes.data
# arg vars
var_src_nf = var.FEAT_DICT(src_node_frame, name='src_frame')
var_dst_nf = var.FEAT_DICT(dst_node_frame, name='dst_frame')
var_ef = var.FEAT_DICT(edge_frame, name='edge_frame')
var_eid = var_send_edges
# format the input functions
mfunc = _standardize_func_usage(message_func, 'message')
rfunc = _standardize_func_usage(reduce_func, 'reduce')
mfunc_is_list = utils.is_iterable(mfunc)
rfunc_is_list = utils.is_iterable(rfunc)
# Create a tmp frame to hold the feature data. The frame has the same size
# and schemes of the node frame.
# TODO(minjie): should replace this with an IR call to make the program
# stateless.
tmpframe = FrameRef(frame_like(dst_node_frame._frame, len(reduce_nodes)))
var_out = var.FEAT_DICT(data=tmpframe)
# 1. If either mfunc or rfunc is builtin, generate adjmat, edge mapping and
# message mapping
if mfunc_is_list or rfunc_is_list:
adj, edge_map, nbits = adj_creator()
# 2. If rfunc is builtin, generate a mapping from recv nodes to consecutive
# output id
if rfunc_is_list:
out_map = out_map_creator(nbits)
# 3. First try fused message and reduce function
if mfunc_is_list and rfunc_is_list:
# builtin message + builtin reducer
spmv.gen_v2v_spmv_schedule(graph=adj,
mfunc=mfunc,
rfunc=rfunc,
src_frame=var_src_nf,
dst_frame=var_dst_nf,
edge_frame=var_ef,
out=var_out,
out_size=len(reduce_nodes),
edge_map=edge_map,
out_map=out_map)
return var_out
var_u, var_v = uv_getter()
# 4. Unable to fuse, then generate message
if mfunc_is_list:
# messages are builtin but reduce is UDF
# Create a tmp frame to hold the message.
# TODO: should replace this with an IR call to make the program
# stateless.
n_message = len(var_eid.data)
tmp_msg_frame = FrameRef(frame_like(edge_frame._frame, n_message))
var_mf = var.FEAT_DICT(data=tmp_msg_frame)
spmv.gen_v2e_spmv_schedule(graph=adj,
mfunc=mfunc,
src_frame=var_src_nf,
dst_frame=var_dst_nf,
edge_frame=var_ef,
out=var_mf,
out_size=n_message,
edge_map=edge_map)
else:
# generate UDF send schedule
var_mf = _gen_udf_send(var_src_nf, var_dst_nf, var_ef, var_u,
var_v, var_eid, mfunc, canonical_etype=canonical_etype)
# 5. Generate reduce
if rfunc_is_list:
# UDF message + builtin reducer
spmv.gen_e2v_spmv_schedule(graph=adj,
rfunc=rfunc,
message_frame=var_mf,
out=var_out,
out_size=len(reduce_nodes),
edge_map=None, # messages are stored compactly
out_map=out_map)
return var_out
else:
# gen degree bucketing schedule for UDF recv
mid = utils.toindex(slice(0, len(var_v.data)), var_v.data.dtype)
db.gen_degree_bucketing_schedule(rfunc, mid, var_v.data,
reduce_nodes, var_dst_nf, var_mf,
var_out, ntype=canonical_etype[-1])
return var_out
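# Illustrative sketch (not part of the original module, names are hypothetical):
# the dispatch performed by _gen_send_reduce, reduced to a pure function of
# "is the standardized message/reduce function a list of builtins or a UDF?".
# The returned labels are informal names for the code paths above.
def _example_send_reduce_path(mfunc_is_list, rfunc_is_list):
    if mfunc_is_list and rfunc_is_list:
        return 'fused-kernel'                # builtin msg + builtin reduce (v2v spmv)
    if mfunc_is_list:
        return 'builtin-msg+udf-reduce'      # v2e spmv, then degree bucketing
    if rfunc_is_list:
        return 'udf-msg+builtin-reduce'      # explicit messages, then e2v spmv
    return 'udf-msg+udf-reduce'              # explicit messages + degree bucketing

assert _example_send_reduce_path(True, True) == 'fused-kernel'
assert _example_send_reduce_path(False, False) == 'udf-msg+udf-reduce'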
def _gen_udf_send(var_src_nf, var_dst_nf, var_ef, u, v, eid, mfunc,
canonical_etype=(None, None, None)):
"""Internal function to generate send schedule for UDF message function."""
fdsrc = ir.READ_ROW(var_src_nf, u)
fddst = ir.READ_ROW(var_dst_nf, v)
fdedge = ir.READ_ROW(var_ef, eid)
def _mfunc_wrapper(src_data, edge_data, dst_data):
ebatch = EdgeBatch((u.data, v.data, eid.data),
src_data, edge_data, dst_data,
canonical_etype=canonical_etype)
return mfunc(ebatch)
_mfunc_wrapper = var.FUNC(_mfunc_wrapper)
msg = ir.EDGE_UDF(_mfunc_wrapper, fdsrc, fdedge, fddst)
return msg
def _gen_send(graph, u, v, eid, mfunc, var_src_nf, var_dst_nf, var_ef, block_id=None):
"""Internal function to generate send schedule"""
mfunc = _standardize_func_usage(mfunc, 'message')
mfunc_is_list = utils.is_iterable(mfunc)
# vars
var_u = var.IDX(u)
var_v = var.IDX(v)
var_eid = var.IDX(eid)
if mfunc_is_list:
if not hasattr(graph, 'num_edges'):
# XXX(minjie): a temporary hack to detect Nodeflow object
res = spmv.build_gidx_and_mapping_block(graph, block_id)
elif eid.is_slice(0, graph.num_edges()):
# full graph case
res = spmv.build_gidx_and_mapping_graph(graph)
else:
res = spmv.build_gidx_and_mapping_uv(
(u, v, eid), graph.num_src(), graph.num_dst())
adj, edge_map, _ = res
# create a tmp message frame
tmp_mfr = FrameRef(frame_like(var_ef.data._frame, len(eid)))
var_out = var.FEAT_DICT(data=tmp_mfr)
spmv.gen_v2e_spmv_schedule(graph=adj,
mfunc=mfunc,
src_frame=var_src_nf,
dst_frame=var_dst_nf,
edge_frame=var_ef,
out=var_out,
out_size=len(eid),
edge_map=edge_map)
else:
# UDF send
var_out = _gen_udf_send(var_src_nf, var_dst_nf, var_ef, var_u,
var_v, var_eid, mfunc,
canonical_etype=graph.canonical_etype)
return var_out
def _build_idx_map(idx, nbits):
"""Build a map from the input ids to continuous ids that starts from zero.
And the number of bits data type of each integer in the mapping uses will
be nbits
Examples
--------
>>> x = [1, 5, 3, 6]
>>> o2n = map_to_continuous(x)
>>> o2n
[n/a, 0, n/a, 2, n/a, 1, 3]
"n/a" will be filled with 0
Parameters
----------
x : Index
The input ids, assumed to be unique.
nbits: int
Number of bits each integer in the mapping should use, can be 32 or 64
Returns
-------
old_to_new : CtxCachedObject
The mapping from old id to new id. It is a vector of length MAX(x).
One can use advanced indexing to convert an old id tensor to a
new id tensor: new_id = old_to_new[old_id]
"""
x = idx.tousertensor()
map_len = int(F.asnumpy(F.max(x, dim=0))) + 1
old_to_new = F.full_1d(map_len, -1, dtype=F.int64, ctx=F.cpu())
# Use out-of-place update for TensorFlow compatibility
old_to_new = F.scatter_row(old_to_new, x, F.arange(0, len(x)))
old_to_new = utils.to_nbits_int(old_to_new, nbits)
old_to_new = F.zerocopy_to_dgl_ndarray(old_to_new)
return utils.CtxCachedObject(lambda ctx: nd.array(old_to_new, ctx=ctx))
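# Illustrative sketch (not part of the original module, names are hypothetical):
# the old-to-new id vector materialized by _build_idx_map, written with plain
# numpy and ignoring the nbits/context caching details.
import numpy as np

def _example_build_idx_map(x):
    x = np.asarray(x)
    old_to_new = np.full(int(x.max()) + 1, -1, dtype=np.int64)
    old_to_new[x] = np.arange(len(x))        # positions absent from x stay -1
    return old_to_new

assert _example_build_idx_map([1, 5, 3, 6]).tolist() == [-1, 0, -1, 2, -1, 1, 3]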
_init_api("dgl._deprecate.runtime.scheduler")
"""Module for SPMV rules."""
from __future__ import absolute_import
from functools import partial
from ...base import DGLError
from ... import backend as F
from ... import utils
from ... import ndarray as nd
from ...heterograph_index import create_unitgraph_from_coo
from . import ir
from .ir import var
def gen_v2v_spmv_schedule(graph, mfunc, rfunc, src_frame, dst_frame,
edge_frame, out, out_size, src_map=None,
dst_map=None, edge_map=None, out_map=None):
"""Generate v2v spmv schedule.
Parameters
----------
graph : utils.CtxCachedObject
Function that generates immutable graph index on given context
mfunc : list of builtin message func
Builtin message function list
rfunc : list of builtin reduce func
Builtin reduce function list
src_frame : var.Var
Input source node features
dst_frame : var.Var
Input destination node features
edge_frame : var.Var
Input edge features
out : var.Var
Output node features
out_size : int
Number of output nodes
src_map : utils.CtxCachedObject
Function that generates source node id mapping array on given context
dst_map : utils.CtxCachedObject
Function that generates destination node id mapping array on given
context
edge_map : utils.CtxCachedObject
Function that generates edge id mapping array on given context
out_map : utils.CtxCachedObject
Function that generates output id mapping array on given context
"""
fld2mfunc = {fn.out_field: fn for fn in mfunc}
for rfn in rfunc:
mfld = rfn.msg_field
if mfld not in fld2mfunc:
raise DGLError('Reduce function requires message field "%s",'
' but no message function generates it.' % mfld)
mfn = fld2mfunc[mfld]
ftdst = mfn._invoke(graph, src_frame, dst_frame, edge_frame, out_size,
src_map, dst_map, edge_map, out_map,
reducer=rfn.name)
ir.WRITE_COL_(out, var.STR(rfn.out_field), ftdst)
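# Illustrative sketch (not part of the original module): how the reducer list is
# matched against the message list by field name, mirroring the fld2mfunc lookup
# above. The tuples are hypothetical stand-ins for builtin function objects that
# expose ``out_field`` / ``msg_field``.
def _example_pair_builtins(mfuncs, rfuncs):
    """mfuncs: [(name, out_field)]; rfuncs: [(name, msg_field, out_field)]."""
    fld2mfunc = {out_field: name for name, out_field in mfuncs}
    pairs = []
    for rname, msg_field, out_field in rfuncs:
        if msg_field not in fld2mfunc:
            raise KeyError('no message function generates field %r' % msg_field)
        pairs.append((fld2mfunc[msg_field], rname, out_field))
    return pairs

# Example: a copy-style message writing field 'm', consumed by two reducers.
_pairs = _example_pair_builtins([('copy_u', 'm')],
                                [('sum', 'm', 'h_new'), ('max', 'm', 'h_max')])
assert _pairs == [('copy_u', 'sum', 'h_new'), ('copy_u', 'max', 'h_max')]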
def gen_v2e_spmv_schedule(graph, mfunc, src_frame, dst_frame, edge_frame, out,
out_size, src_map=None, dst_map=None, edge_map=None,
out_map=None):
"""Generate v2e SPMV schedule
Parameters
----------
graph : utils.CtxCachedObject
Function that generates immutable graph index on given context
mfunc : list of builtin message func
Builtin message function list
src_frame : var.Var
Input source node features
dst_frame : var.Var
Input destination node features
edge_frame : var.Var
Input edge features
out : var.Var
Output message features
out_size : int
Number of output messages (one per edge)
src_map : utils.CtxCachedObject
Function that generates source node id mapping array on given context
dst_map : utils.CtxCachedObject
Function that generates destination node id mapping array on given
context
edge_map : utils.CtxCachedObject
Function that generates edge id mapping array on given context
out_map : utils.CtxCachedObject
Function that generates output id mapping array on given context
"""
for mfn in mfunc:
fmsg = mfn._invoke(graph, src_frame, dst_frame, edge_frame, out_size,
src_map, dst_map, edge_map, out_map=out_map,
reducer="none")
ir.WRITE_COL_(out, var.STR(mfn.out_field), fmsg)
def gen_e2v_spmv_schedule(graph, rfunc, message_frame, out, out_size,
edge_map=None, out_map=None):
"""Generate e2v SPMV schedule.
Parameters
----------
graph : utils.CtxCachedObject
Function that generates immutable graph index on given context
rfunc : list of builtin reduce func
Builtin reduce function list
message_frame : var.Var
Message features
out : var.Var
Output node features
out_size : int
Number of output nodes
edge_map : utils.CtxCachedObject
Function that generates edge id mapping array on given context
out_map : utils.CtxCachedObject
Function that generates output id mapping array on given context
"""
for rfn in rfunc:
ftdst = rfn._invoke(graph, message_frame, out_size, edge_map=edge_map,
out_map=out_map)
ir.WRITE_COL_(out, var.STR(rfn.out_field), ftdst)
def build_gidx_and_mapping_graph(graph):
"""Build immutable graph index of the whole graph.
Parameters
----------
graph : GraphAdapter
Graph
Returns
-------
graph : utils.CtxCachedObject
Function that generates an immutable graph index on a given context
edge_map : utils.CtxCachedObject
Function that generates forward and backward edge mapping on a given
context
nbits : int
Number of bits needed to represent the graph
"""
return graph.get_immutable_gidx, None, graph.bits_needed()
def build_gidx_and_mapping_uv(edge_tuples, num_src, num_dst):
"""Build immutable graph index and mapping using the given (u, v) edges
The matrix is of shape (num_src, num_dst).
Parameters
---------
edge_tuples : tuple of three utils.Index
A tuple of (u, v, eid)
num_src : int
Number of source nodes.
num_dst : int
Number of destination nodes.
Returns
-------
graph : utils.CtxCachedObject
Function that generates an immutable graph index on a given context
edge_map : utils.CtxCachedObject
Function that generates forward and backward edge mapping on a given
context
nbits : int
Number of bits needed to represent the graph
"""
u, v, eid = edge_tuples
gidx = create_unitgraph_from_coo(2, num_src, num_dst,
u.tousertensor(), v.tousertensor(), ['coo', 'csr', 'csc'])
forward, backward = gidx.get_csr_shuffle_order(0)
eid = eid.tousertensor()
nbits = gidx.bits_needed(0)
forward_map = utils.to_nbits_int(F.gather_row(eid, forward.tousertensor()), nbits)
backward_map = utils.to_nbits_int(F.gather_row(eid, backward.tousertensor()), nbits)
forward_map = F.zerocopy_to_dgl_ndarray(forward_map)
backward_map = F.zerocopy_to_dgl_ndarray(backward_map)
edge_map = utils.CtxCachedObject(
lambda ctx: (nd.array(forward_map, ctx=ctx),
nd.array(backward_map, ctx=ctx)))
return partial(gidx.get_unitgraph, 0), edge_map, nbits
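# Illustrative sketch (not part of the original module, names are hypothetical):
# why an edge mapping is needed at all. Converting COO edges to a CSR-like layout
# permutes the edges, so kernels need, for each position in the new layout, the
# original edge id in order to gather per-edge data. Sorting by destination
# (then source) stands in for the real shuffle order from get_csr_shuffle_order.
import numpy as np

def _example_csr_edge_map(u, v, eid):
    u, v, eid = map(np.asarray, (u, v, eid))
    perm = np.lexsort((u, v))      # group edges by destination
    forward_map = eid[perm]        # new position -> original edge id
    return perm, forward_map

# Example: edges (0->1, 2->1, 1->0) with ids [10, 11, 12]; the two edges entering
# node 1 end up adjacent and the map records their original ids.
assert _example_csr_edge_map([0, 2, 1], [1, 1, 0], [10, 11, 12])[1].tolist() == [12, 10, 11]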
def build_gidx_and_mapping_block(graph, block_id, edge_tuples=None):
"""Build immutable graph index and mapping for node flow
Parameters
----------
graph : NodeFlow
The NodeFlow
block_id : int
The block id
edge_tuples : tuple of three utils.Index
A tuple of (u, v, eid)
Returns
-------
graph : utils.CtxCachedObject
Function that generates an immutable graph index on a given context
edge_map : utils.CtxCachedObject
Function that generates forward and backward edge mapping on a given
context
nbits : int
Number of bits needed to represent the graph
"""
if edge_tuples is None:
u, v, eid = graph.block_edges(block_id, remap_local=True)
u = utils.toindex(u)
v = utils.toindex(v)
eid = utils.toindex(eid)
else:
u, v, eid = edge_tuples
num_src, num_dst = graph.layer_size(block_id), graph.layer_size(block_id + 1)
gidx, edge_map, nbits = build_gidx_and_mapping_uv((u, v, eid), num_src, num_dst)
return gidx, edge_map, nbits
"""User-defined function related data structures."""
from __future__ import absolute_import
class EdgeBatch(object):
"""The class that can represent a batch of edges.
Parameters
----------
edges : tuple of utils.Index
The edge tuple (u, v, eid). eid can be ALL
src_data : dict
The src node features, in the form of ``dict``
with ``str`` keys and ``tensor`` values
edge_data : dict
The edge features, in the form of ``dict`` with
``str`` keys and ``tensor`` values
dst_data : dict of tensors
The dst node features, in the form of ``dict``
with ``str`` keys and ``tensor`` values
canonical_etype : tuple of (str, str, str), optional
Canonical edge type of the edge batch, if UDF is
running on a heterograph.
"""
def __init__(self, edges, src_data, edge_data, dst_data,
canonical_etype=(None, None, None)):
self._edges = edges
self._src_data = src_data
self._edge_data = edge_data
self._dst_data = dst_data
self._canonical_etype = canonical_etype
@property
def src(self):
"""Return the feature data of the source nodes.
Returns
-------
dict with str keys and tensor values
Features of the source nodes.
"""
return self._src_data
@property
def dst(self):
"""Return the feature data of the destination nodes.
Returns
-------
dict with str keys and tensor values
Features of the destination nodes.
"""
return self._dst_data
@property
def data(self):
"""Return the edge feature data.
Returns
-------
dict with str keys and tensor values
Features of the edges.
"""
return self._edge_data
def edges(self):
"""Return the edges contained in this batch.
Returns
-------
tuple of three tensors
The edge tuple :math:`(src, dst, eid)`. :math:`src[i],
dst[i], eid[i]` separately specifies the source node,
destination node and the edge id for the ith edge
in the batch.
"""
u, v, eid = self._edges
return (u.tousertensor(), v.tousertensor(), eid.tousertensor())
def batch_size(self):
"""Return the number of edges in this edge batch.
Returns
-------
int
"""
return len(self._edges[0])
def __len__(self):
"""Return the number of edges in this edge batch.
Returns
-------
int
"""
return self.batch_size()
@property
def canonical_etype(self):
"""Return the canonical edge type (i.e. triplet of source, edge, and
destination node type) for this edge batch, if available."""
return self._canonical_etype
class NodeBatch(object):
"""The class that can represent a batch of nodes.
Parameters
----------
nodes : utils.Index
The node ids.
data : dict
The node features, in the form of ``dict``
with ``str`` keys and ``tensor`` values
msgs : dict, optional
The messages, in the form of ``dict``
with ``str`` keys and ``tensor`` values
ntype : str, optional
The node type of this node batch, if running
on a heterograph.
"""
def __init__(self, nodes, data, msgs=None, ntype=None):
self._nodes = nodes
self._data = data
self._msgs = msgs
self._ntype = ntype
@property
def data(self):
"""Return the node feature data.
Returns
-------
dict with str keys and tensor values
Features of the nodes.
"""
return self._data
@property
def mailbox(self):
"""Return the received messages.
If no messages received, a ``None`` will be returned.
Returns
-------
dict or None
The messages nodes received. If dict, the keys are
``str`` and the values are ``tensor``.
"""
return self._msgs
def nodes(self):
"""Return the nodes contained in this batch.
Returns
-------
tensor
The nodes.
"""
return self._nodes.tousertensor()
def batch_size(self):
"""Return the number of nodes in this batch.
Returns
-------
int
"""
return len(self._nodes)
def __len__(self):
"""Return the number of nodes in this node batch.
Returns
-------
int
"""
return self.batch_size()
@property
def ntype(self):
"""Return the node type of this node batch, if available."""
return self._ntype
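# Illustrative sketch (not part of the original module, names are hypothetical):
# what message and reduce UDFs written against EdgeBatch/NodeBatch typically look
# like. Lightweight stand-in batches built from plain dicts of numpy arrays are
# used so the snippet runs without a graph; real UDFs receive the classes above.
import numpy as np

class _FakeEdgeBatch(object):
    def __init__(self, src, data, dst):
        self.src, self.data, self.dst = src, data, dst

class _FakeNodeBatch(object):
    def __init__(self, data, mailbox):
        self.data, self.mailbox = data, mailbox

def _example_message_udf(edges):
    # message = source feature scaled by a per-edge weight
    return {'m': edges.src['h'] * edges.data['w'][:, None]}

def _example_reduce_udf(nodes):
    # reduce = sum over the mailbox's per-node message dimension
    return {'h_new': nodes.mailbox['m'].sum(axis=1)}

_eb = _FakeEdgeBatch({'h': np.ones((3, 2))}, {'w': np.array([1.0, 2.0, 3.0])}, {})
_msg = _example_message_udf(_eb)['m']                   # shape (3, 2)
_nb = _FakeNodeBatch({}, {'m': _msg.reshape(1, 3, 2)})  # one receiver of degree 3
assert _example_reduce_udf(_nb)['h_new'].tolist() == [[6.0, 6.0]]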
"""Views of DGLGraph."""
from __future__ import absolute_import
from collections import namedtuple
from collections.abc import MutableMapping
import numpy as np
from ..base import ALL, is_all, DGLError
from .. import backend as F
NodeSpace = namedtuple('NodeSpace', ['data'])
EdgeSpace = namedtuple('EdgeSpace', ['data'])
class NodeView(object):
"""A NodeView class to act as G.nodes for a DGLGraph.
Can be used to get a list of current nodes and get and set node data.
See Also
--------
dgl.DGLGraph.nodes
"""
__slots__ = ['_graph']
def __init__(self, graph):
self._graph = graph
def __len__(self):
return self._graph.number_of_nodes()
def __getitem__(self, nodes):
if isinstance(nodes, slice):
# slice
if not (nodes.start is None and nodes.stop is None
and nodes.step is None):
raise DGLError('Currently only full slice ":" is supported')
return NodeSpace(data=NodeDataView(self._graph, ALL))
else:
return NodeSpace(data=NodeDataView(self._graph, nodes))
def __call__(self):
"""Return the nodes."""
return F.copy_to(F.arange(0, len(self)), F.cpu())
class NodeDataView(MutableMapping):
"""The data view class when G.nodes[...].data is called.
See Also
--------
dgl.DGLGraph.nodes
"""
__slots__ = ['_graph', '_nodes']
def __init__(self, graph, nodes):
self._graph = graph
self._nodes = nodes
def __getitem__(self, key):
return self._graph.get_n_repr(self._nodes)[key]
def __setitem__(self, key, val):
if isinstance(val, np.ndarray):
val = F.zerocopy_from_numpy(val)
self._graph.set_n_repr({key : val}, self._nodes)
def __delitem__(self, key):
if not is_all(self._nodes):
raise DGLError('Deleting feature data is not supported on a subset'
' of nodes. Please use `del G.ndata[key]` instead.')
self._graph.pop_n_repr(key)
def __len__(self):
return len(self._graph._node_frame)
def __iter__(self):
return iter(self._graph._node_frame)
def __repr__(self):
data = self._graph.get_n_repr(self._nodes)
return repr({key : data[key] for key in self._graph._node_frame})
class EdgeView(object):
"""A EdgeView class to act as G.edges for a DGLGraph.
Can be used to get a list of current edges and get and set edge data.
See Also
--------
dgl.DGLGraph.edges
"""
__slots__ = ['_graph']
def __init__(self, graph):
self._graph = graph
def __len__(self):
return self._graph.number_of_edges()
def __getitem__(self, edges):
if isinstance(edges, slice):
# slice
if not (edges.start is None and edges.stop is None
and edges.step is None):
raise DGLError('Currently only full slice ":" is supported')
return EdgeSpace(data=EdgeDataView(self._graph, ALL))
else:
return EdgeSpace(data=EdgeDataView(self._graph, edges))
def __call__(self, *args, **kwargs):
"""Return all the edges."""
return self._graph.all_edges(*args, **kwargs)
class EdgeDataView(MutableMapping):
"""The data view class when G.edges[...].data is called.
See Also
--------
dgl.DGLGraph.edges
"""
__slots__ = ['_graph', '_edges']
def __init__(self, graph, edges):
self._graph = graph
self._edges = edges
def __getitem__(self, key):
return self._graph.get_e_repr(self._edges)[key]
def __setitem__(self, key, val):
if isinstance(val, np.ndarray):
val = F.zerocopy_from_numpy(val)
self._graph.set_e_repr({key : val}, self._edges)
def __delitem__(self, key):
if not is_all(self._edges):
raise DGLError('Deleting feature data is not supported on a subset'
' of edges. Please use `del G.edata[key]` instead.')
self._graph.pop_e_repr(key)
def __len__(self):
return len(self._graph._edge_frame)
def __iter__(self):
return iter(self._graph._edge_frame)
def __repr__(self):
data = self._graph.get_e_repr(self._edges)
return repr({key : data[key] for key in self._graph._edge_frame})
class LayerView(object):
"""A LayerView class to act as nflow.layers for a NodeFlow.
Can be used to get a list of current nodes and get and set node data.
"""
__slots__ = ['_graph']
def __init__(self, graph):
self._graph = graph
def __len__(self):
return self._graph.num_layers()
def __getitem__(self, layer):
if not isinstance(layer, int):
raise DGLError('Currently we only support the view of one layer')
return NodeSpace(data=LayerDataView(self._graph, layer))
def __call__(self):
"""Return the nodes."""
return F.arange(0, len(self))
class LayerDataView(MutableMapping):
"""The data view class when G.layers[...].data is called.
"""
__slots__ = ['_graph', '_layer']
def __init__(self, graph, layer):
self._graph = graph
self._layer = layer
def __getitem__(self, key):
return self._graph._node_frames[self._layer][key]
def __setitem__(self, key, val):
self._graph._node_frames[self._layer][key] = val
def __delitem__(self, key):
del self._graph._node_frames[self._layer][key]
def __len__(self):
return len(self._graph._node_frames[self._layer])
def __iter__(self):
return iter(self._graph._node_frames[self._layer])
def __repr__(self):
data = self._graph._node_frames[self._layer]
return repr({key : data[key] for key in data})
class BlockView(object):
"""A BlockView class to act as nflow.blocks for a NodeFlow.
Can be used to get a list of current edges and get and set edge data.
"""
__slots__ = ['_graph']
def __init__(self, graph):
self._graph = graph
def __len__(self):
return self._graph.num_blocks
def __getitem__(self, flow):
if not isinstance(flow, int):
raise DGLError('Currently we only support the view of one flow')
return EdgeSpace(data=BlockDataView(self._graph, flow))
def __call__(self, *args, **kwargs):
"""Return all the edges."""
return self._graph.all_edges(*args, **kwargs)
class BlockDataView(MutableMapping):
"""The data view class when G.blocks[...].data is called.
"""
__slots__ = ['_graph', '_flow']
def __init__(self, graph, flow):
self._graph = graph
self._flow = flow
def __getitem__(self, key):
return self._graph._edge_frames[self._flow][key]
def __setitem__(self, key, val):
self._graph._edge_frames[self._flow][key] = val
def __delitem__(self, key):
del self._graph._edge_frames[self._flow][key]
def __len__(self):
return len(self._graph._edge_frames[self._flow])
def __iter__(self):
return iter(self._graph._edge_frames[self._flow])
def __repr__(self):
data = self._graph._edge_frames[self._flow]
return repr({key : data[key] for key in data})
......@@ -9,7 +9,6 @@ import mxnet.ndarray as nd
import numpy as np
from ... import ndarray as dglnd
from ..._deprecate import kernel as K
from ...function.base import TargetCode
from ...utils import version
......@@ -525,300 +524,6 @@ def zerocopy_from_dgl_ndarray(arr):
return nd.from_dlpack(arr.to_dlpack())
class BinaryReduce(mx.autograd.Function):
def __init__(
self,
reducer,
binary_op,
graph,
lhs,
rhs,
out_size,
lhs_map,
rhs_map,
out_map,
):
super(BinaryReduce, self).__init__()
self.reducer = reducer
self.binary_op = binary_op
self.graph = graph
self.lhs = lhs
self.rhs = rhs
self.out_size = out_size
self.lhs_map = lhs_map
self.rhs_map = rhs_map
self.out_map = out_map
def forward(self, lhs_data, rhs_data):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(
self.binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if self.binary_op == "dot":
out_shape = feat_shape[:-1]
out_data = nd.empty(
(self.out_size,) + out_shape,
ctx=lhs_data.context,
dtype=lhs_data.dtype,
)
out_data_nd = zerocopy_to_dgl_ndarray_for_write(out_data)
K.binary_op_reduce(
self.reducer if self.reducer != "mean" else "sum",
self.binary_op,
self.graph,
self.lhs,
self.rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
self.lhs_map[0],
self.rhs_map[0],
self.out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if self.reducer == "mean":
degs = nd.empty(
(out_data.shape[0],), ctx=out_data.context, dtype=out_data.dtype
)
degs_nd = zerocopy_to_dgl_ndarray(degs)
if self.lhs != TargetCode.DST:
target = self.lhs
n = lhs_data.shape[0]
in_map = self.lhs_map[0]
else:
target = self.rhs
n = rhs_data.shape[0]
in_map = self.rhs_map[0]
in_ones = nd.ones((n,), ctx=lhs_data.context, dtype=lhs_data.dtype)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
K.copy_reduce(
"sum",
self.graph,
target,
in_ones_nd,
degs_nd,
in_map,
self.out_map[0],
)
# reshape
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.ndim - 1)
).clip(1, float("inf"))
out_data = out_data / degs
else:
degs = None
self.save_for_backward(
lhs_data_nd, rhs_data_nd, out_data_nd, feat_shape, degs
)
return out_data
def backward(self, grad_out):
(
lhs_data_nd,
rhs_data_nd,
out_data_nd,
feat_shape,
degs,
) = self.saved_tensors
if self.reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
grad_lhs = nd.empty(
(lhs_data_nd.shape[0],) + feat_shape,
ctx=grad_out.context,
dtype=grad_out.dtype,
)
K.backward_lhs_binary_op_reduce(
self.reducer if self.reducer != "mean" else "sum",
self.binary_op,
self.graph,
self.lhs,
self.rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray_for_write(grad_lhs),
self.lhs_map[1],
self.rhs_map[1],
self.out_map[1],
)
grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
grad_rhs = nd.empty(
(rhs_data_nd.shape[0],) + feat_shape,
ctx=grad_out.context,
dtype=grad_out.dtype,
)
K.backward_rhs_binary_op_reduce(
self.reducer if self.reducer != "mean" else "sum",
self.binary_op,
self.graph,
self.lhs,
self.rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray_for_write(grad_rhs),
self.lhs_map[1],
self.rhs_map[1],
self.out_map[1],
)
grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
# clear saved tensors explicitly
self.saved_tensors = None
return grad_lhs, grad_rhs
def binary_reduce(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map=(None, None),
rhs_map=(None, None),
out_map=(None, None),
):
func = BinaryReduce(
reducer, binary_op, graph, lhs, rhs, out_size, lhs_map, rhs_map, out_map
)
return func(lhs_data, rhs_data)
class CopyReduce(mx.autograd.Function):
def __init__(self, reducer, graph, target, out_size, in_map, out_map):
super(CopyReduce, self).__init__()
self.reducer = reducer
self.graph = graph
self.target = target
self.out_size = out_size
self.in_map = in_map
self.out_map = out_map
def forward(self, in_data):
feat_shape = in_data.shape[1:]
out_data = nd.empty(
(self.out_size,) + feat_shape,
ctx=in_data.context,
dtype=in_data.dtype,
)
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray_for_write(out_data)
K.copy_reduce(
self.reducer if self.reducer != "mean" else "sum",
self.graph,
self.target,
in_data_nd,
out_data_nd,
self.in_map[0],
self.out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if self.reducer == "mean":
in_ones = nd.ones(
(in_data.shape[0],), ctx=in_data.context, dtype=in_data.dtype
)
degs = nd.empty(
(out_data.shape[0],), ctx=out_data.context, dtype=out_data.dtype
)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
degs_nd = zerocopy_to_dgl_ndarray(degs)
K.copy_reduce(
"sum",
self.graph,
self.target,
in_ones_nd,
degs_nd,
self.in_map[0],
self.out_map[0],
)
# reshape
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.ndim - 1)
).clip(1, float("inf"))
out_data = out_data / degs
else:
degs = None
self.save_for_backward(in_data_nd, out_data_nd, degs)
return out_data
def backward(self, grad_out):
in_data_nd, out_data_nd, degs = self.saved_tensors
grad_in = nd.empty(
in_data_nd.shape, ctx=grad_out.context, dtype=grad_out.dtype
)
if self.reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
K.backward_copy_reduce(
self.reducer if self.reducer != "mean" else "sum",
self.graph,
self.target,
in_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray_for_write(grad_in),
self.in_map[1],
self.out_map[1],
)
# clear saved tensors explicitly
self.saved_tensors = None
return grad_in
def copy_reduce(
reducer,
graph,
target,
in_data,
out_size,
in_map=(None, None),
out_map=(None, None),
):
func = CopyReduce(reducer, graph, target, out_size, in_map, out_map)
return func(in_data)
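# Illustrative sketch (not part of the original module, names are hypothetical):
# the "mean = sum / degree" normalization used above for the mean reducer,
# written with plain numpy. A second copy-reduce over an all-ones input computes
# the degrees, which are clipped to at least 1 before dividing.
import numpy as np

def _example_mean_reduce(dst, messages, num_nodes):
    dst = np.asarray(dst)
    out = np.zeros((num_nodes,) + messages.shape[1:], dtype=messages.dtype)
    np.add.at(out, dst, messages)                          # sum-reduce by receiver
    degs = np.bincount(dst, minlength=num_nodes).astype(messages.dtype)
    degs = np.clip(degs, 1, None).reshape((num_nodes,) + (1,) * (messages.ndim - 1))
    return out / degs                                      # mean = sum / max(deg, 1)

# Example: two messages land on node 0, none on node 1.
# _example_mean_reduce([0, 0], np.array([[2.0], [4.0]]), 2) -> [[3.0], [0.0]]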
def _reduce_grad(grad, shape):
"""Reduce gradient on the broadcast dimension
If there is broadcasting in the forward pass, gradients need to be reduced on
the broadcast dimension. This function checks the input tensor shape and
gradient shape and performs the reduction.
Parameters
----------
grad: Tensor
Gradient tensor
shape: tuple
Shape of input tensor
Returns
-------
Tensor
"""
grad_shape = grad.shape[1:]
in_shape = shape[1:]
if in_shape == grad_shape:
# no need to reduce
return grad
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad in_shape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))[0]
reduce_idx += 1 # skip batch dim
grad = grad.sum(axis=tuple(reduce_idx), keepdims=True)
return grad.reshape(shape)
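# Illustrative sketch (not part of the original module, names are hypothetical):
# the broadcast-gradient rule implemented by _reduce_grad, checked eagerly with
# numpy. If an input of shape (n, 1, d) was broadcast against (n, k, d) in the
# forward pass, its gradient of shape (n, k, d) must be summed back over the
# broadcast axis to shape (n, 1, d).
import numpy as np

def _example_reduce_grad(grad, in_shape):
    grad_shape = grad.shape[1:]
    feat_shape = tuple(in_shape[1:])
    if feat_shape == grad_shape:
        return grad
    # pad the input feature shape on the left so both shapes have the same rank
    padded = (1,) * (len(grad_shape) - len(feat_shape)) + feat_shape
    axes = tuple(i + 1 for i, (g, s) in enumerate(zip(grad_shape, padded)) if g != s)
    return grad.sum(axis=axes, keepdims=True).reshape(in_shape)

_g = np.ones((2, 3, 4))
assert _example_reduce_grad(_g, (2, 1, 4)).shape == (2, 1, 4)
assert _example_reduce_grad(_g, (2, 1, 4))[0, 0, 0] == 3.0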
def sync():
"""Synchronize computation.
......
......@@ -9,7 +9,6 @@ import torch as th
from torch.utils import dlpack
from ... import ndarray as nd
from ..._deprecate import kernel as K
from ...function.base import TargetCode
from ...utils import version
......@@ -471,323 +470,6 @@ def zerocopy_from_dgl_ndarray(data):
return dlpack.from_dlpack(data.to_dlpack())
class BinaryReduce(th.autograd.Function):
@staticmethod
def forward(
ctx,
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_data,
out_size,
lhs_map,
rhs_map,
out_map,
):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
lhs_map[0],
rhs_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if reducer == "mean":
degs = lhs_data.new_empty((out_data.shape[0],))
degs_nd = zerocopy_to_dgl_ndarray(degs)
if lhs != TargetCode.DST: # src or edge
target = lhs
n = lhs_data.shape[0]
in_map = lhs_map[0]
else: # rhs != TargetCode.DST
target = rhs
n = rhs_data.shape[0]
in_map = rhs_map[0]
in_ones = lhs_data.new_ones((n,))
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
K.copy_reduce(
"sum", graph, target, in_ones_nd, degs_nd, in_map, out_map[0]
)
# reshape
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.dim() - 1)
).clamp(min=1)
out_data = out_data / degs
else:
degs = None
# save_for_backward can only save variables
ctx.backward_cache = (
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_map,
rhs_map,
out_map,
feat_shape,
degs,
)
ctx.save_for_backward(lhs_data, rhs_data, out_data)
return out_data
@staticmethod
def backward(ctx, grad_out):
(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_map,
rhs_map,
out_map,
feat_shape,
degs,
) = ctx.backward_cache
lhs_data, rhs_data, out_data = ctx.saved_tensors
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
grad_lhs = None
grad_rhs = None
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
if ctx.needs_input_grad[5]:
grad_lhs = grad_out.new_empty((lhs_data_nd.shape[0],) + feat_shape)
K.backward_lhs_binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
if ctx.needs_input_grad[6]:
grad_rhs = grad_out.new_empty((rhs_data_nd.shape[0],) + feat_shape)
K.backward_rhs_binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
return (
None,
None,
None,
None,
None,
grad_lhs,
grad_rhs,
None,
None,
None,
None,
None,
)
def binary_reduce(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map=(None, None),
rhs_map=(None, None),
out_map=(None, None),
):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data = lhs_data.new_empty((out_size,) + out_shape)
return BinaryReduce.apply(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_data,
out_size,
lhs_map,
rhs_map,
out_map,
)
class CopyReduce(th.autograd.Function):
@staticmethod
def forward(
ctx,
reducer,
graph,
target,
in_data,
out_data,
out_size,
in_map,
out_map,
):
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.copy_reduce(
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
in_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if reducer == "mean":
in_ones = in_data.new_ones((in_data.shape[0],))
degs = in_data.new_empty((out_data.shape[0],))
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
degs_nd = zerocopy_to_dgl_ndarray(degs)
K.copy_reduce(
"sum", graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0]
)
# reshape
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.dim() - 1)
).clamp(min=1)
out_data = out_data / degs
else:
degs = None
# save_for_backward can only save variables
ctx.backward_cache = (reducer, graph, target, in_map, out_map, degs)
ctx.save_for_backward(in_data, out_data)
return out_data
@staticmethod
def backward(ctx, grad_out):
reducer, graph, target, in_map, out_map, degs = ctx.backward_cache
in_data, out_data = ctx.saved_tensors
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
grad_in = None
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
if ctx.needs_input_grad[3]:
grad_in = grad_out.new_empty(in_data_nd.shape)
K.backward_copy_reduce(
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in),
in_map[1],
out_map[1],
)
return None, None, None, grad_in, None, None, None, None
def copy_reduce(
reducer,
graph,
target,
in_data,
out_size,
in_map=(None, None),
out_map=(None, None),
):
out_data = in_data.new_empty((out_size,) + in_data.shape[1:])
return CopyReduce.apply(
reducer, graph, target, in_data, out_data, out_size, in_map, out_map
)
def _reduce_grad(grad, shape):
"""Reduce gradient on the broadcast dimension
If there is broadcasting in the forward pass, gradients need to be reduced on
the broadcast dimension. This function checks the input tensor shape and
gradient shape and performs the reduction.
Parameters
----------
grad: Tensor
Gradient tensor
shape: tuple
Shape of input tensor
Returns
-------
Tensor
"""
grad_shape = grad.shape[1:]
in_shape = shape[1:]
if in_shape == grad_shape:
# no need to reduce
return grad
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad in_shape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = th.nonzero(
th.tensor(grad_shape) - th.tensor(in_shape), as_tuple=False
)
reduce_idx += 1 # skip batch dim
grad = grad.sum(dim=tuple(reduce_idx), keepdim=True)
return grad.view(shape)
def sync():
# Pytorch performs computation synchronously, so no need for synchronization.
pass
......
......@@ -8,7 +8,6 @@ import numpy as np
import tensorflow as tf
from ... import ndarray as nd
from ..._deprecate import kernel as K
from ...function.base import TargetCode
from ...utils import version
......@@ -515,269 +514,6 @@ def zerocopy_from_dgl_ndarray(input):
return zerocopy_from_dlpack(input.to_dlpack())
def binary_reduce(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map=(None, None),
rhs_map=(None, None),
out_map=(None, None),
):
@tf.custom_gradient
def _lambda(lhs_data, rhs_data):
return binary_reduce_real(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map,
rhs_map,
out_map,
)
return _lambda(lhs_data, rhs_data)
def binary_reduce_real(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map,
rhs_map,
out_map,
):
with tf.device(lhs_data.device):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data = tf.zeros((out_size,) + out_shape, dtype=lhs_data.dtype)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
lhs_map[0],
rhs_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if reducer == "mean":
degs = tf.zeros((out_data.shape[0],), dtype=lhs_data.dtype)
degs_nd = zerocopy_to_dgl_ndarray(degs)
if lhs != TargetCode.DST: # src or edge
target = lhs
n = lhs_data.shape[0]
in_map = lhs_map[0]
else: # rhs != TargetCode.DST
target = rhs
n = rhs_data.shape[0]
in_map = rhs_map[0]
in_ones = tf.ones((n,), dtype=lhs_data.dtype)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
K.copy_reduce(
"sum", graph, target, in_ones_nd, degs_nd, in_map, out_map[0]
)
# reshape
degs = tf.reshape(
degs, (out_data.shape[0],) + (1,) * (out_data.ndim - 1)
)
degs = tf.clip_by_value(
degs, clip_value_min=1, clip_value_max=np.inf
) # ???
out_data = out_data / degs
else:
degs = None
def grad(grad_out):
with tf.device(grad_out.device):
grad_lhs = None
grad_rhs = None
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
# compute gradient for lhs
grad_lhs = tf.zeros((lhs_data_nd.shape[0],) + feat_shape)
K.backward_lhs_binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
# compute gradient for rhs
grad_rhs = tf.zeros((rhs_data_nd.shape[0],) + feat_shape)
K.backward_rhs_binary_op_reduce(
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
return grad_lhs, grad_rhs
return out_data, grad
def copy_reduce(
reducer,
graph,
target,
in_data,
out_size,
in_map=(None, None),
out_map=(None, None),
):
@tf.custom_gradient
def _lambda(in_data):
return copy_reduce_real(
reducer, graph, target, in_data, out_size, in_map, out_map
)
return _lambda(in_data)
def copy_reduce_real(
reducer, graph, target, in_data, out_size, in_map, out_map
):
with tf.device(in_data.device):
out_data = tf.zeros(
(out_size,) + tuple(in_data.shape[1:]), dtype=in_data.dtype
)
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.copy_reduce(
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
in_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have a better solution in the future.
if reducer == "mean":
in_ones = tf.ones(in_data.shape[0], dtype=in_data.dtype)
degs = tf.zeros(out_data.shape[0], dtype=in_data.dtype)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
degs_nd = zerocopy_to_dgl_ndarray(degs)
K.copy_reduce(
"sum", graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0]
)
# reshape
degs = tf.reshape(
degs, (out_data.shape[0],) + (1,) * (out_data.ndim - 1)
)
degs = tf.clip_by_value(
degs, clip_value_min=1, clip_value_max=np.inf
) # TODO: ???
out_data = out_data / degs
else:
degs = None
def grad(grad_out):
with tf.device(grad_out.device):
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
grad_in = tf.zeros(in_data_nd.shape)
K.backward_copy_reduce(
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in),
in_map[1],
out_map[1],
)
return grad_in
return out_data, grad
def _reduce_grad(grad, shape):
"""Reduce gradient on the broadcast dimension
If there is broadcasting in the forward pass, gradients need to be reduced on
the broadcast dimension. This function checks the input tensor shape and
gradient shape and performs the reduction.
Parameters
----------
grad: Tensor
Gradient tensor
shape: tuple
Shape of input tensor
Returns
-------
Tensor
"""
grad_shape = grad.shape[1:]
in_shape = shape[1:]
if in_shape == grad_shape:
# no need to reduce
return grad
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad in_shape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = np.asarray(
np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))
)
reduce_idx += 1 # skip batch dim
reduce_idx_tensor = tf.constant(tuple(reduce_idx.flatten().tolist()))
grad = tf.reduce_sum(grad, axis=reduce_idx_tensor, keepdims=True)
return tf.reshape(grad, shape)
def sync():
# Assumes ``context`` here is the ``tensorflow.python.eager.context`` module
# imported in the truncated header of this file.
context.async_wait()
......
from . import sampling
from . import graph_store
from .dis_kvstore import KVClient, KVServer
from .dis_kvstore import read_ip_config
from .unified_tensor import UnifiedTensor
from __future__ import absolute_import
from . import knowledge_graph as knwlgrh
def load_data(dataset, bfs_level=3, relabel=False):
if dataset in ['aifb', 'mutag', 'bgs', 'am']:
return knwlgrh.load_entity(dataset, bfs_level, relabel)
elif dataset in ['FB15k', 'wn18', 'FB15k-237']:
return knwlgrh.load_link(dataset)
else:
raise ValueError('Unknown dataset: {}'.format(dataset))
""" Knowledge graph dataset for Relational-GCN
Code adapted from authors' implementation of Relational-GCN
https://github.com/tkipf/relational-gcn
https://github.com/MichSchli/RelationPrediction
"""
from __future__ import print_function
from __future__ import absolute_import
import numpy as np
import scipy.sparse as sp
import os, gzip
import rdflib as rdf
import pandas as pd
from collections import Counter
from dgl.data.utils import download, extract_archive, get_download_dir, _get_dgl_url
np.random.seed(123)
_downlaod_prefix = _get_dgl_url('dataset/')
class RGCNEntityDataset(object):
"""RGCN Entity Classification dataset
The dataset contains a graph depicting the connectivity of a knowledge
base. Currently, four knowledge bases from the
`RGCN paper <https://arxiv.org/pdf/1703.06103.pdf>`_ are supported: aifb,
mutag, bgs, and am.
The original knowledge base is stored as an RDF file, and this class will
download and parse the RDF file, and perform preprocessing.
An object of this class has 11 member attributes needed for entity
classification:
num_nodes: int
number of entities of knowledge base
num_rels: int
number of relations (including reverse relation) of knowledge base
num_classes: int
number of classes/labels of entities in the knowledge base
edge_src: numpy.array
source node ids of all edges
edge_dst: numpy.array
destination node ids of all edges
edge_type: numpy.array
type of all edges
edge_norm: numpy.array
normalization factor of all edges
labels: numpy.array
labels of node entities
train_idx: numpy.array
ids of entities used for training
valid_idx: numpy.array
ids of entities used for validation
test_idx: numpy.array
ids of entities used for testing
Usually, users don't need to directly use this class. Instead, DGL provides a
wrapper function to load data (see example below).
When loading data, besides specifying the dataset name, users can provide two
optional arguments:
Parameters
----------
bfs_level: int
prune out nodes that are more than ``bfs_level`` hops away from
labeled nodes, i.e., nodes that won't be touched during propagation. If set
to a number less than or equal to 0, all nodes will be retained.
relabel: bool
After pruning, whether or not to relabel all nodes with consecutive
node ids
Examples
--------
Load aifb dataset, prune out nodes that are more than 3 hops away from
labeled nodes, and relabel the remaining nodes with consecutive ids
>>> from dgl.contrib.data import load_data
>>> data = load_data(dataset='aifb', bfs_level=3, relabel=True)
"""
def __init__(self, name):
self.name = name
self.dir = get_download_dir()
tgz_path = os.path.join(self.dir, '{}.tgz'.format(self.name))
download(_downlaod_prefix + '{}.tgz'.format(self.name), tgz_path)
self.dir = os.path.join(self.dir, self.name)
extract_archive(tgz_path, self.dir)
def load(self, bfs_level=2, relabel=False):
self.num_nodes, edges, self.num_rels, self.labels, labeled_nodes_idx, self.train_idx, self.test_idx = _load_data(self.name, self.dir)
# bfs to reduce edges
if bfs_level > 0:
print("removing nodes that are more than {} hops away".format(bfs_level))
row, col, edge_type = edges.transpose()
A = sp.csr_matrix((np.ones(len(row)), (row, col)), shape=(self.num_nodes, self.num_nodes))
bfs_generator = _bfs_relational(A, labeled_nodes_idx)
lvls = list()
lvls.append(set(labeled_nodes_idx))
for _ in range(bfs_level):
lvls.append(next(bfs_generator))
to_delete = list(set(range(self.num_nodes)) - set.union(*lvls))
eid_to_delete = np.isin(row, to_delete) + np.isin(col, to_delete)
eid_to_keep = np.logical_not(eid_to_delete)
self.edge_src = row[eid_to_keep]
self.edge_dst = col[eid_to_keep]
self.edge_type = edge_type[eid_to_keep]
if relabel:
uniq_nodes, edges = np.unique((self.edge_src, self.edge_dst), return_inverse=True)
self.edge_src, self.edge_dst = np.reshape(edges, (2, -1))
node_map = np.zeros(self.num_nodes, dtype=int)
self.num_nodes = len(uniq_nodes)
node_map[uniq_nodes] = np.arange(self.num_nodes)
self.labels = self.labels[uniq_nodes]
self.train_idx = node_map[self.train_idx]
self.test_idx = node_map[self.test_idx]
print("{} nodes left".format(self.num_nodes))
else:
self.edge_src, self.edge_dst, self.edge_type = edges.transpose()
# normalize by dst degree
_, inverse_index, count = np.unique((self.edge_dst, self.edge_type), axis=1, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
self.edge_norm = np.ones(len(self.edge_dst), dtype=np.float32) / degrees.astype(np.float32)
# convert to pytorch label format
self.num_classes = self.labels.shape[1]
self.labels = np.argmax(self.labels, axis=1)
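# A minimal usage sketch (not part of the original module), showing how the
# attributes produced by load() are typically consumed for entity
# classification; building the actual DGLGraph from edge_src/edge_dst/edge_type
# depends on the DGL release in use and is omitted here:
#
# >>> data = load_data(dataset='aifb', bfs_level=3, relabel=True)
# >>> src, dst, etype = data.edge_src, data.edge_dst, data.edge_type
# >>> print(data.num_nodes, data.num_rels, data.num_classes)
# >>> train_labels = data.labels[data.train_idx]   # per-node class ids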
class RGCNLinkDataset(object):
"""RGCN link prediction dataset
The dataset contains a graph depicting the connectivity of a knowledge
base. Currently, three knowledge bases from the
`RGCN paper <https://arxiv.org/pdf/1703.06103.pdf>`_ are supported:
FB15k-237, FB15k, and wn18.
The original knowledge base is stored as an RDF file; this class downloads
and parses the RDF file and performs preprocessing.
An object of this class has 5 member attributes needed for link
prediction:
num_nodes: int
number of entities of knowledge base
num_rels: int
number of relations (including reverse relation) of knowledge base
train: numpy.array
all relation triplets (src, rel, dst) for training
valid: numpy.array
all relation triplets (src, rel, dst) for validation
test: numpy.array
all relation triplets (src, rel, dst) for testing
Usually, users don't need to use this class directly. Instead, DGL provides a
wrapper function to load the data (see the example below).
Examples
--------
Load FB15k-237 dataset
>>> from dgl.contrib.data import load_data
>>> data = load_data(dataset='FB15k-237')
"""
def __init__(self, name):
self.name = name
self.dir = get_download_dir()
tgz_path = os.path.join(self.dir, '{}.tar.gz'.format(self.name))
download(_downlaod_prefix + '{}.tgz'.format(self.name), tgz_path)
self.dir = os.path.join(self.dir, self.name)
extract_archive(tgz_path, self.dir)
def load(self):
entity_path = os.path.join(self.dir, 'entities.dict')
relation_path = os.path.join(self.dir, 'relations.dict')
train_path = os.path.join(self.dir, 'train.txt')
valid_path = os.path.join(self.dir, 'valid.txt')
test_path = os.path.join(self.dir, 'test.txt')
entity_dict = _read_dictionary(entity_path)
relation_dict = _read_dictionary(relation_path)
self.train = np.asarray(_read_triplets_as_list(train_path, entity_dict, relation_dict))
self.valid = np.asarray(_read_triplets_as_list(valid_path, entity_dict, relation_dict))
self.test = np.asarray(_read_triplets_as_list(test_path, entity_dict, relation_dict))
self.num_nodes = len(entity_dict)
print("# entities: {}".format(self.num_nodes))
self.num_rels = len(relation_dict)
print("# relations: {}".format(self.num_rels))
print("# edges: {}".format(len(self.train)))
def load_entity(dataset, bfs_level, relabel):
data = RGCNEntityDataset(dataset)
data.load(bfs_level, relabel)
return data
def load_link(dataset):
data = RGCNLinkDataset(dataset)
data.load()
return data
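# A minimal usage sketch (not part of the original module) for the link
# prediction datasets; `data.train` holds (src, rel, dst) triplets encoded as
# integer IDs via entities.dict and relations.dict:
#
# >>> data = load_link('FB15k-237')
# >>> src, rel, dst = data.train[:, 0], data.train[:, 1], data.train[:, 2]
# >>> print(data.num_nodes, data.num_rels, len(data.train))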
def _sp_row_vec_from_idx_list(idx_list, dim):
"""Create sparse vector of dimensionality dim from a list of indices."""
shape = (1, dim)
data = np.ones(len(idx_list))
row_ind = np.zeros(len(idx_list))
col_ind = list(idx_list)
return sp.csr_matrix((data, (row_ind, col_ind)), shape=shape)
def _get_neighbors(adj, nodes):
"""Takes a set of nodes and a graph adjacency matrix and returns a set of neighbors."""
sp_nodes = _sp_row_vec_from_idx_list(list(nodes), adj.shape[1])
sp_neighbors = sp_nodes.dot(adj)
neighbors = set(sp.find(sp_neighbors)[1]) # convert to set of indices
return neighbors
def _bfs_relational(adj, roots):
"""
BFS for graphs with multiple edge types. Yields one set of newly
visited nodes per BFS level, starting from the given roots.
"""
visited = set()
current_lvl = set(roots)
next_lvl = set()
while current_lvl:
for v in current_lvl:
visited.add(v)
next_lvl = _get_neighbors(adj, current_lvl)
next_lvl -= visited # set difference
yield next_lvl
current_lvl = set.union(next_lvl)
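# Illustrative only: _bfs_relational yields one frontier set per call, so a
# caller can stop after a fixed number of hops, as RGCNEntityDataset.load()
# does above (A, labeled_nodes_idx and bfs_level are the names used there):
#
# >>> gen = _bfs_relational(A, labeled_nodes_idx)
# >>> lvls = [set(labeled_nodes_idx)] + [next(gen) for _ in range(bfs_level)]
# >>> keep = set.union(*lvls)   # nodes within bfs_level hops of a labeled node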
class RDFReader(object):
__graph = None
__freq = {}
def __init__(self, file):
self.__graph = rdf.Graph()
if file.endswith('nt.gz'):
with gzip.open(file, 'rb') as f:
self.__graph.parse(file=f, format='nt')
else:
self.__graph.parse(file, format=rdf.util.guess_format(file))
# See http://rdflib.readthedocs.io for the rdflib documentation
self.__freq = Counter(self.__graph.predicates())
print("Graph loaded, frequencies counted.")
def triples(self, relation=None):
for s, p, o in self.__graph.triples((None, relation, None)):
yield s, p, o
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.__graph.destroy("store")
self.__graph.close(True)
def subjectSet(self):
return set(self.__graph.subjects())
def objectSet(self):
return set(self.__graph.objects())
def relationList(self):
"""
Return a list of relations, ordered by descending frequency.
:return: list of relations
"""
res = list(set(self.__graph.predicates()))
res.sort(key=lambda rel: - self.freq(rel))
return res
def __len__(self):
return len(self.__graph)
def freq(self, rel):
if rel not in self.__freq:
return 0
return self.__freq[rel]
def _load_sparse_csr(filename):
loader = np.load(filename)
return sp.csr_matrix((loader['data'], loader['indices'], loader['indptr']),
shape=loader['shape'], dtype=np.float32)
def _save_sparse_csr(filename, array):
np.savez(filename, data=array.data, indices=array.indices,
indptr=array.indptr, shape=array.shape)
def _load_data(dataset_str='aifb', dataset_path=None):
"""
:param dataset_str: name of the dataset ('aifb', 'mutag', 'bgs' or 'am')
:param dataset_path: directory that contains the downloaded dataset files
Edges, labels and train/test indices are preprocessed once and cached to
disk; subsequent calls restore them from the cached files.
:return: (num_node, edge_list, num_rel, labels, labeled_nodes_idx, train_idx, test_idx)
"""
print('Loading dataset', dataset_str)
graph_file = os.path.join(dataset_path, '{}_stripped.nt.gz'.format(dataset_str))
task_file = os.path.join(dataset_path, 'completeDataset.tsv')
train_file = os.path.join(dataset_path, 'trainingSet.tsv')
test_file = os.path.join(dataset_path, 'testSet.tsv')
if dataset_str == 'am':
label_header = 'label_cateogory'
nodes_header = 'proxy'
elif dataset_str == 'aifb':
label_header = 'label_affiliation'
nodes_header = 'person'
elif dataset_str == 'mutag':
label_header = 'label_mutagenic'
nodes_header = 'bond'
elif dataset_str == 'bgs':
label_header = 'label_lithogenesis'
nodes_header = 'rock'
else:
raise NameError('Dataset name not recognized: ' + dataset_str)
edge_file = os.path.join(dataset_path, 'edges.npz')
labels_file = os.path.join(dataset_path, 'labels.npz')
train_idx_file = os.path.join(dataset_path, 'train_idx.npy')
test_idx_file = os.path.join(dataset_path, 'test_idx.npy')
# train_names_file = os.path.join(dataset_path, 'train_names.npy')
# test_names_file = os.path.join(dataset_path, 'test_names.npy')
# rel_dict_file = os.path.join(dataset_path, 'rel_dict.pkl')
# nodes_file = os.path.join(dataset_path, 'nodes.pkl')
if os.path.isfile(edge_file) and os.path.isfile(labels_file) and \
os.path.isfile(train_idx_file) and os.path.isfile(test_idx_file):
# load precomputed adjacency matrix and labels
all_edges = np.load(edge_file)
num_node = all_edges['n'].item()
edge_list = all_edges['edges']
num_rel = all_edges['nrel'].item()
print('Number of nodes: ', num_node)
print('Number of edges: ', len(edge_list))
print('Number of relations: ', num_rel)
labels = _load_sparse_csr(labels_file)
labeled_nodes_idx = list(labels.nonzero()[0])
print('Number of classes: ', labels.shape[1])
train_idx = np.load(train_idx_file)
test_idx = np.load(test_idx_file)
# train_names = np.load(train_names_file)
# test_names = np.load(test_names_file)
# relations_dict = pkl.load(open(rel_dict_file, 'rb'))
else:
# loading labels of nodes
labels_df = pd.read_csv(task_file, sep='\t', encoding='utf-8')
labels_train_df = pd.read_csv(train_file, sep='\t', encoding='utf8')
labels_test_df = pd.read_csv(test_file, sep='\t', encoding='utf8')
with RDFReader(graph_file) as reader:
relations = reader.relationList()
subjects = reader.subjectSet()
objects = reader.objectSet()
nodes = list(subjects.union(objects))
num_node = len(nodes)
num_rel = len(relations)
num_rel = 2 * num_rel + 1 # +1 is for self-relation
assert num_node < np.iinfo(np.int32).max
print('Number of nodes: ', num_node)
print('Number of relations: ', num_rel)
relations_dict = {rel: i for i, rel in enumerate(list(relations))}
nodes_dict = {node: i for i, node in enumerate(nodes)}
edge_list = []
# self relation
for i in range(num_node):
edge_list.append((i, i, 0))
for i, (s, p, o) in enumerate(reader.triples()):
src = nodes_dict[s]
dst = nodes_dict[o]
assert src < num_node and dst < num_node
rel = relations_dict[p]
# relation id 0 is self-relation, so others should start with 1
edge_list.append((src, dst, 2 * rel + 1))
# reverse relation
edge_list.append((dst, src, 2 * rel + 2))
# sort indices by destination
edge_list = sorted(edge_list, key=lambda x: (x[1], x[0], x[2]))
edge_list = np.asarray(edge_list, dtype=np.int)
print('Number of edges: ', len(edge_list))
np.savez(edge_file, edges=edge_list, n=np.asarray(num_node), nrel=np.asarray(num_rel))
nodes_u_dict = {np.unicode(to_unicode(key)): val for key, val in
nodes_dict.items()}
labels_set = set(labels_df[label_header].values.tolist())
labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
print('{} classes: {}'.format(len(labels_set), labels_set))
labels = sp.lil_matrix((num_node, len(labels_set)))
labeled_nodes_idx = []
print('Loading training set')
train_idx = []
train_names = []
for nod, lab in zip(labels_train_df[nodes_header].values,
labels_train_df[label_header].values):
nod = np.unicode(to_unicode(nod)) # type: unicode
if nod in nodes_u_dict:
labeled_nodes_idx.append(nodes_u_dict[nod])
label_idx = labels_dict[lab]
labels[labeled_nodes_idx[-1], label_idx] = 1
train_idx.append(nodes_u_dict[nod])
train_names.append(nod)
else:
print(u'Node not in dictionary, skipped: ',
nod.encode('utf-8', errors='replace'))
print('Loading test set')
test_idx = []
test_names = []
for nod, lab in zip(labels_test_df[nodes_header].values,
labels_test_df[label_header].values):
nod = np.unicode(to_unicode(nod))
if nod in nodes_u_dict:
labeled_nodes_idx.append(nodes_u_dict[nod])
label_idx = labels_dict[lab]
labels[labeled_nodes_idx[-1], label_idx] = 1
test_idx.append(nodes_u_dict[nod])
test_names.append(nod)
else:
print(u'Node not in dictionary, skipped: ',
nod.encode('utf-8', errors='replace'))
labeled_nodes_idx = sorted(labeled_nodes_idx)
labels = labels.tocsr()
print('Number of classes: ', labels.shape[1])
_save_sparse_csr(labels_file, labels)
np.save(train_idx_file, train_idx)
np.save(test_idx_file, test_idx)
# np.save(train_names_file, train_names)
# np.save(test_names_file, test_names)
# pkl.dump(relations_dict, open(rel_dict_file, 'wb'))
# end if
return num_node, edge_list, num_rel, labels, labeled_nodes_idx, train_idx, test_idx
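# Worked example of the relation-id encoding used above (illustrative only):
# for raw relations r = 0 .. num_rel-1, the stored edge types are
#   0         -> self-loop relation
#   2*r + 1   -> forward direction of relation r
#   2*r + 2   -> reverse direction of relation r
# so two raw relations produce edge types {0, 1, 2, 3, 4}, which matches the
# earlier bookkeeping num_rel = 2 * num_rel + 1.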
def to_unicode(input):
# FIXME (lingfan): not sure about python 2 and 3 str compatibility
return str(input)
""" lingfan: comment out for now
if isinstance(input, unicode):
return input
elif isinstance(input, str):
return input.decode('utf-8', errors='replace')
return str(input).decode('utf-8', errors='replace')
"""
def _read_dictionary(filename):
d = {}
with open(filename, 'r+') as f:
for line in f:
line = line.strip().split('\t')
d[line[1]] = int(line[0])
return d
def _read_triplets(filename):
with open(filename, 'r+') as f:
for line in f:
processed_line = line.strip().split('\t')
yield processed_line
def _read_triplets_as_list(filename, entity_dict, relation_dict):
l = []
for triplet in _read_triplets(filename):
s = entity_dict[triplet[0]]
r = relation_dict[triplet[1]]
o = entity_dict[triplet[2]]
l.append([s, r, o])
return l
# This file contains DGL distributed kvstore APIs.
from ..network import _create_sender, _create_receiver
from ..network import _finalize_sender, _finalize_receiver
from ..network import _network_wait, _add_receiver_addr
from ..network import _receiver_wait, _sender_connect
from ..network import _send_kv_msg, _recv_kv_msg
from ..network import _clear_kv_msg
from ..network import _fast_pull
from ..network import KVMsgType, KVStoreMsg
from .. import backend as F
from .._ffi.ndarray import empty_shared_mem
import os
import time
import random
import numpy as np
import socket
if os.name != 'nt':
import fcntl
import struct
def read_ip_config(filename):
"""Read network configuration information of kvstore from file.
The format of the configuration file should be:
[ip] [base_port] [server_count]
172.31.40.143 30050 2
172.31.36.140 30050 2
172.31.47.147 30050 2
172.31.30.180 30050 2
Note that DGL KVStore supports multiple servers on the same machine that share data
with each other via shared-memory tensors, so the server_count should be >= 1.
Parameters
----------
filename : str
name of configuration file.
Returns
-------
dict
server namebook. e.g.,
[server_id]:[machine_id, ip, port, group_count]
{0:[0, '172.31.40.143', 30050, 2],
1:[0, '172.31.40.143', 30051, 2],
2:[1, '172.31.36.140', 30050, 2],
3:[1, '172.31.36.140', 30051, 2],
4:[2, '172.31.47.147', 30050, 2],
5:[2, '172.31.47.147', 30051, 2],
6:[3, '172.31.30.180', 30050, 2],
7:[3, '172.31.30.180', 30051, 2]}
"""
assert len(filename) > 0, 'filename cannot be empty.'
server_namebook = {}
try:
server_id = 0
machine_id = 0
lines = [line.rstrip('\n') for line in open(filename)]
for line in lines:
ip, port, server_count = line.split(' ')
for s_count in range(int(server_count)):
server_namebook[server_id] = [int(machine_id), ip, int(port)+s_count, int(server_count)]
server_id += 1
machine_id += 1
except:
print("Error: data format on each line should be: [ip] [base_port] [server_count]")
return server_namebook
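# A minimal usage sketch (illustrative only; 'ip_config.txt' is a hypothetical
# file name): given a configuration file listing two machines with two servers
# each, e.g.
#
#     172.31.40.143 30050 2
#     172.31.36.140 30050 2
#
# >>> server_namebook = read_ip_config('ip_config.txt')
# >>> server_namebook
# {0: [0, '172.31.40.143', 30050, 2],
#  1: [0, '172.31.40.143', 30051, 2],
#  2: [1, '172.31.36.140', 30050, 2],
#  3: [1, '172.31.36.140', 30051, 2]}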
def get_type_str(dtype):
"""Get data type string
"""
if 'float16' in str(dtype):
return 'float16'
elif 'float32' in str(dtype):
return 'float32'
elif 'float64' in str(dtype):
return 'float64'
elif 'uint8' in str(dtype):
return 'uint8'
elif 'int8' in str(dtype):
return 'int8'
elif 'int16' in str(dtype):
return 'int16'
elif 'int32' in str(dtype):
return 'int32'
elif 'int64' in str(dtype):
return 'int64'
else:
raise RuntimeError('Unknown data type: %s' % str(dtype))
class KVServer(object):
"""KVServer is a lightweight key-value store service for DGL distributed training.
In practice, developers can use KVServer to hold large-scale graph features or
graph embeddings across machines in a distributed setting. Users can also re-write the _push_handler()
and _pull_handler() APIs to support flexible algorithms.
DGL kvstore supports multiple servers on a single machine. That means we can launch many servers on the same machine, and all of
these servers will share the same shared-memory tensor for load balancing.
Note that KVServer must NOT be used from multiple Python threads because this behavior is undefined.
For now, KVServer can only run on CPU. We will support GPU KVServer in the future.
Parameters
----------
server_id : int
KVServer's ID (start from 0).
server_namebook: dict
IP address namebook of KVServer, where key is the KVServer's ID
(starting from 0) and value is [machine_id, IP address, port, group_count], e.g.,
{0:[0, '172.31.40.143', 30050, 2],
1:[0, '172.31.40.143', 30051, 2],
2:[1, '172.31.36.140', 30050, 2],
3:[1, '172.31.36.140', 30051, 2],
4:[2, '172.31.47.147', 30050, 2],
5:[2, '172.31.47.147', 30051, 2],
6:[3, '172.31.30.180', 30050, 2],
7:[3, '172.31.30.180', 30051, 2]}
num_client : int
Total number of client nodes.
queue_size : int
Size (bytes) of the kvstore message queue buffer (~20 GB by default).
Note that 20 GB is just an upper bound; DGL will not actually allocate 20 GB of memory.
net_type : str
networking type, e.g., 'socket' (default) or 'mpi' (not supported yet).
"""
def __init__(self, server_id, server_namebook, num_client, queue_size=20*1024*1024*1024, net_type='socket'):
assert server_id >= 0, 'server_id (%d) cannot be a negative number.' % server_id
assert len(server_namebook) > 0, 'server_namebook cannot be empty.'
assert num_client >= 0, 'num_client (%d) cannot be a negative number.' % num_client
assert queue_size > 0, 'queue_size (%d) cannot be a negative number.' % queue_size
assert net_type == 'socket' or net_type == 'mpi', 'net_type (%s) can only be \'socket\' or \'mpi\'.' % net_type
# check if target data has been initialized
self._has_data = set()
# Store the tensor data with specified data name
self._data_store = {}
# Used for barrier() API on KVClient
self._barrier_count = 0
# Server information
self._server_id = server_id
self._server_namebook = server_namebook
self._machine_id = server_namebook[server_id][0]
self._ip = server_namebook[server_id][1]
self._port = server_namebook[server_id][2]
self._group_count = server_namebook[server_id][3]
# client_namebook will be sent from remote client nodes
self._client_namebook = {}
self._client_count = num_client
# Create C communicator of sender and receiver
self._sender = _create_sender(net_type, queue_size)
self._receiver = _create_receiver(net_type, queue_size)
# Delete temp file when kvstore service is closed
self._open_file_list = []
# record for total message count
self._msg_count = 0
# user-defined push handler
self._udf_push_handler = None
self._udf_push_param = None
# user-defined pull handler
self._udf_pull_handler = None
def __del__(self):
"""Finalize KVServer
"""
# Finalize C communicator of sender and receiver
_finalize_sender(self._sender)
_finalize_receiver(self._receiver)
# Delete temp file when kvstore service is closed
for file in self._open_file_list:
if (os.path.exists(file)):
os.remove(file)
def set_global2local(self, name, global2local=None):
"""Set data mapping of global ID to local ID.
Parameters
----------
name : str
data name
global2local : list or tensor (mx.ndarray or torch.tensor)
A data mapping of global ID to local ID. KVStore will use global IDs by default
if global2local has not been set.
Note that if global2local is None, KVServer will read the shared-tensor instead.
"""
assert len(name) > 0, 'name cannot be empty.'
if global2local is not None: # Create shared-tensor
if isinstance(global2local, list):
global2local = F.tensor(global2local)
assert 'int64' == get_type_str(F.dtype(global2local)), 'global2local must be int64 type.'
shared_data = empty_shared_mem(name+'-g2l-', True, global2local.shape, 'int64')
dlpack = shared_data.to_dlpack()
self._data_store[name+'-g2l-'] = F.zerocopy_from_dlpack(dlpack)
self._data_store[name+'-g2l-'][:] = global2local[:]
# write data information to temp file that can be read by other processes
self._write_data_shape_type(name+'-g2l-shape-'+str(self._machine_id), global2local)
self._open_file_list.append(name+'-g2l-shape-'+str(self._machine_id))
else: # Read shared-tensor
while True:
if (os.path.exists(name+'-g2l-shape-'+str(self._machine_id))):
time.sleep(2) # wait for the write to finish
break
else:
time.sleep(2) # wait until the file has been created
data_shape, data_type = self._read_data_shape_type(name+'-g2l-shape-'+str(self._machine_id))
assert data_type == 'int64'
shared_data = empty_shared_mem(name+'-g2l-', False, data_shape, 'int64')
dlpack = shared_data.to_dlpack()
self._data_store[name+'-g2l-'] = F.zerocopy_from_dlpack(dlpack)
self._has_data.add(name+'-g2l-')
def set_partition_book(self, name, partition_book=None):
"""Partition book contains the data mapping of global ID to machine ID.
Parameters
----------
name : str
data name
partition_book : list or tensor (mx.ndarray or torch.tensor)
Mapping global ID to target machine ID.
Note that if partition_book is None, KVServer will read the shared-tensor by name.
"""
assert len(name) > 0, 'name cannot be empty.'
if partition_book is not None: # Create shared-tensor
if isinstance(partition_book, list):
partition_book = F.tensor(partition_book)
assert 'int64' == get_type_str(F.dtype(partition_book)), 'partition_book must be int64 type.'
shared_data = empty_shared_mem(name+'-part-', True, partition_book.shape, 'int64')
dlpack = shared_data.to_dlpack()
self._data_store[name+'-part-'] = F.zerocopy_from_dlpack(dlpack)
self._data_store[name+'-part-'][:] = partition_book[:]
self._write_data_shape_type(name+'-part-shape-'+str(self._machine_id), partition_book)
self._open_file_list.append(name+'-part-shape-'+str(self._machine_id))
else: # Read shared-tensor
while True:
if (os.path.exists(name+'-part-shape-'+str(self._machine_id))):
time.sleep(2) # wait for the write to finish
break
else:
time.sleep(2) # wait until the file has been created
data_shape, data_type = self._read_data_shape_type(name+'-part-shape-'+str(self._machine_id))
assert data_type == 'int64'
shared_data = empty_shared_mem(name+'-part-', False, data_shape, 'int64')
dlpack = shared_data.to_dlpack()
self._data_store[name+'-part-'] = F.zerocopy_from_dlpack(dlpack)
self._has_data.add(name+'-part-')
def init_data(self, name, data_tensor=None):
"""Initialize data tensor on KVServe.
Parameters
----------
name : str
data name
data_tensor : tensor (mx.ndarray or torch.tensor)
data tensor
Note that if data_tensor is None, KVServer will read the shared-tensor.
"""
assert len(name) > 0, 'name cannot be empty.'
if data_tensor is not None: # Create shared-tensor
data_type = get_type_str(F.dtype(data_tensor))
shared_data = empty_shared_mem(name+'-data-', True, data_tensor.shape, data_type)
dlpack = shared_data.to_dlpack()
self._data_store[name+'-data-'] = F.zerocopy_from_dlpack(dlpack)
self._data_store[name+'-data-'][:] = data_tensor[:]
self._write_data_shape_type(name+'-data-shape-'+str(self._machine_id), data_tensor)
self._open_file_list.append(name+'-data-shape-'+str(self._machine_id))
else: # Read shared-tensor
while True:
if (os.path.exists(name+'-data-shape-'+str(self._machine_id))):
break
else:
time.sleep(2) # wait until the file has been created
data_shape, data_type = self._read_data_shape_type(name+'-data-shape-'+str(self._machine_id))
shared_data = empty_shared_mem(name+'-data-', False, data_shape, data_type)
dlpack = shared_data.to_dlpack()
self._data_store[name+'-data-'] = F.zerocopy_from_dlpack(dlpack)
self._has_data.add(name+'-data-')
def get_id(self):
"""Get current server id
Return
------
int
KVServer ID
"""
return self._server_id
def get_addr(self):
"""Get current server IP address and port
Return
------
str
IP address and port
"""
return self._ip + ':' + str(self._port)
def get_machine_id(self):
"""Get local machine ID
Return
-------
int
machine ID
"""
return self._machine_id
def get_group_count(self):
"""Get count of server inside a machine
Return
------
int
count of server
"""
return self._group_count
def get_message_count(self):
"""Get total message count on current KVServer
Return
------
int
count of message
"""
return self._msg_count
def print(self):
"""Print server information (Used by debug)
"""
print("----- KVStore Info -----")
print("server id: %d" % self.get_id())
print("data:")
for name, data in self._data_store.items():
print(name)
print(data)
print("------------------------")
def start(self):
"""Start service of KVServer.
The start() API performs the following steps:
1. Connect with all client nodes.
2. Receive client address information.
3. Assign a client ID to each client node.
4. Send shared-tensor information to each client node.
5. Enter the service loop to listen for requests from client nodes.
"""
# Get connected with all client nodes
_receiver_wait(self._receiver, self._ip, self._port, self._client_count)
print("%d clients connected!" % self._client_count)
# recv client address information
addr_list = []
for i in range(self._client_count):
msg = _recv_kv_msg(self._receiver)
assert msg.type == KVMsgType.IP_ID
addr_list.append(msg.name)
# Assign client ID to each client node
addr_list.sort()
for ID in range(len(addr_list)):
self._client_namebook[ID] = addr_list[ID]
_network_wait()
for ID, addr in self._client_namebook.items():
client_ip, client_port = addr.split(':')
_add_receiver_addr(self._sender, client_ip, int(client_port), ID)
_sender_connect(self._sender)
if self._server_id == 0:
for client_id in range(len(self._client_namebook)):
msg = KVStoreMsg(
type=KVMsgType.IP_ID,
rank=self._server_id,
name=str(client_id),
id=None,
data=None,
shape=None,
c_ptr=None)
_send_kv_msg(self._sender, msg, client_id)
# Send shared-tensor information to each client node
if self._server_id == 0:
shared_tensor = ''
for name in self._has_data:
shared_tensor += self._serialize_shared_tensor(
name, F.dtype(self._data_store[name]))
shared_tensor += '|'
msg = KVStoreMsg(
type=KVMsgType.IP_ID,
rank=self._server_id,
name=shared_tensor,
id=None,
data=None,
shape=None,
c_ptr=None)
for client_id in range(len(self._client_namebook)):
_send_kv_msg(self._sender, msg, client_id)
print('KVStore service %d started successfully! Listening for requests ...' % self.get_id())
# Service loop
while True:
msg = _recv_kv_msg(self._receiver)
# Push message
if msg.type == KVMsgType.PUSH:
if msg.name+'-g2l-' in self._has_data:
local_id = self._data_store[msg.name+'-g2l-'][msg.id]
else:
local_id = msg.id
if self._udf_push_handler is not None:
self._udf_push_handler(msg.name+'-data-', local_id, msg.data, self._data_store, self._udf_push_param)
else:
self._default_push_handler(msg.name+'-data-', local_id, msg.data, self._data_store)
# Pull message
elif msg.type == KVMsgType.PULL:
if msg.name+'-g2l-' in self._has_data:
local_id = self._data_store[msg.name+'-g2l-'][msg.id]
else:
local_id = msg.id
if self._udf_pull_handler is not None:
res_tensor = self._udf_pull_handler(msg.name+'-data-', local_id, self._data_store)
else:
res_tensor = self._default_pull_handler(msg.name+'-data-', local_id, self._data_store)
back_msg = KVStoreMsg(
type=KVMsgType.PULL_BACK,
rank=self._server_id,
name=msg.name,
id=msg.id,
data=res_tensor,
shape=None,
c_ptr=None)
_send_kv_msg(self._sender, back_msg, msg.rank)
# Init new data
elif msg.type == KVMsgType.INIT:
assert msg.rank == 0
data_str, target_name = msg.name.split('|')
data_name, data_type = self._deserialize_shared_tensor(data_str)
dtype = F.data_type_dict[data_type]
data_shape = F.asnumpy(msg.shape).tolist()
if self._server_id % self._group_count == 0: # master server
data_tensor = F.zeros(data_shape, dtype, F.cpu())
self.init_data(name=data_name, data_tensor=data_tensor)
else: # backup server
self.init_data(name=data_name)
g2l = self._data_store[target_name+'-g2l-']
self._data_store[data_name+'-g2l-'] = g2l
self._has_data.add(data_name+'-g2l-')
back_msg = KVStoreMsg(
type=KVMsgType.INIT,
rank=self._server_id,
name=msg.name,
id=None,
data=None,
shape=msg.shape,
c_ptr=None)
_send_kv_msg(self._sender, back_msg, 0)
# Get shape message
elif msg.type == KVMsgType.GET_SHAPE:
data_shape = F.tensor(F.shape(self._data_store[msg.name+'-data-']))
back_msg = KVStoreMsg(
type=KVMsgType.GET_SHAPE_BACK,
rank=self._server_id,
name=msg.name,
id=None,
data=None,
shape=data_shape,
c_ptr=None)
_send_kv_msg(self._sender, back_msg, msg.rank)
# Barrier message
elif msg.type == KVMsgType.BARRIER:
self._barrier_count += 1
if self._barrier_count == self._client_count:
back_msg = KVStoreMsg(
type=KVMsgType.BARRIER,
rank=self._server_id,
name=None,
id=None,
data=None,
shape=None,
c_ptr=None)
for client_id in range(self._client_count):
_send_kv_msg(self._sender, back_msg, client_id)
self._barrier_count = 0
# Final message
elif msg.type == KVMsgType.FINAL:
print("Exit KVStore service %d, solved message count: %d" % (self.get_id(), self.get_message_count()))
break # exit loop
else:
raise RuntimeError('Unknown type of kvstore message: %d' % msg.type.value)
_clear_kv_msg(msg)
self._msg_count += 1
def _serialize_shared_tensor(self, name, dtype):
"""Serialize shared tensor information.
Parameters
----------
name : str
tensor name
dtype : dtype
data type
Returns
-------
str
serialized string
"""
assert len(name) > 0, 'data name cannot be empty.'
str_data = name
str_data += '/'
str_data += get_type_str(dtype)
return str_data
def _deserialize_shared_tensor(self, data):
"""Deserialize shared tensor information sent from server
Parameters
----------
data : str
serialized string
Returns
-------
str
tensor name
str
data type
"""
data_list = data.split('/')
tensor_name = data_list[0]
data_type = data_list[-1]
return tensor_name, data_type
def _write_data_shape_type(self, filename, data):
"""Write data shape to a temp file.
Parameters
----------
filename : str
name of temp file.
data : tensor (mx.ndarray or torch.tensor)
data tensor
"""
assert len(filename) > 0, 'filename cannot be empty.'
if(os.path.exists(filename)):
os.remove(filename)
shape = F.shape(data)
str_data = ''
str_data += get_type_str(F.dtype(data))
str_data += '|'
f = open(filename, "a");
for s in shape:
str_data += str(s)
str_data += '|'
f.write(str_data)
f.close()
def _read_data_shape_type(self, filename):
"""Read data shape from a tmp file.
Parameters
----------
filename : str
name of temp file
Return
------
tuple
(data shape, data type)
"""
assert len(filename) > 0, 'filename cannot be empty.'
f = open(filename, "r")
str_data = f.read()
data_list = str_data.split('|')
data_type = data_list[0]
data_shape = []
for i in range(1, len(data_list)-1):
data_shape.append(int(data_list[i]))
f.close()
return data_shape, data_type
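# Illustrative only: _write_data_shape_type() stores "<dtype>|<dim0>|<dim1>|..."
# in the temp file and _read_data_shape_type() parses it back. For example, a
# float32 tensor of shape (100, 16) is written as "float32|100|16|" and read
# back as ([100, 16], 'float32').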
def _default_push_handler(self, name, ID, data, target):
"""Default handler for PUSH message.
By default, _push_handler performs an update (assignment) operation on the tensor.
Parameters
----------
name : str
data name
ID : tensor (mx.ndarray or torch.tensor)
a vector storing the ID list.
data : tensor (mx.ndarray or torch.tensor)
a tensor with the same row size as ID
target : dict of data
self._data_store
"""
target[name][ID] = data
def _default_pull_handler(self, name, ID, target):
"""Default handler for PULL operation.
By default, _pull_handler performs a get (read) operation on the tensor.
Parameters
----------
name : str
data name
ID : tensor (mx.ndarray or torch.tensor)
a vector storing the ID list.
target : dict of data
self._data_store
Return
------
tensor
a tensor with the same row size as ID.
"""
return target[name][ID]
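# A minimal server-side launch sketch (not part of the original module),
# illustrative only: it assumes PyTorch as the DGL backend, a hypothetical
# 'ip_config.txt' file and a hypothetical data name 'embed', and it requires
# matching client processes to connect before start() returns:
#
# >>> import torch
# >>> server_namebook = read_ip_config('ip_config.txt')
# >>> server = KVServer(server_id=0, server_namebook=server_namebook, num_client=1)
# >>> server.set_partition_book('embed', partition_book=torch.zeros(100, dtype=torch.int64))
# >>> server.set_global2local('embed', global2local=torch.arange(100))
# >>> server.init_data('embed', data_tensor=torch.zeros(100, 16))
# >>> server.start()   # blocks in the service loop until a FINAL message arrives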
class KVClient(object):
"""KVClient is used to push/pull tensors to/from KVServer. If the server node and client node are on the
same machine, they can communicate with each other via a local shared-memory tensor instead of TCP/IP connections.
Note that KVClient must NOT be used from multiple Python threads because this behavior is undefined.
For now, KVClient can only run on CPU; we will support GPU KVClient in the future.
Parameters
----------
server_namebook: dict
IP address namebook of KVServer, where key is the KVServer's ID
(starting from 0) and value is [machine_id, IP address, port, group_count], e.g.,
{0:[0, '172.31.40.143', 30050, 2],
1:[0, '172.31.40.143', 30051, 2],
2:[1, '172.31.36.140', 30050, 2],
3:[1, '172.31.36.140', 30051, 2],
4:[2, '172.31.47.147', 30050, 2],
5:[2, '172.31.47.147', 30051, 2],
6:[3, '172.31.30.180', 30050, 2],
7:[3, '172.31.30.180', 30051, 2]}
queue_size : int
Size (bytes) of the kvstore message queue buffer (~20 GB by default).
net_type : str
networking type, e.g., 'socket' (default) or 'mpi'.
"""
def __init__(self, server_namebook, queue_size=20*1024*1024*1024, net_type='socket'):
assert len(server_namebook) > 0, 'server_namebook cannot be empty.'
assert queue_size > 0, 'queue_size (%d) cannot be a negative number.' % queue_size
assert net_type == 'socket' or net_type == 'mpi', 'net_type (%s) can only be \'socket\' or \'mpi\'.' % net_type
# check if target data has been initialized
self._has_data = set()
# This is used to store local data, which can share memory with local KVServer.
self._data_store = {}
self._full_data_shape = {}
self._data_name_list = []
# Server information
self._server_namebook = server_namebook
self._server_count = len(server_namebook)
self._group_count = server_namebook[0][3]
self._machine_count = int(self._server_count / self._group_count)
# client ID will be assigned by the server after connecting to it
self._client_id = -1
# Get local machine id via server_namebook
self._machine_id = self._get_local_machine_id()
# create C communicator of sender and receiver
self._sender = _create_sender(net_type, queue_size)
self._receiver = _create_receiver(net_type, queue_size)
# Delete temp file when kvstore service is closed
self._open_file_list = []
# Garbage collection
self._garbage_msg = []
# User-defined pull handler
self._udf_pull_handler = None
# User-defined push handler
self._udf_push_handler = None
self._udf_push_param = None
# Used for load balancing
random.seed(time.time())
def __del__(self):
"""Finalize KVClient
"""
# finalize C communicator of sender and receiver
_finalize_sender(self._sender)
_finalize_receiver(self._receiver)
# Delete temp files when kvstore service is closed
for file in self._open_file_list:
if(os.path.exists(file)):
os.remove(file)
def connect(self):
"""Connect to all the KVServer nodes
The connect() API performs the following steps:
1. Connect with all server nodes.
2. Send the client address information to the servers.
3. Receive a client ID from the server.
4. Receive shared-tensor information from the server.
"""
# Get connected with all server nodes
for ID, addr in self._server_namebook.items():
server_ip = addr[1]
server_port = addr[2]
_add_receiver_addr(self._sender, server_ip, server_port, ID)
_sender_connect(self._sender)
# Send client address to server nodes
self._addr = self._get_local_usable_addr()
client_ip, client_port = self._addr.split(':')
msg = KVStoreMsg(
type=KVMsgType.IP_ID,
rank=0, # a tmp client ID
name=self._addr,
id=None,
data=None,
shape=None,
c_ptr=None)
for server_id in range(self._server_count):
_send_kv_msg(self._sender, msg, server_id)
_receiver_wait(self._receiver, client_ip, int(client_port), self._server_count)
# Recv client ID from server
msg = _recv_kv_msg(self._receiver)
assert msg.rank == 0
self._client_id = int(msg.name)
# Recv shared-tensor information from server
msg = _recv_kv_msg(self._receiver)
assert msg.rank == 0
data_str = msg.name.split('|')
for data in data_str:
if data != '':
tensor_name, dtype = self._deserialize_shared_tensor(data)
while True:
if (os.path.exists(tensor_name+'shape-'+str(self._machine_id))):
break
else:
time.sleep(1) # wait until the file has been created
shape, data_type = self._read_data_shape_type(tensor_name+'shape-'+str(self._machine_id))
assert data_type == dtype
shared_data = empty_shared_mem(tensor_name, False, shape, dtype)
dlpack = shared_data.to_dlpack()
self._data_store[tensor_name] = F.zerocopy_from_dlpack(dlpack)
if '-data-' in tensor_name:
self._data_name_list.append(tensor_name[0:-6])
self._has_data.add(tensor_name)
# Get full shape of each data
for name in self._data_name_list:
data_shape = list(F.shape(self._data_store[name+'-data-']))
data_shape[0] = 0
msg = KVStoreMsg(
type=KVMsgType.GET_SHAPE,
rank=self._client_id,
name=name,
id=None,
data=None,
shape=None,
c_ptr=None)
# send msg
for m_id in range(self._machine_count):
s_id = m_id * self._group_count
_send_kv_msg(self._sender, msg, s_id)
# recv msg
for m_id in range(self._machine_count):
back_msg = _recv_kv_msg(self._receiver)
assert back_msg.type == KVMsgType.GET_SHAPE_BACK
data_shape[0] += ((F.asnumpy(back_msg.shape)).tolist())[0]
self._full_data_shape[name] = tuple(data_shape)
print("KVClient %d connect to kvstore successfully!" % self.get_id())
def init_data(self, name, shape, dtype, target_name):
"""Send message to kvserver to initialize new data and
get corresponded shared-tensor (e.g., partition_book, g2l) on kvclient.
The new data will be initialized to zeros.
Note that, this API must be invoked after the conenct() API.
Parameters
----------
name : str
data name
shape : list or tuple of int
data shape
dtype : dtype
data type
target_name : str
the target name is used to find the existing partition_book and g2l mapping.
"""
assert len(name) > 0, 'name cannot be empty.'
assert len(shape) > 0, 'shape cannot be empty.'
assert len(target_name) > 0, 'target_name cannot be empty.'
if self._client_id == 0: # only client_0 sends messages to the servers
partition_book = self._data_store[target_name+'-part-']
machines, count = np.unique(F.asnumpy(partition_book), return_counts=True)
assert shape[0] == len(partition_book)
# send message to all of the server nodes
for idx in range(len(machines)):
m_id = machines[idx]
data_str = self._serialize_shared_tensor(name, dtype)
data_str = data_str + '|' + target_name
partitioned_shape = list(shape)
partitioned_shape[0] = count[idx]
for n in range(self._group_count):
server_id = m_id * self._group_count + n
msg = KVStoreMsg(
type=KVMsgType.INIT,
rank=0,
name=data_str,
id=None,
data=None,
shape=F.tensor(partitioned_shape),
c_ptr=None)
_send_kv_msg(self._sender, msg, server_id)
# recv confirmation message from server nodes
for server_id in range(self._server_count):
msg = _recv_kv_msg(self._receiver)
assert msg.type == KVMsgType.INIT
self.barrier() # wait for all clients and servers to finish their jobs
g2l = self._data_store[target_name+'-g2l-']
partition_book = self._data_store[target_name+'-part-']
self._data_store[name+'-g2l-'] = g2l
self._data_store[name+'-part-'] = partition_book
self._has_data.add(name+'-g2l-')
self._has_data.add(name+'-part-')
# Read new data from shared-memory created by server
shape, data_type = self._read_data_shape_type(name+'-data-shape-'+str(self._machine_id))
assert data_type == get_type_str(dtype)
shared_data = empty_shared_mem(name+'-data-', False, shape, data_type)
dlpack = shared_data.to_dlpack()
self._data_store[name+'-data-'] = F.zerocopy_from_dlpack(dlpack)
self._has_data.add(name+'-data-')
self._data_name_list.append(name)
self._full_data_shape[name] = tuple(shape)
def print(self):
"""Print client information (Used by debug)
"""
print("----- KVClient Info -----")
print("client id: %d" % self.get_id())
print("data:")
for name, data in self._data_store.items():
print(name)
print(data)
print("-------------------------")
def get_id(self):
"""Get current client id
Return
------
int
KVClient ID
"""
return self._client_id
def get_addr(self):
"""Get current client IP address
Return
------
str
IP address
"""
return self._addr
def get_machine_id(self):
"""Get local machine ID
Return
-------
int
machine ID
"""
return self._machine_id
def get_data_name_list(self):
"""Get all the data name
Return
------
list of str
name list
"""
return self._data_name_list
def get_data_meta(self, name):
"""Get meta data (data_type, data_shape, partition_book) of the target shared-tensor
Parameter
---------
name : str
data name
Return
------
tuple
(data_type, data_shape, partition_book)
"""
assert len(name) > 0, 'name cannot be empty.'
assert name + '-data-' in self._has_data, 'Data (%s) does not exist!' % name
data_type = F.dtype(self._data_store[name+'-data-'])
partition_book = self._data_store[name+'-part-']
data_shape = self._full_data_shape[name]
return (data_type, data_shape, partition_book)
def push(self, name, id_tensor, data_tensor):
"""Push data to KVServer.
Note that push() is an async operation that returns immediately after being called.
Parameters
----------
name : str
data name
id_tensor : tensor (mx.ndarray or torch.tensor)
a vector storing the global data ID
data_tensor : tensor (mx.ndarray or torch.tensor)
a tensor with the same row size as the data IDs
"""
assert len(name) > 0, 'name cannot be empty.'
assert F.ndim(id_tensor) == 1, 'ID must be a vector.'
assert F.shape(id_tensor)[0] == F.shape(data_tensor)[0], 'The data must have the same row size as ID.'
# partition data
machine_id = self._data_store[name+'-part-'][id_tensor]
# sort index by machine id
sorted_id = F.tensor(np.argsort(F.asnumpy(machine_id)))
id_tensor = id_tensor[sorted_id]
data_tensor = data_tensor[sorted_id]
machine, count = np.unique(F.asnumpy(machine_id), return_counts=True)
# push data to server by order
start = 0
local_id = None
local_data = None
for idx in range(len(machine)):
end = start + count[idx]
if start == end: # No data for target machine
continue
partial_id = id_tensor[start:end]
partial_data = data_tensor[start:end]
if machine[idx] == self._machine_id: # local push
# Note: do not push local data right now, so that remote communication
# can overlap with the local push performed below
if name+'-g2l-' in self._has_data:
local_id = self._data_store[name+'-g2l-'][partial_id]
else:
local_id = partial_id
local_data = partial_data
else: # push data to remote server
msg = KVStoreMsg(
type=KVMsgType.PUSH,
rank=self._client_id,
name=name,
id=partial_id,
data=partial_data,
shape=None,
c_ptr=None)
# randomly select a server node on the target machine for load balancing
s_id = random.randint(machine[idx]*self._group_count, (machine[idx]+1)*self._group_count-1)
_send_kv_msg(self._sender, msg, s_id)
start += count[idx]
if local_id is not None: # local push
if self._udf_push_handler is not None:
self._udf_push_handler(name+'-data-', local_id, local_data, self._data_store, self._udf_push_param)
else:
self._default_push_handler(name+'-data-', local_id, local_data, self._data_store)
def pull(self, name, id_tensor):
"""Pull message from KVServer.
Parameters
----------
name : str
data name
id_tensor : tensor (mx.ndarray or torch.tensor)
a vector storing the ID list
Returns
-------
tensor
a data tensor with the same row size as id_tensor.
"""
assert len(name) > 0, 'name cannot be empty.'
assert F.ndim(id_tensor) == 1, 'ID must be a vector.'
if self._udf_pull_handler is None: # Use fast-pull
g2l = None
if name+'-g2l-' in self._data_store:
g2l = self._data_store[name+'-g2l-']
return _fast_pull(name, id_tensor,
self._machine_count,
self._group_count,
self._machine_id,
self._client_id,
self._data_store[name+'-part-'],
g2l,
self._data_store[name+'-data-'],
self._sender,
self._receiver)
else:
for msg in self._garbage_msg:
_clear_kv_msg(msg)
self._garbage_msg = []
# partition data
machine_id = self._data_store[name+'-part-'][id_tensor]
# sort index by machine id
sorted_id = F.tensor(np.argsort(F.asnumpy(machine_id)))
back_sorted_id = F.tensor(np.argsort(F.asnumpy(sorted_id)))
id_tensor = id_tensor[sorted_id]
machine, count = np.unique(F.asnumpy(machine_id), return_counts=True)
# pull data from server by order
start = 0
pull_count = 0
local_id = None
for idx in range(len(machine)):
end = start + count[idx]
if start == end: # No data for target machine
continue
partial_id = id_tensor[start:end]
if machine[idx] == self._machine_id: # local pull
# Note: do not pull local data right now, so that remote communication
# can overlap with the local pull performed below
if name+'-g2l-' in self._has_data:
local_id = self._data_store[name+'-g2l-'][partial_id]
else:
local_id = partial_id
else: # pull data from remote server
msg = KVStoreMsg(
type=KVMsgType.PULL,
rank=self._client_id,
name=name,
id=partial_id,
data=None,
shape=None,
c_ptr=None)
# randomly select a server node on the target machine for load balancing
s_id = random.randint(machine[idx]*self._group_count, (machine[idx]+1)*self._group_count-1)
_send_kv_msg(self._sender, msg, s_id)
pull_count += 1
start += count[idx]
msg_list = []
if local_id is not None: # local pull
local_data = self._udf_pull_handler(name+'-data-', local_id, self._data_store)
s_id = random.randint(self._machine_id*self._group_count, (self._machine_id+1)*self._group_count-1)
local_msg = KVStoreMsg(
type=KVMsgType.PULL_BACK,
rank=s_id,
name=name,
id=None,
data=local_data,
shape=None,
c_ptr=None)
msg_list.append(local_msg)
self._garbage_msg.append(local_msg)
# wait message from server nodes
for idx in range(pull_count):
remote_msg = _recv_kv_msg(self._receiver)
msg_list.append(remote_msg)
self._garbage_msg.append(remote_msg)
# sort msg by server id and merge tensor together
msg_list.sort(key=self._takeId)
data_tensor = F.cat(seq=[msg.data for msg in msg_list], dim=0)
return data_tensor[back_sorted_id] # return data with original index order
def barrier(self):
"""Barrier for all client nodes
This API blocks until all clients have called it.
"""
msg = KVStoreMsg(
type=KVMsgType.BARRIER,
rank=self._client_id,
name=None,
id=None,
data=None,
shape=None,
c_ptr=None)
for server_id in range(self._server_count):
_send_kv_msg(self._sender, msg, server_id)
for server_id in range(self._server_count):
back_msg = _recv_kv_msg(self._receiver)
assert back_msg.type == KVMsgType.BARRIER, 'Recv kv msg error.'
def shut_down(self):
"""Shut down all KVServer nodes.
This API is usually invoked by just one client (e.g., client_0).
"""
if self._client_id == 0:
for server_id in range(self._server_count):
msg = KVStoreMsg(
type=KVMsgType.FINAL,
rank=self._client_id,
name=None,
id=None,
data=None,
shape=None,
c_ptr=None)
_send_kv_msg(self._sender, msg, server_id)
def _get_local_usable_addr(self):
"""Get local available IP and port
Return
------
str
IP address and port, e.g., '192.168.8.12:50051'
"""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
# doesn't even have to be reachable
s.connect(('10.255.255.255', 1))
IP = s.getsockname()[0]
except:
IP = '127.0.0.1'
finally:
s.close()
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.bind(("",0))
s.listen(1)
port = s.getsockname()[1]
s.close()
return IP + ':' + str(port)
def _get_local_machine_id(self):
"""Get local machine ID from server_namebook
Return
------
int
local machine ID
"""
res = 0
for ID, data in self._server_namebook.items():
machine_id = data[0]
ip = data[1]
if ip in self._local_ip4_addr_list():
res = machine_id
break
return res
def _local_ip4_addr_list(self):
"""Return a set of IPv4 address
"""
nic = set()
for ix in socket.if_nameindex():
name = ix[1]
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
ip = socket.inet_ntoa(fcntl.ioctl(
s.fileno(),
0x8915, # SIOCGIFADDR
struct.pack('256s', name[:15].encode("UTF-8")))[20:24])
nic.add(ip)
return nic
def _serialize_shared_tensor(self, name, dtype):
"""Serialize shared tensor information.
Parameters
----------
name : str
tensor name
dtype : dtype
data type
Returns
-------
str
serialized string
"""
assert len(name) > 0, 'data name cannot be empty.'
str_data = name
str_data += '/'
str_data += get_type_str(dtype)
return str_data
def _deserialize_shared_tensor(self, data):
"""Deserialize shared tensor information sent from server
Parameters
----------
data : str
serialized string
Returns
-------
str
tensor name
str
data type
"""
data_list = data.split('/')
tensor_name = data_list[0]
data_type = data_list[-1]
return tensor_name, data_type
def _write_data_shape(self, filename, data):
"""Write data shape to a temp file.
Parameters
----------
filename : str
name of temp file.
data : tensor (mx.ndarray or torch.tensor)
data tensor
"""
assert len(filename) > 0, 'filename cannot be empty.'
if(os.path.exists(filename)):
os.remove(filename)
shape = F.shape(data)
str_data = ''
f = open(filename, "a");
for s in shape:
str_data += str(s)
str_data += '|'
f.write(str_data)
f.close()
def _read_data_shape_type(self, filename):
"""Read data shape from a tmp file.
Parameters
----------
filename : str
name of temp file
Return
------
tuple
(data shape, data type)
"""
assert len(filename) > 0, 'filename cannot be empty.'
f = open(filename, "r")
str_data = f.read()
data_list = str_data.split('|')
data_type = data_list[0]
data_shape = []
for i in range(1, len(data_list)-1):
data_shape.append(int(data_list[i]))
f.close()
return data_shape, data_type
def _takeId(self, elem):
"""Used by sort message list
"""
return elem.rank
def _default_push_handler(self, name, ID, data, target):
"""Default handler for PUSH message.
By default, _push_handler performs an update (assignment) operation on the tensor.
Parameters
----------
name : str
data name
ID : tensor (mx.ndarray or torch.tensor)
a vector storing the ID list.
data : tensor (mx.ndarray or torch.tensor)
a tensor with the same row size as ID
target : dict of data
self._data_store
"""
target[name][ID] = data
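# A minimal client-side sketch (not part of the original module), mirroring the
# server example above and illustrative only: it reuses the same hypothetical
# 'ip_config.txt' and 'embed' names, assumes PyTorch as the DGL backend, and
# assumes the client runs on a machine that also hosts a server so the shared
# tensors can be attached during connect():
#
# >>> import torch
# >>> server_namebook = read_ip_config('ip_config.txt')
# >>> client = KVClient(server_namebook)
# >>> client.connect()
# >>> ids = torch.tensor([0, 1, 2])
# >>> client.push('embed', ids, torch.ones(3, 16))   # async update
# >>> client.barrier()                               # synchronize all clients
# >>> feat = client.pull('embed', ids)
# >>> client.shut_down()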