Unverified Commit b9631912 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Frame] Refactor frame. (#85)

* refactor frame codes

* fix unit test

* fix gcn example

* minor doc/message changes

* raise errors for non-existent columns in FrameRef; sanity check when appending

* fix unittest; change error msg

* Add warning for none initializer

* fix unittest

* use warnings package
parent 66261aee
......@@ -16,10 +16,10 @@ from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gcn_msg(src, edge):
return src
return {'m' : src['h']}
def gcn_reduce(node, msgs):
return torch.sum(msgs, 1)
return {'h' : torch.sum(msgs['m'], 1)}
class NodeApplyModule(nn.Module):
def __init__(self, in_feats, out_feats, activation=None):
......@@ -28,10 +28,10 @@ class NodeApplyModule(nn.Module):
self.activation = activation
def forward(self, node):
h = self.linear(node)
h = self.linear(node['h'])
if self.activation:
h = self.activation(h)
return h
return {'h' : h}
class GCN(nn.Module):
def __init__(self,
......@@ -54,14 +54,14 @@ class GCN(nn.Module):
self.layers.append(NodeApplyModule(n_hidden, n_classes))
def forward(self, features):
    """Run all GCN layers on the graph and return the resulting node features.

    Parameters
    ----------
    features : Tensor
        The input node features. They are stored on the graph under the
        'h' field.

    Returns
    -------
    Tensor
        The 'h' node field after the last layer (popped from the graph).
    """
    self.g.set_n_repr({'h' : features})
    for layer in self.layers:
        # apply dropout
        if self.dropout:
            # Use the graph owned by this module (`g` is not defined in this
            # scope) and return the result under the 'h' field so that it
            # replaces the features read by the following update_all.
            # Dropout follows `self.training` so it is disabled in eval mode.
            self.g.apply_nodes(apply_node_func=lambda node: {
                'h' : F.dropout(node['h'], p=self.dropout,
                                training=self.training)})
        self.g.update_all(gcn_msg, gcn_reduce, layer)
    return self.g.pop_n_repr('h')
def main(args):
# load and preprocess dataset
......
......@@ -23,10 +23,10 @@ class NodeApplyModule(nn.Module):
self.activation = activation
def forward(self, node):
h = self.linear(node)
h = self.linear(node['h'])
if self.activation:
h = self.activation(h)
return h
return {'h' : h}
class GCN(nn.Module):
def __init__(self,
......@@ -49,14 +49,16 @@ class GCN(nn.Module):
self.layers.append(NodeApplyModule(n_hidden, n_classes))
def forward(self, features):
    """Run all GCN layers on the graph and return the resulting node features.

    Messages are produced and reduced with the builtin ``fn.copy_src`` and
    ``fn.sum`` functions: field 'h' is copied into message field 'm', and
    the messages are summed back into 'h'.

    Parameters
    ----------
    features : Tensor
        The input node features. They are stored on the graph under the
        'h' field.

    Returns
    -------
    Tensor
        The 'h' node field after the last layer (popped from the graph).
    """
    self.g.set_n_repr({'h' : features})
    for layer in self.layers:
        # apply dropout
        if self.dropout:
            # Use the graph owned by this module (`g` is not defined in this
            # scope) and return the result under the 'h' field so that it
            # replaces the features read by the following update_all.
            # Dropout follows `self.training` so it is disabled in eval mode.
            self.g.apply_nodes(apply_node_func=lambda node: {
                'h' : F.dropout(node['h'], p=self.dropout,
                                training=self.training)})
        self.g.update_all(fn.copy_src(src='h', out='m'),
                          fn.sum(msgs='m', out='h'),
                          layer)
    return self.g.pop_n_repr('h')
def main(args):
# load and preprocess dataset
......
......@@ -93,23 +93,24 @@ def get_context(arr):
return TVMContext(
TVMContext.STR2MASK[arr.device.type], arr.device.index)
def get_tvmtype(arr):
    """Return the TVMType that corresponds to the dtype of the given tensor.

    Parameters
    ----------
    arr : Tensor
        The torch tensor whose ``dtype`` is inspected.

    Returns
    -------
    TVMType
        The matching TVM data type.

    Raises
    ------
    RuntimeError
        If the tensor dtype is not one of the supported types below.
    """
    arr_dtype = arr.dtype
    # torch aliases (th.half, th.float, th.double, th.short, th.int, th.long)
    # are the same objects as the sized dtypes, so one entry per dtype
    # covers both spellings.
    type_names = {
        th.float16: 'float16',
        th.float32: 'float32',
        th.float64: 'float64',
        th.int16: 'int16',
        th.int32: 'int32',
        th.int64: 'int64',
        th.int8: 'int8',
        th.uint8: 'uint8',
    }
    type_name = type_names.get(arr_dtype)
    if type_name is None:
        # Format the dtype into the message; passing it as a second
        # exception argument would render the message as a tuple.
        raise RuntimeError('Unsupported data type: %s' % arr_dtype)
    return TVMType(type_name)
......@@ -130,20 +131,6 @@ def zerocopy_from_numpy(np_data):
"""Return a tensor that shares the numpy data."""
return th.from_numpy(np_data)
'''
data = arr_data
assert data.is_contiguous()
arr = TVMArray()
shape = c_array(tvm_shape_index_t, tuple(data.shape))
arr.data = ctypes.cast(data.data_ptr(), ctypes.c_void_p)
arr.shape = shape
arr.strides = None
arr.dtype = TVMType(_typestr(data.dtype))
arr.ndim = len(shape)
arr.ctx = get_context(data)
return arr
'''
def nonzero_1d(arr):
"""Return a 1D tensor with nonzero element indices in a 1D vector"""
assert arr.dim() == 1
......
"""Module for base types and utilities."""
from __future__ import absolute_import
import warnings
from ._ffi.base import DGLError
# A special argument for selecting all nodes/edges.
ALL = "__ALL__"
......@@ -8,3 +13,5 @@ def is_all(arg):
__MSG__ = "__MSG__"
__REPR__ = "__REPR__"
dgl_warning = warnings.warn
"""Columnar storage for graph attributes."""
"""Columnar storage for DGLGraph."""
from __future__ import absolute_import
from collections import MutableMapping
......@@ -6,178 +6,598 @@ import numpy as np
from . import backend as F
from .backend import Tensor
from .base import DGLError, dgl_warning
from . import utils
class Scheme(object):
    """The column scheme.

    Parameters
    ----------
    shape : tuple of int
        The feature shape.
    dtype : TVMType
        The feature data type.
    """
    def __init__(self, shape, dtype):
        self.shape = shape
        self.dtype = dtype

    def __repr__(self):
        return '{shape=%s, dtype=%s}' % (repr(self.shape), repr(self.dtype))

    def __eq__(self, other):
        """Two schemes are equal if both their shape and dtype are equal."""
        if not isinstance(other, Scheme):
            # Let python fall back to its default comparison instead of
            # failing with AttributeError on objects without shape/dtype.
            return NotImplemented
        return self.shape == other.shape and self.dtype == other.dtype

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    @staticmethod
    def infer_scheme(tensor):
        """Infer the scheme of the given tensor.

        The first dimension is dropped from the shape; the remaining
        dimensions form the feature shape.
        """
        return Scheme(tuple(F.shape(tensor)[1:]), F.get_tvmtype(tensor))
class Column(object):
    """A column is a compact store of features of multiple nodes/edges.

    Currently, we use one dense tensor to batch all the feature tensors
    together (along the first dimension).

    Parameters
    ----------
    data : Tensor
        The initial data of the column.
    scheme : Scheme, optional
        The scheme of the column. Will be inferred if not provided.
    """
    def __init__(self, data, scheme=None):
        self.data = data
        self.scheme = scheme if scheme else Scheme.infer_scheme(data)

    def __len__(self):
        """The column length."""
        return F.shape(self.data)[0]

    def __getitem__(self, idx):
        """Return the feature data given the index.

        Parameters
        ----------
        idx : utils.Index
            The index.

        Returns
        -------
        Tensor
            The feature data
        """
        user_idx = idx.tousertensor(F.get_context(self.data))
        return F.gather_row(self.data, user_idx)

    def __setitem__(self, idx, feats):
        """Update the feature data given the index.

        The update is performed out-placely so it can be used in autograd mode.
        For inplace write, please use ``update``.

        Parameters
        ----------
        idx : utils.Index
            The index.
        feats : Tensor
            The new features.
        """
        self.update(idx, feats, inplace=False)

    def update(self, idx, feats, inplace):
        """Update the feature data given the index.

        Parameters
        ----------
        idx : utils.Index
            The index.
        feats : Tensor
            The new features.
        inplace : bool
            If true, use inplace write.

        Raises
        ------
        DGLError
            If the scheme inferred from ``feats`` differs from the column scheme.
        """
        feat_scheme = Scheme.infer_scheme(feats)
        if feat_scheme != self.scheme:
            # The first placeholder is the column scheme and the second one
            # is the scheme of the incoming features.
            raise DGLError("Cannot update column of scheme %s using feature of scheme %s."
                           % (self.scheme, feat_scheme))
        user_idx = idx.tousertensor(F.get_context(self.data))
        if inplace:
            # TODO(minjie): do not use [] operator directly
            self.data[user_idx] = feats
        else:
            self.data = F.scatter_row(self.data, user_idx, feats)

    @staticmethod
    def create(data):
        """Create a new column using the given data.

        If ``data`` is already a Column, a new Column wrapping the same
        underlying data is returned, so the two columns are distinct objects.
        """
        if isinstance(data, Column):
            return Column(data.data)
        else:
            return Column(data)
class Frame(MutableMapping):
    """The columnar storage for node/edge features.

    The frame is a dictionary from feature fields to feature columns.
    All columns should have the same number of rows (i.e. the same first dimension).

    Parameters
    ----------
    data : dict-like, optional
        The frame data in dictionary. If the provided data is another frame,
        this frame will NOT share columns with the given frame. So any out-place
        update on one will not reflect to the other. The inplace update will
        be seen by both. This follows the semantic of python's container.
    """
    def __init__(self, data=None):
        if data is None:
            self._columns = dict()
            self._num_rows = 0
        else:
            # Note that we always create a new column for the given data.
            # This avoids two frames accidentally sharing the same column.
            self._columns = {k : Column.create(v) for k, v in data.items()}
            if len(self._columns) != 0:
                self._num_rows = len(next(iter(self._columns.values())))
            else:
                self._num_rows = 0
            # sanity check
            for name, col in self._columns.items():
                if len(col) != self._num_rows:
                    raise DGLError('Expected all columns to have same # rows (%d), '
                                   'got %d on %r.' % (self._num_rows, len(col), name))
        # Initializer for empty values. Initializer is a callable.
        # If is none, then a warning will be raised
        # in the first call and zero initializer will be used later.
        self._initializer = None

    def set_initializer(self, initializer):
        """Set the initializer for empty values.

        Initializer is a callable that returns a tensor given the shape and data type.

        Parameters
        ----------
        initializer : callable
            The initializer.
        """
        self._initializer = initializer

    @property
    def initializer(self):
        """Return the initializer of this frame."""
        return self._initializer

    @property
    def schemes(self):
        """Return a dictionary of column name to column schemes."""
        return {k : col.scheme for k, col in self._columns.items()}

    @property
    def num_columns(self):
        """Return the number of columns in this frame."""
        return len(self._columns)

    @property
    def num_rows(self):
        """Return the number of rows in this frame."""
        return self._num_rows

    def __contains__(self, name):
        """Return true if the given column name exists."""
        return name in self._columns

    def __getitem__(self, name):
        """Return the column of the given name.

        Parameters
        ----------
        name : str
            The column name.

        Returns
        -------
        Column
            The column.
        """
        return self._columns[name]

    def __setitem__(self, name, data):
        """Update the whole column.

        Parameters
        ----------
        name : str
            The column name.
        data : Column or data convertible to Column
            The column data.
        """
        self.update_column(name, data)

    def __delitem__(self, name):
        """Delete the whole column.

        Parameters
        ----------
        name : str
            The column name.
        """
        del self._columns[name]
        # An empty frame has no defined number of rows.
        if len(self._columns) == 0:
            self._num_rows = 0

    def add_column(self, name, scheme, ctx):
        """Add a new column to the frame.

        The column will be initialized by the initializer. If the column
        already exists, a warning is issued and the frame is left unchanged.

        Parameters
        ----------
        name : str
            The column name.
        scheme : Scheme
            The column scheme.
        ctx : TVMContext
            The column context.

        Raises
        ------
        DGLError
            If the frame has no rows, so the column length cannot be inferred.
        """
        if name in self:
            dgl_warning('Column "%s" already exists. Ignore adding this column again.' % name)
            return
        if self.num_rows == 0:
            raise DGLError('Cannot add column "%s" using column schemes because'
                           ' number of rows is unknown. Make sure there is at least'
                           ' one column in the frame so number of rows can be inferred.'
                           % name)
        if self.initializer is None:
            dgl_warning('Initializer is not set. Use zero initializer instead.'
                        ' To suppress this warning, use `set_initializer` to'
                        ' explicitly specify which initializer to use.')
            # TODO(minjie): handle data type
            self.set_initializer(lambda shape, dtype : F.zeros(shape))
        # TODO(minjie): directly init data on the target device.
        init_data = self.initializer((self.num_rows,) + scheme.shape, scheme.dtype)
        init_data = F.to_context(init_data, ctx)
        self._columns[name] = Column(init_data, scheme)

    def update_column(self, name, data):
        """Add or replace the column with the given name and data.

        Parameters
        ----------
        name : str
            The column name.
        data : Column or data convertible to Column
            The column data.

        Raises
        ------
        DGLError
            If the frame is not empty and the data has a different number of rows.
        """
        col = Column.create(data)
        if self.num_columns == 0:
            # The first column defines the number of rows of the frame.
            self._num_rows = len(col)
        elif len(col) != self._num_rows:
            raise DGLError('Expected data to have %d rows, got %d.' %
                           (self._num_rows, len(col)))
        self._columns[name] = col

    def append(self, other):
        """Append another frame's data into this frame.

        If the current frame is empty, it will just use the columns of the
        given frame. Otherwise, the given data should contain all the
        column keys of this frame.

        Parameters
        ----------
        other : Frame or dict-like
            The frame data to be appended.

        Raises
        ------
        DGLError
            If a column to append has a different scheme than the existing column.
        """
        if not isinstance(other, Frame):
            other = Frame(other)
        if len(self._columns) == 0:
            for key, col in other.items():
                self._columns[key] = col
            self._num_rows = other.num_rows
        else:
            for key, col in other.items():
                sch = self._columns[key].scheme
                other_sch = col.scheme
                if sch != other_sch:
                    raise DGLError("Cannot append column of scheme %s to column of scheme %s."
                                   % (other_sch, sch))
                self._columns[key].data = F.pack(
                    [self._columns[key].data, col.data])
            self._num_rows += other.num_rows

    def clear(self):
        """Clear this frame. Remove all the columns."""
        self._columns = {}
        self._num_rows = 0

    def __iter__(self):
        """Return an iterator of columns."""
        return iter(self._columns)

    def __len__(self):
        """Return the number of columns."""
        return self.num_columns

    def keys(self):
        """Return the keys."""
        return self._columns.keys()
class FrameRef(MutableMapping):
"""Frame reference
"""Reference object to a frame on a subset of rows.
Parameters
----------
frame : dgl.frame.Frame
The underlying frame.
index : iterable of int
The rows that are referenced in the underlying frame.
frame : Frame, optional
The underlying frame. If not given, the reference will point to a
new empty frame.
index : iterable of int, optional
The rows that are referenced in the underlying frame. If not given,
the whole frame is referenced. The index should be distinct (no
duplication is allowed).
"""
def __init__(self, frame=None, index=None):
self._frame = frame if frame is not None else Frame()
if index is None:
self._index_data = slice(0, self._frame.num_rows)
else:
# check no duplication
assert len(index) == len(np.unique(index))
# TODO(minjie): check no duplication
self._index_data = index
self._index = None
@property
def schemes(self):
"""Return the frame schemes.
Returns
-------
dict of str to Scheme
The frame schemes.
"""
return self._frame.schemes
@property
def num_columns(self):
"""Return the number of columns in the referred frame."""
return self._frame.num_columns
@property
def num_rows(self):
"""Return the number of rows referred."""
if isinstance(self._index_data, slice):
# NOTE: we are assuming that the index is a slice ONLY IF
# index=None during construction.
# As such, start is always 0, and step is always 1.
return self._index_data.stop
else:
return len(self._index_data)
def __contains__(self, key):
return key in self._frame
def set_initializer(self, initializer):
"""Set the initializer for empty values.
Initializer is a callable that returns a tensor given the shape and data type.
Parameters
----------
initializer : callable
The initializer.
"""
self._frame.set_initializer(initializer)
def index(self):
"""Return the index object.
Returns
-------
utils.Index
The index.
"""
if self._index is None:
if self.is_contiguous():
self._index = utils.toindex(
F.arange(self._index_data.stop, dtype=F.int64))
else:
self._index = utils.toindex(self._index_data)
return self._index
def __contains__(self, name):
"""Return whether the column name exists."""
return name in self._frame
def __iter__(self):
"""Return the iterator of the columns."""
return iter(self._frame)
def __len__(self):
"""Return the number of columns."""
return self.num_columns
def keys(self):
"""Return the keys."""
return self._frame.keys()
def __getitem__(self, key):
"""Get data from the frame.
If the provided key is string, the corresponding column data will be returned.
If the provided key is an index, the corresponding rows will be selected. The
returned rows are saved in a lazy dictionary so only the real selection happens
when the explicit column name is provided.
Examples (using pytorch)
------------------------
>>> # create a frame of two columns and five rows
>>> f = Frame({'c1' : torch.zeros([5, 2]), 'c2' : torch.ones([5, 2])})
>>> fr = FrameRef(f)
>>> # select the row 1 and 2, the returned `rows` is a lazy dictionary.
>>> rows = fr[Index([1, 2])]
>>> rows['c1'] # only select rows for 'c1' column; 'c2' column is not sliced.
Parameters
----------
key : str or utils.Index
The key.
Returns
-------
Tensor or lazy dict or tensors
Depends on whether it is a column selection or row selection.
"""
if isinstance(key, str):
return self.get_column(key)
return self.select_column(key)
else:
return self.select_rows(key)
def select_rows(self, query):
rowids = self._getrowid(query)
def _lazy_select(key):
idx = rowids.tousertensor(F.get_context(self._frame[key]))
return F.gather_row(self._frame[key], idx)
return utils.LazyDict(_lazy_select, keys=self.schemes)
def select_column(self, name):
"""Return the column of the given name.
def get_column(self, name):
If only part of the rows are referenced, the fetching the whole column will
also slice out the referenced rows.
Parameters
----------
name : str
The column name.
Returns
-------
Tensor
The column data.
"""
col = self._frame[name]
if self.is_span_whole_column():
return col
return col.data
else:
idx = self.index().tousertensor(F.get_context(col))
return F.gather_row(col, idx)
return col[self.index()]
def select_rows(self, query):
"""Return the rows given the query.
Parameters
----------
query : utils.Index
The rows to be selected.
Returns
-------
utils.LazyDict
The lazy dictionary from str to the selected data.
"""
rowids = self._getrowid(query)
return utils.LazyDict(lambda key: self._frame[key][rowids], keys=self.keys())
def __setitem__(self, key, val):
"""Update the data in the frame.
If the provided key is string, the corresponding column data will be updated.
The provided value should be one tensor that have the same scheme and length
as the column.
If the provided key is an index, the corresponding rows will be updated. The
value provided should be a dictionary of string to the data of each column.
All updates are performed out-placely to be work with autograd. For inplace
update, use ``update_column`` or ``update_rows``.
Parameters
----------
key : str or utils.Index
The key.
val : Tensor or dict of tensors
The value.
"""
if isinstance(key, str):
self.add_column(key, val)
self.update_column(key, val, inplace=False)
else:
self.update_rows(key, val)
self.update_rows(key, val, inplace=False)
def update_column(self, name, data, inplace):
    """Update the column.

    If this frameref spans the whole column of the underlying frame, this is
    equivalent to update the column of the frame; the column is replaced as
    a whole and ``inplace`` is not consulted.

    If this frameref only points to part of the rows, then update the column
    here will correspond to update part of the column in the frame. If the
    given column name does not exist, the column is first added to the
    underlying frame with the scheme and context of ``data`` (so it is
    filled by the frame's initializer), and then the referenced rows are
    updated.

    Parameters
    ----------
    name : str
        The column name.
    data : Tensor
        The update data.
    inplace : bool
        True if the update is performed inplacely.
    """
    if self.is_span_whole_column():
        col = Column.create(data)
        if self.num_columns == 0:
            # the frame is empty
            self._index_data = slice(0, len(col))
            self._clear_cache()
        self._frame[name] = col
    else:
        if name not in self._frame:
            # Infer the scheme the same way Column.update does, so the
            # scheme check there compares like with like.
            scheme = Scheme.infer_scheme(data)
            ctx = F.get_context(data)
            self._frame.add_column(name, scheme, ctx)
        fcol = self._frame[name]
        fcol.update(self.index(), data, inplace)
def update_rows(self, query, data, inplace):
"""Update the rows.
def update_rows(self, query, other, inplace=False):
If the provided data has new column, it will be added to the frame.
See Also
--------
``update_column``
Parameters
----------
query : utils.Index
The rows to be updated.
data : dict-like
The row data.
inplace : bool
True if the update is performed inplacely.
"""
rowids = self._getrowid(query)
for key, col in other.items():
for key, col in data.items():
if key not in self:
# add new column
tmpref = FrameRef(self._frame, rowids)
tmpref.add_column(key, col, inplace)
idx = rowids.tousertensor(F.get_context(self._frame[key]))
if inplace:
self._frame[key][idx] = col
tmpref.update_column(key, col, inplace)
#raise DGLError('Cannot update rows. Column "%s" does not exist.'
# ' Did you forget to init the column using `set_n_repr`'
# ' or `set_e_repr`?' % key)
else:
self._frame[key] = F.scatter_row(self._frame[key], idx, col)
self._frame[key].update(rowids, col, inplace)
def __delitem__(self, key):
"""Delete data in the frame.
If the provided key is a string, the corresponding column will be deleted.
If the provided key is an index object, the corresponding rows will be deleted.
Please note that "deleted" rows are not really deleted, but simply removed
in the reference. As a result, if two FrameRefs point to the same Frame, deleting
from one ref will not relect on the other. By contrast, deleting columns is real.
Parameters
----------
key : str or utils.Index
The key.
"""
if isinstance(key, str):
del self._frame[key]
if len(self._frame) == 0:
......@@ -186,7 +606,18 @@ class FrameRef(MutableMapping):
self.delete_rows(key)
def delete_rows(self, query):
query = F.asnumpy(query)
"""Delete rows.
Please note that "deleted" rows are not really deleted, but simply removed
in the reference. As a result, if two FrameRefs point to the same Frame, deleting
from one ref will not relect on the other. By contrast, deleting columns is real.
Parameters
----------
query : utils.Index
The rows to be deleted.
"""
query = query.tolist()
if isinstance(self._index_data, slice):
self._index_data = list(range(self._index_data.start, self._index_data.stop))
arr = np.array(self._index_data, dtype=np.int32)
......@@ -194,6 +625,13 @@ class FrameRef(MutableMapping):
self._clear_cache()
def append(self, other):
"""Append another frame into this one.
Parameters
----------
other : dict of str to tensor
The data to be appended.
"""
span_whole = self.is_span_whole_column()
contiguous = self.is_contiguous()
old_nrows = self._frame.num_rows
......@@ -208,24 +646,23 @@ class FrameRef(MutableMapping):
self._clear_cache()
def clear(self):
"""Clear the frame."""
self._frame.clear()
self._index_data = slice(0, 0)
self._clear_cache()
def __iter__(self):
return iter(self._frame)
def __len__(self):
return self.num_columns
def is_contiguous(self):
# NOTE: this check could have false negative
"""Return whether this refers to a contiguous range of rows."""
# NOTE: this check could have false negatives and false positives
# (step other than 1)
return isinstance(self._index_data, slice)
def is_span_whole_column(self):
"""Return whether this refers to all the rows."""
return self.is_contiguous() and self.num_rows == self._frame.num_rows
def _getrowid(self, query):
"""Internal function to convert from the local row ids to the row ids of the frame."""
if self.is_contiguous():
# shortcut for identical mapping
return query
......@@ -233,16 +670,8 @@ class FrameRef(MutableMapping):
idxtensor = self.index().tousertensor()
return utils.toindex(F.gather_row(idxtensor, query.tousertensor()))
def index(self):
if self._index is None:
if self.is_contiguous():
self._index = utils.toindex(
F.arange(self._index_data.stop, dtype=F.int64))
else:
self._index = utils.toindex(self._index_data)
return self._index
def _clear_cache(self):
"""Internal function to clear the cached object."""
self._index_tensor = None
def merge_frames(frames, indices, max_index, reduce_func):
......@@ -267,6 +696,8 @@ def merge_frames(frames, indices, max_index, reduce_func):
merged : FrameRef
The merged frame.
"""
# TODO(minjie)
assert False, 'Buggy code, disabled for now.'
assert reduce_func == 'sum'
assert len(frames) > 0
schemes = frames[0].schemes
......
......@@ -504,25 +504,49 @@ class DGLGraph(object):
self._msg_graph.add_nodes(self._graph.number_of_nodes())
def node_attr_schemes(self):
"""Return the node attribute schemes.
"""Return the node feature schemes.
Returns
-------
iterable
The set of attribute names
dict of str to schemes
The schemes of node feature columns.
"""
return self._node_frame.schemes
def edge_attr_schemes(self):
"""Return the edge attribute schemes.
"""Return the edge feature schemes.
Returns
-------
iterable
The set of attribute names
dict of str to schemes
The schemes of edge feature columns.
"""
return self._edge_frame.schemes
def set_n_initializer(self, initializer):
"""Set the initializer for empty node features.
Initializer is a callable that returns a tensor given the shape and data type.
Parameters
----------
initializer : callable
The initializer.
"""
self._node_frame.set_initializer(initializer)
def set_e_initializer(self, initializer):
"""Set the initializer for empty edge features.
Initializer is a callable that returns a tensor given the shape and data type.
Parameters
----------
initializer : callable
The initializer.
"""
self._edge_frame.set_initializer(initializer)
def set_n_repr(self, hu, u=ALL, inplace=False):
"""Set node(s) representation.
......@@ -534,12 +558,17 @@ class DGLGraph(object):
Dictionary type is also supported for `hu`. In this case, each item
will be treated as separate attribute of the nodes.
All update will be done out-placely to work with autograd unless the inplace
flag is true.
Parameters
----------
hu : tensor or dict of tensor
Node representation.
u : node, container or tensor
The node(s).
inplace : bool
True if the update is done inplacely
"""
# sanity check
if is_all(u):
......@@ -607,7 +636,7 @@ class DGLGraph(object):
"""
return self._node_frame.pop(key)
def set_e_repr(self, h_uv, u=ALL, v=ALL):
def set_e_repr(self, h_uv, u=ALL, v=ALL, inplace=False):
"""Set edge(s) representation.
To set multiple edge representations at once, pass `u` and `v` with tensors or
......@@ -618,6 +647,9 @@ class DGLGraph(object):
Dictionary type is also supported for `h_uv`. In this case, each item
will be treated as separate attribute of the edges.
All update will be done out-placely to work with autograd unless the inplace
flag is true.
Parameters
----------
h_uv : tensor or dict of tensor
......@@ -626,28 +658,35 @@ class DGLGraph(object):
The source node(s).
v : node, container or tensor
The destination node(s).
inplace : bool
True if the update is done inplacely
"""
# sanity check
u_is_all = is_all(u)
v_is_all = is_all(v)
assert u_is_all == v_is_all
if u_is_all:
self.set_e_repr_by_id(h_uv, eid=ALL)
self.set_e_repr_by_id(h_uv, eid=ALL, inplace=inplace)
else:
u = utils.toindex(u)
v = utils.toindex(v)
_, _, eid = self._graph.edge_ids(u, v)
self.set_e_repr_by_id(h_uv, eid=eid)
self.set_e_repr_by_id(h_uv, eid=eid, inplace=inplace)
def set_e_repr_by_id(self, h_uv, eid=ALL):
def set_e_repr_by_id(self, h_uv, eid=ALL, inplace=False):
"""Set edge(s) representation by edge id.
All update will be done out-placely to work with autograd unless the inplace
flag is true.
Parameters
----------
h_uv : tensor or dict of tensor
Edge representation.
eid : int, container or tensor
The edge id(s).
inplace : bool
True if the update is done inplacely
"""
# sanity check
if is_all(eid):
......@@ -662,16 +701,18 @@ class DGLGraph(object):
assert F.shape(h_uv)[0] == num_edges
# set
if is_all(eid):
# update column
if utils.is_dict_like(h_uv):
for key, val in h_uv.items():
self._edge_frame[key] = val
else:
self._edge_frame[__REPR__] = h_uv
else:
# update row
if utils.is_dict_like(h_uv):
self._edge_frame[eid] = h_uv
self._edge_frame.update_rows(eid, h_uv, inplace=inplace)
else:
self._edge_frame[eid] = {__REPR__ : h_uv}
self._edge_frame.update_rows(eid, {__REPR__ : h_uv}, inplace=inplace)
def get_e_repr(self, u=ALL, v=ALL):
"""Get node(s) representation.
......@@ -793,12 +834,12 @@ class DGLGraph(object):
"""
self._apply_edge_func = apply_edge_func
def apply_nodes(self, v, apply_node_func="default"):
def apply_nodes(self, v=ALL, apply_node_func="default"):
"""Apply the function on node representations.
Parameters
----------
v : int, iterable of int, tensor
v : int, iterable of int, tensor, optional
The node id(s).
apply_node_func : callable
The apply node function.
......@@ -952,8 +993,8 @@ class DGLGraph(object):
self._msg_frame.update_rows(
msg_target_rows,
{k: F.gather_row(msgs[k], msg_update_rows.tousertensor())
for k in msgs}
)
for k in msgs},
inplace=False)
if len(msg_append_rows) > 0:
new_u, new_v = zip(*new_uv)
new_u = utils.toindex(new_u)
......@@ -961,14 +1002,13 @@ class DGLGraph(object):
self._msg_graph.add_edges(new_u, new_v)
self._msg_frame.append(
{k: F.gather_row(msgs[k], msg_append_rows.tousertensor())
for k in msgs}
)
for k in msgs})
else:
if len(msg_target_rows) > 0:
self._msg_frame.update_rows(
msg_target_rows,
{__MSG__: F.gather_row(msgs, msg_update_rows.tousertensor())}
)
{__MSG__: F.gather_row(msgs, msg_update_rows.tousertensor())},
inplace=False)
if len(msg_append_rows) > 0:
new_u, new_v = zip(*new_uv)
new_u = utils.toindex(new_u)
......
......@@ -20,22 +20,26 @@ def reduce_func(node, msgs):
reduce_msg_shapes.add(tuple(msgs.shape))
assert len(msgs.shape) == 3
assert msgs.shape[2] == D
return {'m' : th.sum(msgs, 1)}
return {'accum' : th.sum(msgs, 1)}
def apply_node_func(node):
return {'h' : node['h'] + node['m']}
return {'h' : node['h'] + node['accum']}
def generate_graph(grad=False):
g = DGLGraph()
g.add_nodes(10) # 10 nodes.
# create a graph where 0 is the source and 9 is the sink
# 17 edges
for i in range(1, 9):
g.add_edge(0, i)
g.add_edge(i, 9)
# add a back flow from 9 to 0
g.add_edge(9, 0)
ncol = Variable(th.randn(10, D), requires_grad=grad)
accumcol = Variable(th.randn(10, D), requires_grad=grad)
ecol = Variable(th.randn(17, D), requires_grad=grad)
g.set_n_repr({'h' : ncol})
g.set_n_initializer(lambda shape, dtype : th.zeros(shape))
return g
def test_batch_setter_getter():
......@@ -46,8 +50,9 @@ def test_batch_setter_getter():
g.set_n_repr({'h' : th.zeros((10, D))})
assert _pfc(g.get_n_repr()['h']) == [0.] * 10
# pop nodes
old_len = len(g.get_n_repr())
assert _pfc(g.pop_n_repr('h')) == [0.] * 10
assert len(g.get_n_repr()) == 0
assert len(g.get_n_repr()) == old_len - 1
g.set_n_repr({'h' : th.zeros((10, D))})
# set partial nodes
u = th.tensor([1, 3, 5])
......@@ -81,8 +86,9 @@ def test_batch_setter_getter():
g.set_e_repr({'l' : th.zeros((17, D))})
assert _pfc(g.get_e_repr()['l']) == [0.] * 17
# pop edges
old_len = len(g.get_e_repr())
assert _pfc(g.pop_e_repr('l')) == [0.] * 17
assert len(g.get_e_repr()) == 0
assert len(g.get_e_repr()) == old_len - 1
g.set_e_repr({'l' : th.zeros((17, D))})
# set partial edges (many-many)
u = th.tensor([0, 0, 2, 5, 9])
......
......@@ -30,8 +30,10 @@ def generate_graph(grad=False):
g.add_edge(i, 9)
# add a back flow from 9 to 0
g.add_edge(9, 0)
col = Variable(th.randn(10, D), requires_grad=grad)
g.set_n_repr(col)
ncol = Variable(th.randn(10, D), requires_grad=grad)
ecol = Variable(th.randn(17, D), requires_grad=grad)
g.set_n_repr(ncol)
g.set_e_repr(ecol)
return g
def test_batch_setter_getter():
......
......@@ -2,14 +2,11 @@ import torch as th
from torch.autograd import Variable
import numpy as np
from dgl.frame import Frame, FrameRef
from dgl.utils import Index
from dgl.utils import Index, toindex
N = 10
D = 5
def check_eq(a, b):
return a.shape == b.shape and np.allclose(a.numpy(), b.numpy())
def check_fail(fn):
try:
fn()
......@@ -27,12 +24,13 @@ def test_create():
data = create_test_data()
f1 = Frame()
for k, v in data.items():
f1.add_column(k, v)
assert f1.schemes == set(data.keys())
f1.update_column(k, v)
print(f1.schemes)
assert f1.keys() == set(data.keys())
assert f1.num_columns == 3
assert f1.num_rows == N
f2 = Frame(data)
assert f2.schemes == set(data.keys())
assert f2.keys() == set(data.keys())
assert f2.num_columns == 3
assert f2.num_rows == N
f1.clear()
......@@ -45,9 +43,9 @@ def test_column1():
f = Frame(data)
assert f.num_rows == N
assert len(f) == 3
assert check_eq(f['a1'], data['a1'])
assert th.allclose(f['a1'].data, data['a1'].data)
f['a1'] = data['a2']
assert check_eq(f['a2'], data['a2'])
assert th.allclose(f['a2'].data, data['a2'].data)
# add a different length column should fail
def failed_add_col():
f['a4'] = th.zeros([N+1, D])
......@@ -70,16 +68,15 @@ def test_column2():
f = FrameRef(data, [3, 4, 5, 6, 7])
assert f.num_rows == 5
assert len(f) == 3
assert check_eq(f['a1'], data['a1'][3:8])
assert th.allclose(f['a1'], data['a1'].data[3:8])
# set column should reflect on the referenced data
f['a1'] = th.zeros([5, D])
assert check_eq(data['a1'][3:8], th.zeros([5, D]))
# add new column should be padded with zero
assert th.allclose(data['a1'].data[3:8], th.zeros([5, D]))
# add new partial column should fail with error initializer
f.set_initializer(lambda shape, dtype : assert_(False))
def failed_add_col():
f['a4'] = th.ones([5, D])
assert len(data) == 4
assert check_eq(data['a4'][0:3], th.zeros([3, D]))
assert check_eq(data['a4'][3:8], th.ones([5, D]))
assert check_eq(data['a4'][8:10], th.zeros([2, D]))
assert check_fail(failed_add_col)
def test_append1():
# test append API on Frame
......@@ -91,9 +88,14 @@ def test_append1():
f1.append(f2)
assert f1.num_rows == 2 * N
c1 = f1['a1']
assert c1.shape == (2 * N, D)
assert c1.data.shape == (2 * N, D)
truth = th.cat([data['a1'], data['a1']])
assert check_eq(truth, c1)
assert th.allclose(truth, c1.data)
# append dict of different length columns should fail
f3 = {'a1' : th.zeros((3, D)), 'a2' : th.zeros((3, D)), 'a3' : th.zeros((2, D))}
def failed_append():
f1.append(f3)
assert check_fail(failed_append)
def test_append2():
# test append on FrameRef
......@@ -113,7 +115,7 @@ def test_append2():
assert not f.is_span_whole_column()
assert f.num_rows == 3 * N
new_idx = list(range(N)) + list(range(2*N, 4*N))
assert check_eq(f.index().tousertensor(), th.tensor(new_idx))
assert th.all(f.index().tousertensor() == th.tensor(new_idx, dtype=th.int64))
assert data.num_rows == 4 * N
def test_row1():
......@@ -127,13 +129,13 @@ def test_row1():
rows = f[rowid]
for k, v in rows.items():
assert v.shape == (len(rowid), D)
assert check_eq(v, data[k][rowid])
assert th.allclose(v, data[k][rowid])
# test duplicate keys
rowid = Index(th.tensor([8, 2, 2, 1]))
rows = f[rowid]
for k, v in rows.items():
assert v.shape == (len(rowid), D)
assert check_eq(v, data[k][rowid])
assert th.allclose(v, data[k][rowid])
# setter
rowid = Index(th.tensor([0, 2, 4]))
......@@ -143,12 +145,14 @@ def test_row1():
}
f[rowid] = vals
for k, v in f[rowid].items():
assert check_eq(v, th.zeros((len(rowid), D)))
assert th.allclose(v, th.zeros((len(rowid), D)))
# setting rows with new column should automatically add a new column
# setting rows with new column should raise error with error initializer
f.set_initializer(lambda shape, dtype : assert_(False))
def failed_update_rows():
vals['a4'] = th.ones((len(rowid), D))
f[rowid] = vals
assert len(f) == 4
assert check_fail(failed_update_rows)
def test_row2():
# test row getter/setter autograd compatibility
......@@ -161,13 +165,13 @@ def test_row2():
rowid = Index(th.tensor([0, 2]))
rows = f[rowid]
rows['a1'].backward(th.ones((len(rowid), D)))
assert check_eq(c1.grad[:,0], th.tensor([1., 0., 1., 0., 0., 0., 0., 0., 0., 0.]))
assert th.allclose(c1.grad[:,0], th.tensor([1., 0., 1., 0., 0., 0., 0., 0., 0., 0.]))
c1.grad.data.zero_()
# test duplicate keys
rowid = Index(th.tensor([8, 2, 2, 1]))
rows = f[rowid]
rows['a1'].backward(th.ones((len(rowid), D)))
assert check_eq(c1.grad[:,0], th.tensor([0., 1., 2., 0., 0., 0., 0., 0., 1., 0.]))
assert th.allclose(c1.grad[:,0], th.tensor([0., 1., 2., 0., 0., 0., 0., 0., 1., 0.]))
c1.grad.data.zero_()
# setter
......@@ -180,8 +184,8 @@ def test_row2():
f[rowid] = vals
c11 = f['a1']
c11.backward(th.ones((N, D)))
assert check_eq(c1.grad[:,0], th.tensor([0., 1., 0., 1., 0., 1., 1., 1., 1., 1.]))
assert check_eq(vals['a1'].grad, th.ones((len(rowid), D)))
assert th.allclose(c1.grad[:,0], th.tensor([0., 1., 0., 1., 0., 1., 1., 1., 1., 1.]))
assert th.allclose(vals['a1'].grad, th.ones((len(rowid), D)))
assert vals['a2'].grad is None
def test_row3():
......@@ -201,8 +205,9 @@ def test_row3():
newidx = list(range(N))
newidx.pop(2)
newidx.pop(2)
newidx = toindex(newidx)
for k, v in f.items():
assert check_eq(v, data[k][th.tensor(newidx)])
assert th.allclose(v, data[k][newidx])
def test_sharing():
data = Frame(create_test_data())
......@@ -210,10 +215,10 @@ def test_sharing():
f2 = FrameRef(data, index=[2, 3, 4, 5, 6])
# test read
for k, v in f1.items():
assert check_eq(data[k][0:4], v)
assert th.allclose(data[k].data[0:4], v)
for k, v in f2.items():
assert check_eq(data[k][2:7], v)
f2_a1 = f2['a1']
assert th.allclose(data[k].data[2:7], v)
f2_a1 = f2['a1'].data
# test write
# updating rows owned by only one ref should not be seen by the other.
f1[Index(th.tensor([0, 1]))] = {
......@@ -221,7 +226,7 @@ def test_sharing():
'a2' : th.zeros([2, D]),
'a3' : th.zeros([2, D]),
}
assert check_eq(f2['a1'], f2_a1)
assert th.allclose(f2['a1'], f2_a1)
# updating the shared rows should be seen by the other.
f1[Index(th.tensor([2, 3]))] = {
'a1' : th.ones([2, D]),
......@@ -229,7 +234,7 @@ def test_sharing():
'a3' : th.ones([2, D]),
}
f2_a1[0:2] = th.ones([2, D])
assert check_eq(f2['a1'], f2_a1)
assert th.allclose(f2['a1'], f2_a1)
if __name__ == '__main__':
test_create()
......
......@@ -123,6 +123,7 @@ def test_update_all_multi_fn():
return {'v2': th.sum(msgs['m2'], 1)}
g = generate_graph()
g.set_n_repr({'v1' : th.zeros((10,)), 'v2' : th.zeros((10,))})
fld = 'f2'
# update all, mix of builtin and UDF
g.update_all([fn.copy_src(src=fld, out='m1'), message_func],
......@@ -173,6 +174,8 @@ def test_send_and_recv_multi_fn():
return {'v2' : th.sum(msgs['m2'], 1)}
g = generate_graph()
g.set_n_repr({'v1' : th.zeros((10, D)), 'v2' : th.zeros((10, D)),
'v3' : th.zeros((10, D))})
fld = 'f2'
# send and recv, mix of builtin and UDF
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment