Unverified commit 98325b10 authored by Hongzhi (Steve) Chen, committed by GitHub

[Misc] Black auto fix. (#4691)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent c24e285a
"""Views of DGLGraph."""
from __future__ import absolute_import
from collections import defaultdict, namedtuple
from collections.abc import MutableMapping
from . import backend as F
from .base import ALL, DGLError
from .frame import LazyFeature
NodeSpace = namedtuple("NodeSpace", ["data"])
EdgeSpace = namedtuple("EdgeSpace", ["data"])
class HeteroNodeView(object):
"""A NodeView class to act as G.nodes for a DGLHeteroGraph."""
__slots__ = ["_graph", "_typeid_getter"]
def __init__(self, graph, typeid_getter):
self._graph = graph
@@ -23,8 +24,9 @@ class HeteroNodeView(object):
def __getitem__(self, key):
if isinstance(key, slice):
# slice
if not (
key.start is None and key.stop is None and key.step is None
):
raise DGLError('Currently only full slice ":" is supported')
nodes = ALL
ntype = None
@@ -38,20 +40,25 @@ class HeteroNodeView(object):
ntype = None
ntid = self._typeid_getter(ntype)
return NodeSpace(
data=HeteroNodeDataView(self._graph, ntype, ntid, nodes)
)
def __call__(self, ntype=None):
"""Return the nodes."""
ntid = self._typeid_getter(ntype)
ret = F.arange(
0,
self._graph._graph.number_of_nodes(ntid),
dtype=self._graph.idtype,
ctx=self._graph.device,
)
return ret
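A minimal usage sketch of the node view above; the graph, node type, and feature names here are illustrative assumptions, not part of this commit:

import dgl
import torch

g = dgl.heterograph(
    {("user", "follows", "user"): (torch.tensor([0, 1]), torch.tensor([1, 2]))}
)
print(g.nodes("user"))  # tensor([0, 1, 2]) via HeteroNodeView.__call__
g.nodes["user"].data["h"] = torch.ones(3, 2)  # NodeSpace -> HeteroNodeDataView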
class HeteroNodeDataView(MutableMapping):
"""The data view class when G.ndata[ntype] is called."""
__slots__ = ["_graph", "_ntype", "_ntid", "_nodes"]
def __init__(self, graph, ntype, ntid, nodes):
self._graph = graph
@@ -63,9 +70,9 @@ class HeteroNodeDataView(MutableMapping):
if isinstance(self._ntype, list):
ret = {}
for (i, ntype) in enumerate(self._ntype):
value = self._graph._get_n_repr(self._ntid[i], self._nodes).get(
key, None
)
if value is not None:
ret[ntype] = value
return ret
@@ -76,17 +83,19 @@ class HeteroNodeDataView(MutableMapping):
if isinstance(val, LazyFeature):
self._graph._node_frames[self._ntid][key] = val
elif isinstance(self._ntype, list):
assert isinstance(val, dict), (
"Current HeteroNodeDataView has multiple node types, "
"please pass the node type and the corresponding data through a dict."
)
for (ntype, data) in val.items():
ntid = self._graph.get_ntype_id(ntype)
self._graph._set_n_repr(ntid, self._nodes, {key: data})
else:
assert isinstance(val, dict) is False, (
"The HeteroNodeDataView has only one node type. "
"Please pass a tensor directly."
)
self._graph._set_n_repr(self._ntid, self._nodes, {key: val})
def __delitem__(self, key):
@@ -108,8 +117,10 @@ class HeteroNodeDataView(MutableMapping):
else:
ret = self._graph._get_n_repr(self._ntid, self._nodes)
if as_dict:
ret = {
key: ret[key]
for key in self._graph._node_frames[self._ntid]
}
return ret
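A sketch of the dict-vs-tensor contract enforced by __setitem__ above: with several node types, ndata must be read and written as a per-type dict; with a single type, as a plain tensor. The graph and names are illustrative assumptions:

import dgl
import torch

hg = dgl.heterograph(
    {("user", "plays", "game"): (torch.tensor([0, 1]), torch.tensor([0, 0]))}
)
hg.ndata["x"] = {  # two node types -> dict keyed by node type
    "user": torch.zeros(2, 4),
    "game": torch.zeros(1, 4),
}
assert set(hg.ndata["x"]) == {"user", "game"}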
def __len__(self):
@@ -130,7 +141,8 @@
class HeteroEdgeView(object):
"""A EdgeView class to act as G.edges for a DGLHeteroGraph."""
__slots__ = ["_graph"]
def __init__(self, graph):
self._graph = graph
@@ -138,8 +150,9 @@
def __getitem__(self, key):
if isinstance(key, slice):
# slice
if not (
key.start is None and key.stop is None and key.step is None
):
raise DGLError('Currently only full slice ":" is supported')
edges = ALL
etype = None
@@ -168,23 +181,26 @@
class HeteroEdgeDataView(MutableMapping):
"""The data view class when G.edata[etype] is called."""
__slots__ = ["_graph", "_etype", "_etid", "_edges"]
def __init__(self, graph, etype, edges):
self._graph = graph
self._etype = etype
self._etid = (
[self._graph.get_etype_id(t) for t in etype]
if isinstance(etype, list)
else self._graph.get_etype_id(etype)
)
self._edges = edges
def __getitem__(self, key):
if isinstance(self._etype, list):
ret = {}
for (i, etype) in enumerate(self._etype):
value = self._graph._get_e_repr(self._etid[i], self._edges).get(
key, None
)
if value is not None:
ret[etype] = value
return ret
@@ -195,17 +211,19 @@
if isinstance(val, LazyFeature):
self._graph._edge_frames[self._etid][key] = val
elif isinstance(self._etype, list):
assert isinstance(val, dict), (
"Current HeteroEdgeDataView has multiple edge types, "
"please pass the edge type and the corresponding data through a dict."
)
for (etype, data) in val.items():
etid = self._graph.get_etype_id(etype)
self._graph._set_e_repr(etid, self._edges, {key: data})
else:
assert isinstance(val, dict) is False, (
"The HeteroEdgeDataView has only one edge type. "
"Please pass a tensor directly."
)
self._graph._set_e_repr(self._etid, self._edges, {key: val})
def __delitem__(self, key):
@@ -227,8 +245,10 @@
else:
ret = self._graph._get_e_repr(self._etid, self._edges)
if as_dict:
ret = {
key: ret[key]
for key in self._graph._edge_frames[self._etid]
}
return ret
def __len__(self):
...
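The analogous contract for HeteroEdgeDataView: one canonical edge type takes a plain tensor, several take a dict keyed by canonical edge type. A sketch with an illustrative graph (names are assumptions):

import dgl
import torch

hg = dgl.heterograph(
    {
        ("user", "follows", "user"): (torch.tensor([0]), torch.tensor([1])),
        ("user", "plays", "game"): (torch.tensor([0]), torch.tensor([0])),
    }
)
hg.edata["w"] = {
    ("user", "follows", "user"): torch.ones(1, 3),
    ("user", "plays", "game"): torch.zeros(1, 3),
}
assert hg["plays"].edata["w"].shape == (1, 3)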
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import glob
import os
import platform
import shutil
import sys
import sysconfig
from setuptools import find_packages
from setuptools.dist import Distribution
# need to use distutils.core for correct placement of cython dll
if "--inplace" in sys.argv:
from distutils.core import setup
from distutils.extension import Extension
else:
@@ -31,34 +31,35 @@ def get_lib_path():
"""Get library path, name and version"""
# We cannot import `libinfo.py` in setup.py directly since __init__.py
# will be invoked, which introduces dependencies.
libinfo_py = os.path.join(CURRENT_DIR, "./dgl/_ffi/libinfo.py")
libinfo = {"__file__": libinfo_py}
exec(
compile(open(libinfo_py, "rb").read(), libinfo_py, "exec"),
libinfo,
libinfo,
)
version = libinfo["__version__"]
lib_path = libinfo["find_lib_path"]()
libs = [lib_path[0]]
return libs, version
def get_ta_lib_pattern():
if sys.platform.startswith("linux"):
ta_lib_pattern = "libtensoradapter_*.so"
elif sys.platform.startswith("darwin"):
ta_lib_pattern = "libtensoradapter_*.dylib"
elif sys.platform.startswith("win"):
ta_lib_pattern = "tensoradapter_*.dll"
else:
raise NotImplementedError("Unsupported system: %s" % sys.platform)
return ta_lib_pattern
LIBS, VERSION = get_lib_path()
BACKENDS = ["pytorch"]
TA_LIB_PATTERN = get_ta_lib_pattern()
@@ -78,11 +79,9 @@ def cleanup():
for backend in BACKENDS:
for ta_path in glob.glob(
os.path.join(
CURRENT_DIR, "dgl", "tensoradapter", backend, TA_LIB_PATTERN
)
):
try:
os.remove(ta_path)
except BaseException:
@@ -91,17 +90,21 @@ def cleanup():
def config_cython():
"""Try to configure cython and return cython configuration"""
if sys.platform.startswith("win"):
print(
"WARNING: Cython is not supported on Windows, will compile without cython module"
)
return []
sys_cflags = sysconfig.get_config_var("CFLAGS")
if "i386" in sys_cflags and "x86_64" in sys_cflags:
print(
"WARNING: Cython library may not be compiled correctly with both i386 and x64"
)
return []
try:
from Cython.Build import cythonize
# from setuptools.extension import Extension
if sys.version_info >= (3, 0):
subdir = "_cy3"
@@ -109,32 +112,38 @@ def config_cython():
subdir = "_cy2"
ret = []
path = "dgl/_ffi/_cython"
library_dirs = ["dgl", "../build/Release", "../build"]
libraries = ["dgl"]
for fn in os.listdir(path):
if not fn.endswith(".pyx"):
continue
ret.append(
Extension(
"dgl._ffi.%s.%s" % (subdir, fn[:-4]),
["dgl/_ffi/_cython/%s" % fn],
include_dirs=[
"../include/",
"../third_party/dmlc-core/include",
"../third_party/dlpack/include",
],
library_dirs=library_dirs,
libraries=libraries,
# Crashes without this flag with GCC 5.3.1
extra_compile_args=["-std=c++11"],
language="c++",
)
)
return cythonize(ret, force=True)
except ImportError:
print(
"WARNING: Cython is not installed, will compile without cython module"
)
return []
include_libs = False
wheel_include_libs = False
if "bdist_wheel" in sys.argv or os.getenv("CONDA_BUILD"):
wheel_include_libs = True
elif "clean" in sys.argv:
cleanup()
@@ -147,78 +156,76 @@ setup_kwargs = {}
if wheel_include_libs:
with open("MANIFEST.in", "w") as fo:
for path in LIBS:
shutil.copy(path, os.path.join(CURRENT_DIR, "dgl"))
dir_, libname = os.path.split(path)
fo.write("include dgl/%s\n" % libname)
for backend in BACKENDS:
for ta_path in glob.glob(
os.path.join(dir_, "tensoradapter", backend, TA_LIB_PATTERN)
):
ta_name = os.path.basename(ta_path)
os.makedirs(
os.path.join(CURRENT_DIR, "dgl", "tensoradapter", backend),
exist_ok=True,
)
shutil.copy(
os.path.join(dir_, "tensoradapter", backend, ta_name),
os.path.join(CURRENT_DIR, "dgl", "tensoradapter", backend),
)
fo.write(
"include dgl/tensoradapter/%s/%s\n" % (backend, ta_name)
)
setup_kwargs = {"include_package_data": True}
# For source tree setup
# Conda build also includes the binary library
if include_libs:
rpath = [os.path.relpath(path, CURRENT_DIR) for path in LIBS]
data_files = [("dgl", rpath)]
for path in LIBS:
for backend in BACKENDS:
data_files.append(
(
"dgl/tensoradapter/%s" % backend,
glob.glob(
os.path.join(
os.path.dirname(os.path.relpath(path, CURRENT_DIR)),
"tensoradapter",
backend,
TA_LIB_PATTERN,
)
),
)
)
setup_kwargs = {"include_package_data": True, "data_files": data_files}
setup(
name="dgl" + os.getenv("DGL_PACKAGE_SUFFIX", ""),
version=VERSION,
description="Deep Graph Library",
zip_safe=False,
maintainer="DGL Team",
maintainer_email="wmjlyjemaine@gmail.com",
packages=find_packages(),
install_requires=[
"numpy>=1.14.0",
"scipy>=1.1.0",
"networkx>=2.1",
"requests>=2.19.0",
"tqdm",
"psutil>=5.8.0",
],
url="https://github.com/dmlc/dgl",
distclass=BinaryDistribution,
ext_modules=config_cython(),
classifiers=[
"Development Status :: 3 - Alpha",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
],
license="APACHE",
**setup_kwargs
)
...
@@ -8,10 +8,11 @@ List of affected files:
"""
import os
import re
# We use the version of the incoming release for code
# that is under development
__version__ = "0.10" + os.getenv("DGL_PRERELEASE", "")
print(__version__)
# Implementations
@@ -47,22 +48,24 @@ def main():
curr_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
proj_root = os.path.abspath(os.path.join(curr_dir, ".."))
# python path
update(
os.path.join(proj_root, "python", "dgl", "_ffi", "libinfo.py"),
r"(?<=__version__ = \")[.0-9a-z]+",
__version__,
)
# C++ header
update(
os.path.join(proj_root, "include", "dgl", "runtime", "c_runtime_api.h"),
'(?<=DGL_VERSION ")[.0-9a-z]+',
__version__,
)
# conda
for path in ["dgl"]:
update(
os.path.join(proj_root, "conda", path, "meta.yaml"),
'(?<=version: ")[.0-9a-z]+',
__version__,
)
if __name__ == "__main__":
...
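The update() helper called in main() is elided from this diff; a plausible sketch, assuming it performs an in-place regex substitution (the body is hypothetical, only the call signature is taken from the calls above):

import re

def update(file_name, pattern, repl):
    # rewrite the version captured by the lookbehind patterns above
    with open(file_name) as f:
        content = f.read()
    with open(file_name, "w") as f:
        f.write(re.sub(pattern, repl, content))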
import os
import torch
cmake_prefix_path = getattr(
torch.utils,
"cmake_prefix_path",
os.path.join(os.path.dirname(torch.__file__), "share", "cmake"),
)
version = torch.__version__.split("+")[0]
print(";".join([cmake_prefix_path, version]))
import importlib
import os
import sys
import numpy as np
from dgl.backend import *
from dgl.nn import *
from . import backend_unittest
mod = importlib.import_module(".%s" % backend_name, __name__)
thismod = sys.modules[__name__]
for api in backend_unittest.__dict__.keys():
if api.startswith("__"):
continue
elif callable(mod.__dict__[api]):
# Tensor APIs used in unit tests MUST be supported across all backends
@@ -26,39 +29,51 @@ _arange = arange
_full = full
_full_1d = full_1d
_softmax = softmax
_default_context_str = os.getenv("DGLTESTDEV", "cpu")
_context_dict = {
"cpu": cpu(),
"gpu": cuda(),
}
_default_context = _context_dict[_default_context_str]
def ctx():
return _default_context
def gpu_ctx():
return _default_context_str == "gpu"
def zeros(shape, dtype=float32, ctx=_default_context):
return _zeros(shape, dtype, ctx)
def ones(shape, dtype=float32, ctx=_default_context):
return _ones(shape, dtype, ctx)
def randn(shape):
return copy_to(_randn(shape), _default_context)
def tensor(data, dtype=None):
return copy_to(_tensor(data, dtype), _default_context)
def arange(start, stop, dtype=int64, ctx=None):
return _arange(
start, stop, dtype, ctx if ctx is not None else _default_context
)
def full(shape, fill_value, dtype, ctx=_default_context):
return _full(shape, fill_value, dtype, ctx)
def full_1d(length, fill_value, dtype, ctx=_default_context):
return _full_1d(length, fill_value, dtype, ctx)
def softmax(x, dim):
return _softmax(x, dim)
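How the wrappers above are meant to be used in unit tests: the default context comes from the DGLTESTDEV environment variable, so tests allocate tensors without naming a device. A sketch (shapes are illustrative, and DGLTESTDEV must be set before this module is imported):

import backend as F  # the test-suite shim defined above

x = F.zeros((2, 3))  # lands on _default_context ("cpu" unless DGLTESTDEV=gpu)
y = F.tensor([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
assert F.ctx() == F._default_context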
@@ -5,102 +5,127 @@ unit testing, other than the ones used in the framework itself.
###############################################################################
# Tensor, data type and context interfaces
def cuda():
"""Context object for CUDA."""
pass
def is_cuda_available():
"""Check whether CUDA is available."""
pass
###############################################################################
# Tensor functions on feature data
# --------------------------------
# These functions are performance critical, so it's better to have efficient
# implementation in each framework.
def array_equal(a, b):
"""Check whether the two tensors are *exactly* equal."""
pass
def allclose(a, b, rtol=1e-4, atol=1e-4):
"""Check whether the two tensors are numerically close to each other."""
pass
def randn(shape):
"""Generate a tensor with elements from standard normal distribution."""
pass
def full(shape, fill_value, dtype, ctx):
pass
def narrow_row_set(x, start, stop, new):
"""Set a slice of the given tensor to a new value."""
pass
def sparse_to_numpy(x):
"""Convert a sparse tensor to a numpy array."""
pass
def clone(x):
pass
def reduce_sum(x):
"""Sums all the elements into a single scalar."""
pass
def softmax(x, dim):
"""Softmax Operation on Tensors"""
pass
def spmm(x, y):
"""Sparse dense matrix multiply"""
pass
def add(a, b):
"""Compute a + b"""
pass
def sub(a, b):
"""Compute a - b"""
pass
def mul(a, b):
"""Compute a * b"""
pass
def div(a, b):
"""Compute a / b"""
pass
def sum(x, dim, keepdims=False):
"""Computes the sum of array elements over given axes"""
pass
def max(x, dim):
"""Computes the max of array elements over given axes"""
pass
def min(x, dim):
"""Computes the min of array elements over given axes"""
pass
def prod(x, dim):
"""Computes the prod of array elements over given axes"""
pass
def matmul(a, b):
"""Compute Matrix Multiplication between a and b"""
pass
def dot(a, b):
"""Compute Dot between a and b"""
pass
def abs(a):
"""Compute the absolute value of a"""
pass
###############################################################################
# Tensor functions used *only* on index tensor
# ----------------
...
from __future__ import absolute_import
import mxnet as mx
import mxnet.ndarray as nd
import numpy as np
def cuda():
return mx.gpu()
def is_cuda_available():
# TODO: Does MXNet have a convenient function to test GPU availability/compilation?
try:
@@ -15,65 +17,86 @@ def is_cuda_available():
except mx.MXNetError:
return False
def array_equal(a, b):
return nd.equal(a, b).asnumpy().all()
def allclose(a, b, rtol=1e-4, atol=1e-4):
return np.allclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol)
def randn(shape):
return nd.random.randn(*shape)
def full(shape, fill_value, dtype, ctx):
return nd.full(shape, fill_value, dtype=dtype, ctx=ctx)
def narrow_row_set(x, start, stop, new):
x[start:stop] = new
def sparse_to_numpy(x):
return x.asscipy().todense().A
def clone(x):
return x.copy()
def reduce_sum(x):
return x.sum()
def softmax(x, dim):
return nd.softmax(x, axis=dim)
def spmm(x, y):
return nd.dot(x, y)
def add(a, b):
return a + b
def sub(a, b):
return a - b
def mul(a, b):
return a * b
def div(a, b):
return a / b
def sum(x, dim, keepdims=False):
return x.sum(dim, keepdims=keepdims)
def max(x, dim):
return x.max(dim)
def min(x, dim):
return x.min(dim)
def prod(x, dim):
return x.prod(dim)
def matmul(a, b):
return nd.dot(a, b)
def dot(a, b):
return nd.sum(mul(a, b), axis=-1)
def abs(a):
return nd.abs(a)
@@ -2,72 +2,94 @@ from __future__ import absolute_import
import torch as th
def cuda():
return th.device("cuda:0")
def is_cuda_available():
return th.cuda.is_available()
def array_equal(a, b):
return th.equal(a.cpu(), b.cpu())
def allclose(a, b, rtol=1e-4, atol=1e-4):
return th.allclose(a.float().cpu(), b.float().cpu(), rtol=rtol, atol=atol)
def randn(shape):
return th.randn(*shape)
def full(shape, fill_value, dtype, ctx):
return th.full(shape, fill_value, dtype=dtype, device=ctx)
def narrow_row_set(x, start, stop, new):
x[start:stop] = new
def sparse_to_numpy(x):
return x.to_dense().numpy()
def clone(x):
return x.clone()
def reduce_sum(x):
return x.sum()
def softmax(x, dim):
return th.softmax(x, dim)
def spmm(x, y):
return th.spmm(x, y)
def add(a, b):
return a + b
def sub(a, b):
return a - b
def mul(a, b):
return a * b
def div(a, b):
return a / b
def sum(x, dim, keepdims=False):
return x.sum(dim, keepdims=keepdims)
def max(x, dim):
return x.max(dim)[0]
def min(x, dim):
return x.min(dim)[0]
def prod(x, dim):
return x.prod(dim)
def matmul(a, b):
return a @ b
def dot(a, b):
return sum(mul(a, b), dim=-1)
def abs(a):
return a.abs()
@@ -6,7 +6,7 @@ from scipy.sparse import coo_matrix
def cuda():
return "/gpu:0"
def is_cuda_available():
@@ -18,8 +18,12 @@ def array_equal(a, b):
def allclose(a, b, rtol=1e-4, atol=1e-4):
return np.allclose(
tf.convert_to_tensor(a).numpy(),
tf.convert_to_tensor(b).numpy(),
rtol=rtol,
atol=atol,
)
def randn(shape):
@@ -97,5 +101,6 @@ def matmul(a, b):
def dot(a, b):
return sum(mul(a, b), dim=-1)
def abs(a):
return tf.abs(a)
import itertools
import unittest
from collections import Counter
from itertools import product
import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
def create_test_heterograph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers
@@ -28,12 +33,16 @@ def create_test_heterograph(idtype):
# ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')])
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]),
("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]),
},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.device == F.ctx()
return g
@@ -45,49 +54,53 @@ def test_unary_copy_u(idtype):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes("user"), feat_size))
x2 = F.randn((g.num_nodes("developer"), feat_size))
F.attach_grad(x1)
F.attach_grad(x2)
g.nodes["user"].data["h"] = x1
g.nodes["developer"].data["h"] = x2
#################################################################
# apply_edges() is called on each relation type separately
#################################################################
with F.record_grad():
[
g.apply_edges(fn.copy_u("h", "m"), etype=rel)
for rel in g.canonical_etypes
]
r1 = g["plays"].edata["m"]
F.backward(r1, F.ones(r1.shape))
n_grad1 = F.grad(g.ndata["h"]["user"])
# TODO (Israt): clear not working
g.edata["m"].clear()
#################################################################
# apply_edges() is called on all relation types
#################################################################
g.apply_edges(fn.copy_u("h", "m"))
r2 = g["plays"].edata["m"]
F.backward(r2, F.ones(r2.shape))
n_grad2 = F.grad(g.nodes["user"].data["h"])
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(n_grad1, n_grad2):
print("node grad")
_print_error(n_grad1, n_grad2)
assert F.allclose(n_grad1, n_grad2)
_test(fn.copy_u)
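What fn.copy_u("h", "m") computes, written out by hand: apply_edges gives every edge its source node's "h" feature. A sketch on an illustrative homogeneous graph:

import dgl
import dgl.function as fn
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])))
g.ndata["h"] = torch.randn(3, 2)
g.apply_edges(fn.copy_u("h", "m"))
src, _ = g.edges()
assert torch.equal(g.edata["m"], g.ndata["h"][src])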
@@ -99,51 +112,55 @@ def test_unary_copy_e(idtype):
g = create_test_heterograph(idtype)
feat_size = 2
x1 = F.randn((4, feat_size))
x2 = F.randn((4, feat_size))
x3 = F.randn((3, feat_size))
x4 = F.randn((3, feat_size))
F.attach_grad(x1)
F.attach_grad(x2)
F.attach_grad(x3)
F.attach_grad(x4)
g["plays"].edata["eid"] = x1
g["follows"].edata["eid"] = x2
g["develops"].edata["eid"] = x3
g["wishes"].edata["eid"] = x4
#################################################################
# apply_edges() is called on each relation type separately
#################################################################
with F.record_grad():
[
g.apply_edges(fn.copy_e("eid", "m"), etype=rel)
for rel in g.canonical_etypes
]
r1 = g["develops"].edata["m"]
F.backward(r1, F.ones(r1.shape))
e_grad1 = F.grad(g["develops"].edata["eid"])
#################################################################
# apply_edges() is called on all relation types
#################################################################
g.apply_edges(fn.copy_e("eid", "m"))
r2 = g["develops"].edata["m"]
F.backward(r2, F.ones(r2.shape))
e_grad2 = F.grad(g["develops"].edata["eid"])
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(e_grad1, e_grad2):
print("edge grad")
_print_error(e_grad1, e_grad2)
assert F.allclose(e_grad1, e_grad2)
_test(fn.copy_e)
@@ -154,14 +171,14 @@ def test_binary_op(idtype):
g = create_test_heterograph(idtype)
n1 = F.randn((g.num_nodes("user"), feat_size))
n2 = F.randn((g.num_nodes("developer"), feat_size))
n3 = F.randn((g.num_nodes("game"), feat_size))
x1 = F.randn((g.num_edges("plays"), feat_size))
x2 = F.randn((g.num_edges("follows"), feat_size))
x3 = F.randn((g.num_edges("develops"), feat_size))
x4 = F.randn((g.num_edges("wishes"), feat_size))
builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
builtin_msg = getattr(fn, builtin_msg_name)
@@ -173,25 +190,27 @@ def test_binary_op(idtype):
F.attach_grad(n1)
F.attach_grad(n2)
F.attach_grad(n3)
g.nodes["user"].data["h"] = n1
g.nodes["developer"].data["h"] = n2
g.nodes["game"].data["h"] = n3
F.attach_grad(x1)
F.attach_grad(x2)
F.attach_grad(x3)
F.attach_grad(x4)
g["plays"].edata["h"] = x1
g["follows"].edata["h"] = x2
g["develops"].edata["h"] = x3
g["wishes"].edata["h"] = x4
with F.record_grad():
[
g.apply_edges(builtin_msg("h", "h", "m"), etype=rel)
for rel in g.canonical_etypes
]
r1 = g["plays"].edata["m"]
loss = F.sum(r1.view(-1), 0)
F.backward(loss)
n_grad1 = F.grad(g.nodes["game"].data["h"])
#################################################################
# apply_edges() is called on all relation types
@@ -200,38 +219,40 @@ def test_binary_op(idtype):
F.attach_grad(n1)
F.attach_grad(n2)
F.attach_grad(n3)
g.nodes["user"].data["h"] = n1
g.nodes["developer"].data["h"] = n2
g.nodes["game"].data["h"] = n3
F.attach_grad(x1)
F.attach_grad(x2)
F.attach_grad(x3)
F.attach_grad(x4)
g["plays"].edata["h"] = x1
g["follows"].edata["h"] = x2
g["develops"].edata["h"] = x3
g["wishes"].edata["h"] = x4
with F.record_grad():
g.apply_edges(builtin_msg("h", "h", "m"))
r2 = g["plays"].edata["m"]
loss = F.sum(r2.view(-1), 0)
F.backward(loss)
n_grad2 = F.grad(g.nodes["game"].data["h"])
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if n_grad1 is not None or n_grad2 is not None:
if not F.allclose(n_grad1, n_grad2):
print("node grad")
_print_error(n_grad1, n_grad2)
assert F.allclose(n_grad1, n_grad2)
target = ["u", "v", "e"]
for lhs, rhs in product(target, target):
@@ -242,6 +263,6 @@ def test_binary_op(idtype):
_test(lhs, rhs, binary_op)
if __name__ == "__main__":
test_unary_copy_u()
test_unary_copy_e()
import os
import unittest
import backend as F
def test_set_default_backend():
default_dir = os.path.join(os.path.expanduser("~"), ".dgl_unit_test")
F.set_default_backend(default_dir, "pytorch")
# make sure the config file was created
assert os.path.exists(os.path.join(default_dir, "config.json"))
import gzip
import os
import tempfile
import unittest
import backend as F
import numpy as np
import pandas as pd
import pytest
import yaml
import dgl
import dgl.data as data
from dgl import DGLError
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_minigc():
ds = data.MiniGCDataset(16, 10, 20)
@@ -24,35 +29,45 @@ def test_minigc():
g2 = ds[0][0]
assert g2.num_edges() - g1.num_edges() == g1.num_nodes()
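The invariant these dataset tests keep asserting, in isolation: dgl.AddSelfLoop(allow_duplicate=True) appends one self-loop per node, so the transformed graph gains exactly num_nodes() edges. A sketch with an illustrative graph:

import dgl
import torch

g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
g2 = dgl.AddSelfLoop(allow_duplicate=True)(g)
assert g2.num_edges() - g.num_edges() == g.num_nodes()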
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gin():
ds_n_graphs = {
"MUTAG": 188,
"IMDBBINARY": 1000,
"IMDBMULTI": 1500,
"PROTEINS": 1113,
"PTC": 344,
}
transform = dgl.AddSelfLoop(allow_duplicate=True)
for name, n_graphs in ds_n_graphs.items():
ds = data.GINDataset(name, self_loop=False, degree_as_nlabel=False)
assert len(ds) == n_graphs, (len(ds), name)
g1 = ds[0][0]
ds = data.GINDataset(
name, self_loop=False, degree_as_nlabel=False, transform=transform
)
g2 = ds[0][0]
assert g2.num_edges() - g1.num_edges() == g1.num_nodes()
assert ds.num_classes == ds.gclasses
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_fraud():
transform = dgl.AddSelfLoop(allow_duplicate=True)
g = data.FraudDataset("amazon")[0]
assert g.num_nodes() == 11944
num_edges1 = g.num_edges()
g2 = data.FraudDataset("amazon", transform=transform)[0]
# 3 edge types
assert g2.num_edges() - num_edges1 == g.num_nodes() * 3
@@ -68,55 +83,69 @@ def test_fraud():
# 3 edge types
assert g2.num_edges() - g.num_edges() == g.num_nodes() * 3
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_fakenews():
transform = dgl.AddSelfLoop(allow_duplicate=True)
ds = data.FakeNewsDataset("politifact", "bert")
assert len(ds) == 314
g = ds[0][0]
g2 = data.FakeNewsDataset("politifact", "bert", transform=transform)[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
ds = data.FakeNewsDataset("gossipcop", "profile")
assert len(ds) == 5464
g = ds[0][0]
g2 = data.FakeNewsDataset("gossipcop", "profile", transform=transform)[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_tudataset_regression():
ds = data.TUDataset("ZINC_test", force_reload=True)
assert ds.num_classes == ds.num_labels
assert len(ds) == 5000
g = ds[0][0]
transform = dgl.AddSelfLoop(allow_duplicate=True)
ds = data.TUDataset("ZINC_test", force_reload=True, transform=transform)
g2 = ds[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_data_hash():
class HashTestDataset(data.DGLDataset):
def __init__(self, hash_key=()):
super(HashTestDataset, self).__init__("hashtest", hash_key=hash_key)
def _load(self):
pass
a = HashTestDataset((True, 0, "1", (1, 2, 3)))
b = HashTestDataset((True, 0, "1", (1, 2, 3)))
c = HashTestDataset((True, 0, "1", (1, 2, 4)))
assert a.hash == b.hash
assert a.hash != c.hash
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_citation_graph():
transform = dgl.AddSelfLoop(allow_duplicate=True)
@@ -149,7 +178,10 @@ def test_citation_graph():
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gnn_benchmark():
transform = dgl.AddSelfLoop(allow_duplicate=True)
@@ -200,7 +232,10 @@ def test_gnn_benchmark():
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_reddit():
# RedditDataset
@@ -214,14 +249,18 @@ def test_reddit():
g2 = data.RedditDataset(transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_explain_syn():
dataset = data.BAShapeDataset()
assert dataset.num_classes == 4
g = dataset[0]
assert "label" in g.ndata
assert "feat" in g.ndata
g1 = data.BAShapeDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges()
@@ -233,8 +272,8 @@ def test_explain_syn():
dataset = data.BACommunityDataset()
assert dataset.num_classes == 8
g = dataset[0]
assert "label" in g.ndata
assert "feat" in g.ndata
g1 = data.BACommunityDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges()
@@ -246,8 +285,8 @@ def test_explain_syn():
dataset = data.TreeCycleDataset()
assert dataset.num_classes == 2
g = dataset[0]
assert "label" in g.ndata
assert "feat" in g.ndata
g1 = data.TreeCycleDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges()
@@ -259,8 +298,8 @@ def test_explain_syn():
dataset = data.TreeGridDataset()
assert dataset.num_classes == 2
g = dataset[0]
assert "label" in g.ndata
assert "feat" in g.ndata
g1 = data.TreeGridDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges()
@@ -272,9 +311,13 @@ def test_explain_syn():
dataset = data.BA2MotifDataset()
assert dataset.num_classes == 2
g, label = dataset[0]
assert "feat" in g.ndata
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_wiki_cs():
g = data.WikiCSDataset()[0]
@@ -287,6 +330,7 @@ def test_wiki_cs():
g2 = data.WikiCSDataset(transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skip(reason="Dataset too large to download for the latest CI.")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_yelp():
@@ -300,7 +344,11 @@ def test_yelp():
g2 = data.YelpDataset(reorder=True, transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_flickr():
g = data.FlickrDataset(reorder=True)[0]
@@ -313,15 +361,19 @@ def test_flickr():
g2 = data.FlickrDataset(reorder=True, transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_extract_archive():
# gzip
with tempfile.TemporaryDirectory() as src_dir:
gz_file = "gz_archive"
gz_path = os.path.join(src_dir, gz_file + ".gz")
content = b"test extract archive gzip"
with gzip.open(gz_path, "wb") as f:
f.write(content)
with tempfile.TemporaryDirectory() as dst_dir:
data.utils.extract_archive(gz_path, dst_dir, overwrite=True)
@@ -329,7 +381,12 @@ def test_extract_archive():
def _test_construct_graphs_node_ids():
from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
num_nodes = 100
num_edges = 1000
@@ -341,8 +398,7 @@ def _test_construct_graphs_node_ids():
edge_data = EdgeData(src_ids, dst_ids, {})
expect_except = False
try:
_, _ = DGLGraphConstructor.construct_graphs(node_data, edge_data)
except:
expect_except = True
assert expect_except
@@ -354,27 +410,31 @@ def _test_construct_graphs_node_ids():
src_ids = np.random.choice(node_ids, size=num_edges)
dst_ids = np.random.choice(node_ids, size=num_edges)
node_feat = np.random.rand(num_nodes, 3)
node_data = NodeData(node_ids, {"feat": node_feat})
edge_data = EdgeData(src_ids, dst_ids, {})
graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data
)
assert len(graphs) == 1
assert len(data_dict) == 0
g = graphs[0]
assert g.is_homogeneous
assert g.num_nodes() == len(node_ids)
assert g.num_edges() == len(src_ids)
assert F.array_equal(
F.tensor(node_feat[idx], dtype=F.float32), g.ndata["feat"]
)
# node IDs are mixed with numeric and non-numeric values
# homogeneous graph
node_ids = [1, 2, 3, "a"]
src_ids = [1, 2, 3]
dst_ids = ["a", 1, 2]
node_data = NodeData(node_ids, {})
edge_data = EdgeData(src_ids, dst_ids, {})
graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data
)
assert len(graphs) == 1
assert len(data_dict) == 0
g = graphs[0]
@@ -384,49 +444,63 @@ def _test_construct_graphs_node_ids():
# heterogeneous graph
node_ids_user = [1, 2, 3]
node_ids_item = ["a", "b", "c"]
src_ids = node_ids_user
dst_ids = node_ids_item
node_data_user = NodeData(node_ids_user, {}, type="user")
node_data_item = NodeData(node_ids_item, {}, type="item")
edge_data = EdgeData(src_ids, dst_ids, {}, type=("user", "like", "item"))
graphs, data_dict = DGLGraphConstructor.construct_graphs(
[node_data_user, node_data_item], edge_data
)
assert len(graphs) == 1
assert len(data_dict) == 0
g = graphs[0]
assert not g.is_homogeneous
assert g.num_nodes("user") == len(node_ids_user)
assert g.num_nodes("item") == len(node_ids_item)
assert g.num_edges() == len(src_ids)
def _test_construct_graphs_homo():
from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
# node_id could be non-sorted, non-numeric.
num_nodes = 100
num_edges = 1000
num_dims = 3
node_ids = np.random.choice(
np.arange(num_nodes * 2), size=num_nodes, replace=False
)
assert len(node_ids) == num_nodes
# to be non-sorted
np.random.shuffle(node_ids)
# to be non-numeric
node_ids = ["id_{}".format(id) for id in node_ids]
t_ndata = {
"feat": np.random.rand(num_nodes, num_dims),
"label": np.random.randint(2, size=num_nodes),
}
_, u_indices = np.unique(node_ids, return_index=True)
ndata = {
"feat": t_ndata["feat"][u_indices],
"label": t_ndata["label"][u_indices],
}
node_data = NodeData(node_ids, t_ndata)
src_ids = np.random.choice(node_ids, size=num_edges)
dst_ids = np.random.choice(node_ids, size=num_edges)
edata = {
"feat": np.random.rand(num_edges, num_dims),
"label": np.random.randint(2, size=num_edges),
}
edge_data = EdgeData(src_ids, dst_ids, edata)
graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data
)
assert len(graphs) == 1
assert len(data_dict) == 0
g = graphs[0]
@@ -439,63 +513,81 @@ def _test_construct_graphs_homo():
assert key in rhs
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
assert_data(ndata, g.ndata)
assert_data(edata, g.edata)
def _test_construct_graphs_hetero():
from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
# node_id/src_id/dst_id could be non-sorted, duplicated, non-numeric.
num_nodes = 100
num_edges = 1000
num_dims = 3
ntypes = ["user", "item"]
node_data = []
node_ids_dict = {}
ndata_dict = {}
for ntype in ntypes:
node_ids = np.random.choice(
np.arange(num_nodes * 2), size=num_nodes, replace=False
)
assert len(node_ids) == num_nodes
# to be non-sorted
np.random.shuffle(node_ids)
# to be non-numeric
node_ids = ["id_{}".format(id) for id in node_ids]
t_ndata = {
"feat": np.random.rand(num_nodes, num_dims),
"label": np.random.randint(2, size=num_nodes),
}
_, u_indices = np.unique(node_ids, return_index=True)
ndata = {
"feat": t_ndata["feat"][u_indices],
"label": t_ndata["label"][u_indices],
}
node_data.append(NodeData(node_ids, t_ndata, type=ntype))
node_ids_dict[ntype] = node_ids
ndata_dict[ntype] = ndata
etypes = [("user", "follow", "user"), ("user", "like", "item")]
edge_data = []
edata_dict = {}
for src_type, e_type, dst_type in etypes:
src_ids = np.random.choice(node_ids_dict[src_type], size=num_edges)
dst_ids = np.random.choice(node_ids_dict[dst_type], size=num_edges)
edata = {
"feat": np.random.rand(num_edges, num_dims),
"label": np.random.randint(2, size=num_edges),
}
edge_data.append(
EdgeData(src_ids, dst_ids, edata, type=(src_type, e_type, dst_type))
)
edata_dict[(src_type, e_type, dst_type)] = edata
graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data
)
assert len(graphs) == 1
assert len(data_dict) == 0
g = graphs[0]
assert not g.is_homogeneous
assert g.num_nodes() == num_nodes * len(ntypes)
assert g.num_edges() == num_edges * len(etypes)
def assert_data(lhs, rhs):
for key, value in lhs.items():
assert key in rhs
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert_data(ndata_dict[ntype], g.nodes[ntype].data)
@@ -505,7 +597,13 @@ def _test_construct_graphs_hetero():
def _test_construct_graphs_multiple():
from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
GraphData,
NodeData,
)
num_nodes = 100
num_edges = 1000
num_graphs = 10
@@ -518,35 +616,48 @@ def _test_construct_graphs_multiple():
u_indices = np.array([], dtype=np.int)
for i in range(num_graphs):
l_node_ids = np.random.choice(
np.arange(num_nodes * 2), size=num_nodes, replace=False
)
node_ids = np.append(node_ids, l_node_ids)
_, l_u_indices = np.unique(l_node_ids, return_index=True)
u_indices = np.append(u_indices, l_u_indices)
ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i))
src_ids = np.append(
src_ids, np.random.choice(l_node_ids, size=num_edges)
)
dst_ids = np.append(
dst_ids, np.random.choice(l_node_ids, size=num_edges)
)
egraph_ids = np.append(egraph_ids, np.full(num_edges, i))
ndata = {
"feat": np.random.rand(num_nodes * num_graphs, num_dims),
"label": np.random.randint(2, size=num_nodes * num_graphs),
}
ngraph_ids = ["graph_{}".format(id) for id in ngraph_ids]
node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids)
egraph_ids = ["graph_{}".format(id) for id in egraph_ids]
edata = {
"feat": np.random.rand(num_edges * num_graphs, num_dims),
"label": np.random.randint(2, size=num_edges * num_graphs),
}
edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids)
gdata = {
"feat": np.random.rand(num_graphs, num_dims),
"label": np.random.randint(2, size=num_graphs),
}
graph_ids = ["graph_{}".format(id) for id in np.arange(num_graphs)]
graph_data = GraphData(graph_ids, gdata)
graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data, graph_data
)
assert len(graphs) == num_graphs
assert len(data_dict) == len(gdata)
for k, v in data_dict.items():
assert F.dtype(v) != F.float64
assert F.array_equal(
F.reshape(F.tensor(gdata[k], dtype=F.dtype(v)), (len(graphs), -1)),
v,
)
for i, g in enumerate(graphs):
assert g.is_homogeneous
assert g.num_nodes() == num_nodes
@@ -555,22 +666,25 @@ def _test_construct_graphs_multiple():
def assert_data(lhs, rhs, size, node=False):
for key, value in lhs.items():
assert key in rhs
value = value[i * size : (i + 1) * size]
if node:
indices = u_indices[i * size : (i + 1) * size]
value = value[indices]
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
assert_data(ndata, g.ndata, num_nodes, node=True)
assert_data(edata, g.edata, num_edges)
# Graph IDs found in node/edge CSV but not in graph CSV
graph_data = GraphData(np.arange(num_graphs - 2), {})
expect_except = False
try:
_, _ = DGLGraphConstructor.construct_graphs(
node_data, edge_data, graph_data
)
except:
expect_except = True
assert expect_except
@@ -578,6 +692,7 @@ def _test_construct_graphs_multiple():
def _test_DefaultDataParser():
from dgl.data.csv_dataset_base import DefaultDataParser
# common csv
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
@@ -587,21 +702,28 @@ def _test_DefaultDataParser():
node_id = np.arange(num_nodes)
label = np.random.randint(num_labels, size=num_nodes)
feat = np.random.rand(num_nodes, num_dims)
df = pd.DataFrame(
{
"node_id": node_id,
"label": label,
"feat": [line.tolist() for line in feat],
}
)
df.to_csv(csv_path, index=False)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
dt = dp(df)
assert np.array_equal(node_id, dt["node_id"])
assert np.array_equal(label, dt["label"])
assert np.array_equal(feat, dt["feat"])
# string consists of non-numeric values
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": ["a", "b", "c"],
}
)
df.to_csv(csv_path, index=False)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
@@ -614,8 +736,11 @@ def _test_DefaultDataParser():
# csv has index column which is ignored as it's unnamed
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": [1, 2, 3],
}
)
df.to_csv(csv_path)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
......@@ -625,69 +750,96 @@ def _test_DefaultDataParser():
def _test_load_yaml_with_sanity_check():
from dgl.data.csv_dataset_base import load_yaml_with_sanity_check
with tempfile.TemporaryDirectory() as test_dir:
yaml_path = os.path.join(test_dir, 'meta.yaml')
yaml_path = os.path.join(test_dir, "meta.yaml")
# workable, though usually meaningless
yaml_data = {'dataset_name': 'default',
'node_data': [], 'edge_data': []}
with open(yaml_path, 'w') as f:
yaml_data = {
"dataset_name": "default",
"node_data": [],
"edge_data": [],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path)
assert meta.version == '1.0.0'
assert meta.dataset_name == 'default'
assert meta.separator == ','
assert meta.version == "1.0.0"
assert meta.dataset_name == "default"
assert meta.separator == ","
assert len(meta.node_data) == 0
assert len(meta.edge_data) == 0
assert meta.graph_data is None
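# For reference, the meta.yaml dumped above looks roughly like this on disk
# (illustrative; assumes yaml.dump with sort_keys=False):
#
#   dataset_name: default
#   node_data: []
#   edge_data: []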
# minimum with required fields only
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}],
'edge_data': [{'file_name': 'edges.csv'}],
}
with open(yaml_path, 'w') as f:
yaml_data = {
"version": "1.0.0",
"dataset_name": "default",
"node_data": [{"file_name": "nodes.csv"}],
"edge_data": [{"file_name": "edges.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path)
for ndata in meta.node_data:
assert ndata.file_name == 'nodes.csv'
assert ndata.ntype == '_V'
assert ndata.graph_id_field == 'graph_id'
assert ndata.node_id_field == 'node_id'
assert ndata.file_name == "nodes.csv"
assert ndata.ntype == "_V"
assert ndata.graph_id_field == "graph_id"
assert ndata.node_id_field == "node_id"
for edata in meta.edge_data:
assert edata.file_name == 'edges.csv'
assert edata.etype == ['_V', '_E', '_V']
assert edata.graph_id_field == 'graph_id'
assert edata.src_id_field == 'src_id'
assert edata.dst_id_field == 'dst_id'
assert edata.file_name == "edges.csv"
assert edata.etype == ["_V", "_E", "_V"]
assert edata.graph_id_field == "graph_id"
assert edata.src_id_field == "src_id"
assert edata.dst_id_field == "dst_id"
# optional fields are specified
yaml_data = {'version': '1.0.0', 'dataset_name': 'default',
'separator': '|',
'node_data': [{'file_name': 'nodes.csv', 'ntype': 'user', 'graph_id_field': 'xxx', 'node_id_field': 'xxx'}],
'edge_data': [{'file_name': 'edges.csv', 'etype': ['user', 'follow', 'user'], 'graph_id_field':'xxx', 'src_id_field':'xxx', 'dst_id_field':'xxx'}],
'graph_data': {'file_name': 'graph.csv', 'graph_id_field': 'xxx'}
}
with open(yaml_path, 'w') as f:
yaml_data = {
"version": "1.0.0",
"dataset_name": "default",
"separator": "|",
"node_data": [
{
"file_name": "nodes.csv",
"ntype": "user",
"graph_id_field": "xxx",
"node_id_field": "xxx",
}
],
"edge_data": [
{
"file_name": "edges.csv",
"etype": ["user", "follow", "user"],
"graph_id_field": "xxx",
"src_id_field": "xxx",
"dst_id_field": "xxx",
}
],
"graph_data": {"file_name": "graph.csv", "graph_id_field": "xxx"},
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path)
assert len(meta.node_data) == 1
ndata = meta.node_data[0]
assert ndata.ntype == 'user'
assert ndata.graph_id_field == 'xxx'
assert ndata.node_id_field == 'xxx'
assert ndata.ntype == "user"
assert ndata.graph_id_field == "xxx"
assert ndata.node_id_field == "xxx"
assert len(meta.edge_data) == 1
edata = meta.edge_data[0]
assert edata.etype == ['user', 'follow', 'user']
assert edata.graph_id_field == 'xxx'
assert edata.src_id_field == 'xxx'
assert edata.dst_id_field == 'xxx'
assert edata.etype == ["user", "follow", "user"]
assert edata.graph_id_field == "xxx"
assert edata.src_id_field == "xxx"
assert edata.dst_id_field == "xxx"
assert meta.graph_data is not None
assert meta.graph_data.file_name == 'graph.csv'
assert meta.graph_data.graph_id_field == 'xxx'
assert meta.graph_data.file_name == "graph.csv"
assert meta.graph_data.graph_id_field == "xxx"
# some required fields are missing
yaml_data = {'dataset_name': 'default',
'node_data': [], 'edge_data': []}
yaml_data = {
"dataset_name": "default",
"node_data": [],
"edge_data": [],
}
for field in yaml_data.keys():
ydata = {k: v for k, v in yaml_data.items()}
ydata.pop(field)
with open(yaml_path, 'w') as f:
with open(yaml_path, "w") as f:
yaml.dump(ydata, f, sort_keys=False)
expect_except = False
try:
......@@ -696,10 +848,13 @@ def _test_load_yaml_with_sanity_check():
expect_except = True
assert expect_except
# unsupported version
yaml_data = {'version': '0.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes_0.csv'}],
'edge_data': [{'file_name': 'edges_0.csv'}],
}
with open(yaml_path, 'w') as f:
yaml_data = {
"version": "0.0.0",
"dataset_name": "default",
"node_data": [{"file_name": "nodes_0.csv"}],
"edge_data": [{"file_name": "edges_0.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False
try:
......@@ -708,10 +863,16 @@ def _test_load_yaml_with_sanity_check():
expect_except = True
assert expect_except
# duplicate node types
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}, {'file_name': 'nodes.csv'}],
'edge_data': [{'file_name': 'edges.csv'}],
}
with open(yaml_path, 'w') as f:
yaml_data = {
"version": "1.0.0",
"dataset_name": "default",
"node_data": [
{"file_name": "nodes.csv"},
{"file_name": "nodes.csv"},
],
"edge_data": [{"file_name": "edges.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False
try:
......@@ -720,10 +881,16 @@ def _test_load_yaml_with_sanity_check():
expect_except = True
assert expect_except
# duplicate edge types
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}],
'edge_data': [{'file_name': 'edges.csv'}, {'file_name': 'edges.csv'}],
}
with open(yaml_path, 'w') as f:
yaml_data = {
"version": "1.0.0",
"dataset_name": "default",
"node_data": [{"file_name": "nodes.csv"}],
"edge_data": [
{"file_name": "edges.csv"},
{"file_name": "edges.csv"},
],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False
try:
......@@ -734,190 +901,218 @@ def _test_load_yaml_with_sanity_check():
def _test_load_node_data_from_csv():
from dgl.data.csv_dataset_base import MetaNode, NodeData, DefaultDataParser
from dgl.data.csv_dataset_base import DefaultDataParser, MetaNode, NodeData
with tempfile.TemporaryDirectory() as test_dir:
num_nodes = 100
# minimum
df = pd.DataFrame({'node_id': np.arange(num_nodes)})
csv_path = os.path.join(test_dir, 'nodes.csv')
df = pd.DataFrame({"node_id": np.arange(num_nodes)})
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv(
meta_node, DefaultDataParser())
assert np.array_equal(df['node_id'], node_data.id)
node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
assert np.array_equal(df["node_id"], node_data.id)
assert len(node_data.data) == 0
# common case
df = pd.DataFrame({'node_id': np.arange(num_nodes),
'label': np.random.randint(3, size=num_nodes)})
csv_path = os.path.join(test_dir, 'nodes.csv')
df = pd.DataFrame(
{
"node_id": np.arange(num_nodes),
"label": np.random.randint(3, size=num_nodes),
}
)
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv(
meta_node, DefaultDataParser())
assert np.array_equal(df['node_id'], node_data.id)
node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
assert np.array_equal(df["node_id"], node_data.id)
assert len(node_data.data) == 1
assert np.array_equal(df['label'], node_data.data['label'])
assert np.array_equal(df["label"], node_data.data["label"])
assert np.array_equal(np.full(num_nodes, 0), node_data.graph_id)
assert node_data.type == '_V'
assert node_data.type == "_V"
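# With no 'graph_id' column in the CSV, every node is assigned to graph 0,
# and the node type falls back to the default "_V", as the asserts above show.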
# add more fields into nodes.csv
df = pd.DataFrame({'node_id': np.arange(num_nodes), 'label': np.random.randint(
3, size=num_nodes), 'graph_id': np.full(num_nodes, 1)})
csv_path = os.path.join(test_dir, 'nodes.csv')
df = pd.DataFrame(
{
"node_id": np.arange(num_nodes),
"label": np.random.randint(3, size=num_nodes),
"graph_id": np.full(num_nodes, 1),
}
)
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv(
meta_node, DefaultDataParser())
assert np.array_equal(df['node_id'], node_data.id)
node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
assert np.array_equal(df["node_id"], node_data.id)
assert len(node_data.data) == 1
assert np.array_equal(df['label'], node_data.data['label'])
assert np.array_equal(df['graph_id'], node_data.graph_id)
assert node_data.type == '_V'
assert np.array_equal(df["label"], node_data.data["label"])
assert np.array_equal(df["graph_id"], node_data.graph_id)
assert node_data.type == "_V"
# required header is missing
df = pd.DataFrame({'label': np.random.randint(3, size=num_nodes)})
csv_path = os.path.join(test_dir, 'nodes.csv')
df = pd.DataFrame({"label": np.random.randint(3, size=num_nodes)})
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path)
expect_except = False
try:
NodeData.load_from_csv(
meta_node, DefaultDataParser())
NodeData.load_from_csv(meta_node, DefaultDataParser())
except Exception:
expect_except = True
assert expect_except
def _test_load_edge_data_from_csv():
from dgl.data.csv_dataset_base import MetaEdge, EdgeData, DefaultDataParser
from dgl.data.csv_dataset_base import DefaultDataParser, EdgeData, MetaEdge
with tempfile.TemporaryDirectory() as test_dir:
num_nodes = 100
num_edges = 1000
# minimum
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
'dst_id': np.random.randint(num_nodes, size=num_edges),
})
csv_path = os.path.join(test_dir, 'edges.csv')
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv(
meta_edge, DefaultDataParser())
assert np.array_equal(df['src_id'], edge_data.src)
assert np.array_equal(df['dst_id'], edge_data.dst)
edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df["dst_id"], edge_data.dst)
assert len(edge_data.data) == 0
# common case
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
'dst_id': np.random.randint(num_nodes, size=num_edges),
'label': np.random.randint(3, size=num_edges)})
csv_path = os.path.join(test_dir, 'edges.csv')
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
"label": np.random.randint(3, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv(
meta_edge, DefaultDataParser())
assert np.array_equal(df['src_id'], edge_data.src)
assert np.array_equal(df['dst_id'], edge_data.dst)
edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df["dst_id"], edge_data.dst)
assert len(edge_data.data) == 1
assert np.array_equal(df['label'], edge_data.data['label'])
assert np.array_equal(df["label"], edge_data.data["label"])
assert np.array_equal(np.full(num_edges, 0), edge_data.graph_id)
assert edge_data.type == ('_V', '_E', '_V')
assert edge_data.type == ("_V", "_E", "_V")
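# Likewise for edges: without a 'graph_id' column all edges land in graph 0,
# and the canonical edge type defaults to ("_V", "_E", "_V").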
# add more fields into edges.csv
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
'dst_id': np.random.randint(num_nodes, size=num_edges),
'graph_id': np.arange(num_edges),
'feat': np.random.randint(3, size=num_edges),
'label': np.random.randint(3, size=num_edges)})
csv_path = os.path.join(test_dir, 'edges.csv')
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
"graph_id": np.arange(num_edges),
"feat": np.random.randint(3, size=num_edges),
"label": np.random.randint(3, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv(
meta_edge, DefaultDataParser())
assert np.array_equal(df['src_id'], edge_data.src)
assert np.array_equal(df['dst_id'], edge_data.dst)
edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df["dst_id"], edge_data.dst)
assert len(edge_data.data) == 2
assert np.array_equal(df['feat'], edge_data.data['feat'])
assert np.array_equal(df['label'], edge_data.data['label'])
assert np.array_equal(df['graph_id'], edge_data.graph_id)
assert edge_data.type == ('_V', '_E', '_V')
assert np.array_equal(df["feat"], edge_data.data["feat"])
assert np.array_equal(df["label"], edge_data.data["label"])
assert np.array_equal(df["graph_id"], edge_data.graph_id)
assert edge_data.type == ("_V", "_E", "_V")
# required headers are missing
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
})
csv_path = os.path.join(test_dir, 'edges.csv')
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path)
expect_except = False
try:
EdgeData.load_from_csv(
meta_edge, DefaultDataParser())
EdgeData.load_from_csv(meta_edge, DefaultDataParser())
except DGLError:
expect_except = True
assert expect_except
df = pd.DataFrame({'dst_id': np.random.randint(num_nodes, size=num_edges),
})
csv_path = os.path.join(test_dir, 'edges.csv')
df = pd.DataFrame(
{
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path)
expect_except = False
try:
EdgeData.load_from_csv(
meta_edge, DefaultDataParser())
EdgeData.load_from_csv(meta_edge, DefaultDataParser())
except DGLError:
expect_except = True
assert expect_except
def _test_load_graph_data_from_csv():
from dgl.data.csv_dataset_base import MetaGraph, GraphData, DefaultDataParser
from dgl.data.csv_dataset_base import (
DefaultDataParser,
GraphData,
MetaGraph,
)
with tempfile.TemporaryDirectory() as test_dir:
num_graphs = 100
# minimum
df = pd.DataFrame({'graph_id': np.arange(num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv')
df = pd.DataFrame({"graph_id": np.arange(num_graphs)})
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv(
meta_graph, DefaultDataParser())
assert np.array_equal(df['graph_id'], graph_data.graph_id)
graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert len(graph_data.data) == 0
# common case
df = pd.DataFrame({'graph_id': np.arange(num_graphs),
'label': np.random.randint(3, size=num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv')
df = pd.DataFrame(
{
"graph_id": np.arange(num_graphs),
"label": np.random.randint(3, size=num_graphs),
}
)
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv(
meta_graph, DefaultDataParser())
assert np.array_equal(df['graph_id'], graph_data.graph_id)
graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert len(graph_data.data) == 1
assert np.array_equal(df['label'], graph_data.data['label'])
assert np.array_equal(df["label"], graph_data.data["label"])
# add more fields into graph.csv
df = pd.DataFrame({'graph_id': np.arange(num_graphs),
'feat': np.random.randint(3, size=num_graphs),
'label': np.random.randint(3, size=num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv')
df = pd.DataFrame(
{
"graph_id": np.arange(num_graphs),
"feat": np.random.randint(3, size=num_graphs),
"label": np.random.randint(3, size=num_graphs),
}
)
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv(
meta_graph, DefaultDataParser())
assert np.array_equal(df['graph_id'], graph_data.graph_id)
graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert len(graph_data.data) == 2
assert np.array_equal(df['feat'], graph_data.data['feat'])
assert np.array_equal(df['label'], graph_data.data['label'])
assert np.array_equal(df["feat"], graph_data.data["feat"])
assert np.array_equal(df["label"], graph_data.data["label"])
# required header is missing
df = pd.DataFrame({'label': np.random.randint(3, size=num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv')
df = pd.DataFrame({"label": np.random.randint(3, size=num_graphs)})
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path)
expect_except = False
try:
GraphData.load_from_csv(
meta_graph, DefaultDataParser())
GraphData.load_from_csv(meta_graph, DefaultDataParser())
except DGLError:
expect_except = True
assert expect_except
......@@ -931,40 +1126,56 @@ def _test_CSVDataset_single():
edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv")
nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name',
'node_data': [{'file_name': os.path.basename(nodes_csv_path_0),
'ntype': 'user',
},
{'file_name': os.path.basename(nodes_csv_path_1),
'ntype': 'item',
}],
'edge_data': [{'file_name': os.path.basename(edges_csv_path_0),
'etype': ['user', 'follow', 'user'],
},
{'file_name': os.path.basename(edges_csv_path_1),
'etype': ['user', 'like', 'item'],
}],
}
with open(meta_yaml_path, 'w') as f:
meta_yaml_data = {
"version": "1.0.0",
"dataset_name": "default_name",
"node_data": [
{
"file_name": os.path.basename(nodes_csv_path_0),
"ntype": "user",
},
{
"file_name": os.path.basename(nodes_csv_path_1),
"ntype": "item",
},
],
"edge_data": [
{
"file_name": os.path.basename(edges_csv_path_0),
"etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100
num_edges = 500
num_dims = 3
feat_ndata = np.random.rand(num_nodes, num_dims)
label_ndata = np.random.randint(2, size=num_nodes)
df = pd.DataFrame({'node_id': np.arange(num_nodes),
'label': label_ndata,
'feat': [line.tolist() for line in feat_ndata],
})
df = pd.DataFrame(
{
"node_id": np.arange(num_nodes),
"label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
}
)
df.to_csv(nodes_csv_path_0, index=False)
df.to_csv(nodes_csv_path_1, index=False)
feat_edata = np.random.rand(num_edges, num_dims)
label_edata = np.random.randint(2, size=num_edges)
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
'dst_id': np.random.randint(num_nodes, size=num_edges),
'label': label_edata,
'feat': [line.tolist() for line in feat_edata],
})
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
"label": label_edata,
"feat": [line.tolist() for line in feat_edata],
}
)
df.to_csv(edges_csv_path_0, index=False)
df.to_csv(edges_csv_path_1, index=False)
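# For reference, test_nodes_0.csv written above begins roughly like this
# (values are random; the 'feat' lists are quoted since they contain commas):
#
#   node_id,label,feat
#   0,1,"[0.42, 0.13, 0.77]"
#   1,0,"[0.05, 0.91, 0.34]"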
......@@ -974,24 +1185,29 @@ def _test_CSVDataset_single():
# remove original node data file to verify reload from cached files
os.remove(nodes_csv_path_0)
assert not os.path.exists(nodes_csv_path_0)
csv_dataset = data.CSVDataset(
test_dir, force_reload=force_reload)
csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload)
assert len(csv_dataset) == 1
g = csv_dataset[0]
assert not g.is_homogeneous
assert csv_dataset.has_cache()
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert F.array_equal(F.tensor(feat_ndata, dtype=F.float32),
g.nodes[ntype].data['feat'])
assert np.array_equal(label_ndata,
F.asnumpy(g.nodes[ntype].data['label']))
assert F.array_equal(
F.tensor(feat_ndata, dtype=F.float32),
g.nodes[ntype].data["feat"],
)
assert np.array_equal(
label_ndata, F.asnumpy(g.nodes[ntype].data["label"])
)
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
assert F.array_equal(F.tensor(feat_edata, dtype=F.float32),
g.edges[etype].data['feat'])
assert np.array_equal(label_edata,
F.asnumpy(g.edges[etype].data['label']))
assert F.array_equal(
F.tensor(feat_edata, dtype=F.float32),
g.edges[etype].data["feat"],
)
assert np.array_equal(
label_edata, F.asnumpy(g.edges[etype].data["label"])
)
def _test_CSVDataset_multiple():
......@@ -1003,52 +1219,87 @@ def _test_CSVDataset_multiple():
nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
graph_csv_path = os.path.join(test_dir, "test_graph.csv")
meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name',
'node_data': [{'file_name': os.path.basename(nodes_csv_path_0),
'ntype': 'user',
},
{'file_name': os.path.basename(nodes_csv_path_1),
'ntype': 'item',
}],
'edge_data': [{'file_name': os.path.basename(edges_csv_path_0),
'etype': ['user', 'follow', 'user'],
},
{'file_name': os.path.basename(edges_csv_path_1),
'etype': ['user', 'like', 'item'],
}],
'graph_data': {'file_name': os.path.basename(graph_csv_path)}
}
with open(meta_yaml_path, 'w') as f:
meta_yaml_data = {
"version": "1.0.0",
"dataset_name": "default_name",
"node_data": [
{
"file_name": os.path.basename(nodes_csv_path_0),
"ntype": "user",
},
{
"file_name": os.path.basename(nodes_csv_path_1),
"ntype": "item",
},
],
"edge_data": [
{
"file_name": os.path.basename(edges_csv_path_0),
"etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
"graph_data": {"file_name": os.path.basename(graph_csv_path)},
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100
num_edges = 500
num_graphs = 10
num_dims = 3
feat_ndata = np.random.rand(num_nodes*num_graphs, num_dims)
label_ndata = np.random.randint(2, size=num_nodes*num_graphs)
df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]),
'label': label_ndata,
'feat': [line.tolist() for line in feat_ndata],
'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)])
})
feat_ndata = np.random.rand(num_nodes * num_graphs, num_dims)
label_ndata = np.random.randint(2, size=num_nodes * num_graphs)
df = pd.DataFrame(
{
"node_id": np.hstack(
[np.arange(num_nodes) for _ in range(num_graphs)]
),
"label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
"graph_id": np.hstack(
[np.full(num_nodes, i) for i in range(num_graphs)]
),
}
)
df.to_csv(nodes_csv_path_0, index=False)
df.to_csv(nodes_csv_path_1, index=False)
feat_edata = np.random.rand(num_edges*num_graphs, num_dims)
label_edata = np.random.randint(2, size=num_edges*num_graphs)
df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
'label': label_edata,
'feat': [line.tolist() for line in feat_edata],
'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)])
})
feat_edata = np.random.rand(num_edges * num_graphs, num_dims)
label_edata = np.random.randint(2, size=num_edges * num_graphs)
df = pd.DataFrame(
{
"src_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"dst_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"label": label_edata,
"feat": [line.tolist() for line in feat_edata],
"graph_id": np.hstack(
[np.full(num_edges, i) for i in range(num_graphs)]
),
}
)
df.to_csv(edges_csv_path_0, index=False)
df.to_csv(edges_csv_path_1, index=False)
feat_gdata = np.random.rand(num_graphs, num_dims)
label_gdata = np.random.randint(2, size=num_graphs)
df = pd.DataFrame({'label': label_gdata,
'feat': [line.tolist() for line in feat_gdata],
'graph_id': np.arange(num_graphs)
})
df = pd.DataFrame(
{
"label": label_gdata,
"feat": [line.tolist() for line in feat_gdata],
"graph_id": np.arange(num_graphs),
}
)
df.to_csv(graph_csv_path, index=False)
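# test_graph.csv thus holds one row per graph with 'label', 'feat' and
# 'graph_id' columns; 'graph_id' is what ties the node/edge rows above to
# their owning graph.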
# load CSVDataset with default node/edge/gdata_parser
......@@ -1057,31 +1308,47 @@ def _test_CSVDataset_multiple():
# remove original node data file to verify reload from cached files
os.remove(nodes_csv_path_0)
assert not os.path.exists(nodes_csv_path_0)
csv_dataset = data.CSVDataset(
test_dir, force_reload=force_reload)
csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload)
assert len(csv_dataset) == num_graphs
assert csv_dataset.has_cache()
assert len(csv_dataset.data) == 2
assert 'feat' in csv_dataset.data
assert 'label' in csv_dataset.data
assert F.array_equal(F.tensor(feat_gdata, dtype=F.float32),
csv_dataset.data['feat'])
assert "feat" in csv_dataset.data
assert "label" in csv_dataset.data
assert F.array_equal(
F.tensor(feat_gdata, dtype=F.float32), csv_dataset.data["feat"]
)
for i, (g, g_data) in enumerate(csv_dataset):
assert not g.is_homogeneous
assert F.asnumpy(g_data['label']) == label_gdata[i]
assert F.array_equal(g_data['feat'], F.tensor(feat_gdata[i], dtype=F.float32))
assert F.asnumpy(g_data["label"]) == label_gdata[i]
assert F.array_equal(
g_data["feat"], F.tensor(feat_gdata[i], dtype=F.float32)
)
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes], dtype=F.float32),
g.nodes[ntype].data['feat'])
assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes],
F.asnumpy(g.nodes[ntype].data['label']))
assert F.array_equal(
F.tensor(
feat_ndata[i * num_nodes : (i + 1) * num_nodes],
dtype=F.float32,
),
g.nodes[ntype].data["feat"],
)
assert np.array_equal(
label_ndata[i * num_nodes : (i + 1) * num_nodes],
F.asnumpy(g.nodes[ntype].data["label"]),
)
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges], dtype=F.float32),
g.edges[etype].data['feat'])
assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges],
F.asnumpy(g.edges[etype].data['label']))
assert F.array_equal(
F.tensor(
feat_edata[i * num_edges : (i + 1) * num_edges],
dtype=F.float32,
),
g.edges[etype].data["feat"],
)
assert np.array_equal(
label_edata[i * num_edges : (i + 1) * num_edges],
F.asnumpy(g.edges[etype].data["label"]),
)
def _test_CSVDataset_customized_data_parser():
......@@ -1093,45 +1360,76 @@ def _test_CSVDataset_customized_data_parser():
nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
graph_csv_path = os.path.join(test_dir, "test_graph.csv")
meta_yaml_data = {'dataset_name': 'default_name',
'node_data': [{'file_name': os.path.basename(nodes_csv_path_0),
'ntype': 'user',
},
{'file_name': os.path.basename(nodes_csv_path_1),
'ntype': 'item',
}],
'edge_data': [{'file_name': os.path.basename(edges_csv_path_0),
'etype': ['user', 'follow', 'user'],
},
{'file_name': os.path.basename(edges_csv_path_1),
'etype': ['user', 'like', 'item'],
}],
'graph_data': {'file_name': os.path.basename(graph_csv_path)}
}
with open(meta_yaml_path, 'w') as f:
meta_yaml_data = {
"dataset_name": "default_name",
"node_data": [
{
"file_name": os.path.basename(nodes_csv_path_0),
"ntype": "user",
},
{
"file_name": os.path.basename(nodes_csv_path_1),
"ntype": "item",
},
],
"edge_data": [
{
"file_name": os.path.basename(edges_csv_path_0),
"etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
"graph_data": {"file_name": os.path.basename(graph_csv_path)},
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100
num_edges = 500
num_graphs = 10
label_ndata = np.random.randint(2, size=num_nodes*num_graphs)
df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]),
'label': label_ndata,
'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)])
})
label_ndata = np.random.randint(2, size=num_nodes * num_graphs)
df = pd.DataFrame(
{
"node_id": np.hstack(
[np.arange(num_nodes) for _ in range(num_graphs)]
),
"label": label_ndata,
"graph_id": np.hstack(
[np.full(num_nodes, i) for i in range(num_graphs)]
),
}
)
df.to_csv(nodes_csv_path_0, index=False)
df.to_csv(nodes_csv_path_1, index=False)
label_edata = np.random.randint(2, size=num_edges*num_graphs)
df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
'label': label_edata,
'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)])
})
label_edata = np.random.randint(2, size=num_edges * num_graphs)
df = pd.DataFrame(
{
"src_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"dst_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"label": label_edata,
"graph_id": np.hstack(
[np.full(num_edges, i) for i in range(num_graphs)]
),
}
)
df.to_csv(edges_csv_path_0, index=False)
df.to_csv(edges_csv_path_1, index=False)
label_gdata = np.random.randint(2, size=num_graphs)
df = pd.DataFrame({'label': label_gdata,
'graph_id': np.arange(num_graphs)
})
df = pd.DataFrame(
{"label": label_gdata, "graph_id": np.arange(num_graphs)}
)
df.to_csv(graph_csv_path, index=False)
class CustDataParser:
......@@ -1139,69 +1437,86 @@ def _test_CSVDataset_customized_data_parser():
data = {}
for header in df:
dt = df[header].to_numpy().squeeze()
if header == 'label':
if header == "label":
dt += 2
data[header] = dt
return data
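# A data parser is just a callable mapping a pandas DataFrame to a dict of
# column name -> numpy array; this one shifts every 'label' column by 2 so
# that customized parsing is observable in the asserts below.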
# load CSVDataset with customized node/edge/gdata_parser
# specify via dict[ntype/etype, callable]
csv_dataset = data.CSVDataset(
test_dir, force_reload=True, ndata_parser={'user': CustDataParser()},
edata_parser={('user', 'like', 'item'): CustDataParser()},
gdata_parser=CustDataParser())
test_dir,
force_reload=True,
ndata_parser={"user": CustDataParser()},
edata_parser={("user", "like", "item"): CustDataParser()},
gdata_parser=CustDataParser(),
)
assert len(csv_dataset) == num_graphs
assert len(csv_dataset.data) == 1
assert 'label' in csv_dataset.data
assert "label" in csv_dataset.data
for i, (g, g_data) in enumerate(csv_dataset):
assert not g.is_homogeneous
assert F.asnumpy(g_data) == label_gdata[i] + 2
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
offset = 2 if ntype == 'user' else 0
assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes]+offset,
F.asnumpy(g.nodes[ntype].data['label']))
offset = 2 if ntype == "user" else 0
assert np.array_equal(
label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset,
F.asnumpy(g.nodes[ntype].data["label"]),
)
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
offset = 2 if etype == 'like' else 0
assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges]+offset,
F.asnumpy(g.edges[etype].data['label']))
offset = 2 if etype == "like" else 0
assert np.array_equal(
label_edata[i * num_edges : (i + 1) * num_edges] + offset,
F.asnumpy(g.edges[etype].data["label"]),
)
# specify via callable
csv_dataset = data.CSVDataset(
test_dir, force_reload=True, ndata_parser=CustDataParser(),
edata_parser=CustDataParser(), gdata_parser=CustDataParser())
test_dir,
force_reload=True,
ndata_parser=CustDataParser(),
edata_parser=CustDataParser(),
gdata_parser=CustDataParser(),
)
assert len(csv_dataset) == num_graphs
assert len(csv_dataset.data) == 1
assert 'label' in csv_dataset.data
assert "label" in csv_dataset.data
for i, (g, g_data) in enumerate(csv_dataset):
assert not g.is_homogeneous
assert F.asnumpy(g_data) == label_gdata[i] + 2
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
offset = 2
assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes]+offset,
F.asnumpy(g.nodes[ntype].data['label']))
assert np.array_equal(
label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset,
F.asnumpy(g.nodes[ntype].data["label"]),
)
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
offset = 2
assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges]+offset,
F.asnumpy(g.edges[etype].data['label']))
assert np.array_equal(
label_edata[i * num_edges : (i + 1) * num_edges] + offset,
F.asnumpy(g.edges[etype].data["label"]),
)
def _test_NodeEdgeGraphData():
from dgl.data.csv_dataset_base import NodeData, EdgeData, GraphData
from dgl.data.csv_dataset_base import EdgeData, GraphData, NodeData
# NodeData basics
num_nodes = 100
node_ids = np.arange(num_nodes, dtype=np.float64)
ndata = NodeData(node_ids, {})
assert np.array_equal(ndata.id, node_ids)
assert len(ndata.data) == 0
assert ndata.type == '_V'
assert ndata.type == "_V"
assert np.array_equal(ndata.graph_id, np.full(num_nodes, 0))
# NodeData more
data = {'feat': np.random.rand(num_nodes, 3)}
data = {"feat": np.random.rand(num_nodes, 3)}
graph_id = np.arange(num_nodes)
ndata = NodeData(node_ids, data, type='user', graph_id=graph_id)
assert ndata.type == 'user'
ndata = NodeData(node_ids, data, type="user", graph_id=graph_id)
assert ndata.type == "user"
assert np.array_equal(ndata.graph_id, graph_id)
assert len(ndata.data) == len(data)
for k, v in data.items():
......@@ -1210,8 +1525,11 @@ def _test_NodeEdgeGraphData():
# NodeData except
expect_except = False
try:
NodeData(np.arange(num_nodes), {'feat': np.random.rand(
num_nodes+1, 3)}, graph_id=np.arange(num_nodes-1))
NodeData(
np.arange(num_nodes),
{"feat": np.random.rand(num_nodes + 1, 3)},
graph_id=np.arange(num_nodes - 1),
)
except Exception:
expect_except = True
assert expect_except
......@@ -1224,17 +1542,16 @@ def _test_NodeEdgeGraphData():
edata = EdgeData(src_ids, dst_ids, {})
assert np.array_equal(edata.src, src_ids)
assert np.array_equal(edata.dst, dst_ids)
assert edata.type == ('_V', '_E', '_V')
assert edata.type == ("_V", "_E", "_V")
assert len(edata.data) == 0
assert np.array_equal(edata.graph_id, np.full(num_edges, 0))
# EdgeData more
src_ids = np.random.randint(num_nodes, size=num_edges).astype(np.float64)
dst_ids = np.random.randint(num_nodes, size=num_edges).astype(np.float64)
data = {'feat': np.random.rand(num_edges, 3)}
etype = ('user', 'like', 'item')
data = {"feat": np.random.rand(num_edges, 3)}
etype = ("user", "like", "item")
graph_ids = np.arange(num_edges)
edata = EdgeData(src_ids, dst_ids, data,
type=etype, graph_id=graph_ids)
edata = EdgeData(src_ids, dst_ids, data, type=etype, graph_id=graph_ids)
assert np.array_equal(edata.src, src_ids)
assert np.array_equal(edata.dst, dst_ids)
assert edata.type == etype
......@@ -1246,8 +1563,12 @@ def _test_NodeEdgeGraphData():
# EdgeData except
expect_except = False
try:
EdgeData(np.arange(num_edges), np.arange(
num_edges+1), {'feat': np.random.rand(num_edges-1, 3)}, graph_id=np.arange(num_edges+2))
EdgeData(
np.arange(num_edges),
np.arange(num_edges + 1),
{"feat": np.random.rand(num_edges - 1, 3)},
graph_id=np.arange(num_edges + 2),
)
except Exception:
expect_except = True
assert expect_except
......@@ -1260,7 +1581,7 @@ def _test_NodeEdgeGraphData():
assert len(gdata.data) == 0
# GraphData more
graph_ids = np.arange(num_graphs).astype(np.float64)
data = {'feat': np.random.rand(num_graphs, 3)}
data = {"feat": np.random.rand(num_graphs, 3)}
gdata = GraphData(graph_ids, data)
assert np.array_equal(gdata.graph_id, graph_ids)
assert len(gdata.data) == len(data)
......@@ -1269,7 +1590,10 @@ def _test_NodeEdgeGraphData():
assert np.array_equal(gdata.data[k], v)
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_csvdataset():
_test_NodeEdgeGraphData()
......@@ -1286,121 +1610,207 @@ def test_csvdataset():
_test_CSVDataset_multiple()
_test_CSVDataset_customized_data_parser()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_add_nodepred_split():
dataset = data.AmazonCoBuyComputerDataset()
print('train_mask' in dataset[0].ndata)
print("train_mask" in dataset[0].ndata)
data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1])
assert 'train_mask' in dataset[0].ndata
assert "train_mask" in dataset[0].ndata
dataset = data.AIFBDataset()
print('train_mask' in dataset[0].nodes['Publikationen'].data)
data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1], ntype='Publikationen')
assert 'train_mask' in dataset[0].nodes['Publikationen'].data
print("train_mask" in dataset[0].nodes["Publikationen"].data)
data.utils.add_nodepred_split(
dataset, [0.8, 0.1, 0.1], ntype="Publikationen"
)
assert "train_mask" in dataset[0].nodes["Publikationen"].data
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred1():
ds = data.AmazonCoBuyComputerDataset()
print('train_mask' in ds[0].ndata)
print("train_mask" in ds[0].ndata)
new_ds = data.AsNodePredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 1
assert new_ds[0].num_nodes() == ds[0].num_nodes()
assert new_ds[0].num_edges() == ds[0].num_edges()
assert 'train_mask' in new_ds[0].ndata
assert F.array_equal(new_ds.train_idx, F.nonzero_1d(
new_ds[0].ndata['train_mask']))
assert F.array_equal(new_ds.val_idx, F.nonzero_1d(
new_ds[0].ndata['val_mask']))
assert F.array_equal(new_ds.test_idx, F.nonzero_1d(
new_ds[0].ndata['test_mask']))
assert "train_mask" in new_ds[0].ndata
assert F.array_equal(
new_ds.train_idx, F.nonzero_1d(new_ds[0].ndata["train_mask"])
)
assert F.array_equal(
new_ds.val_idx, F.nonzero_1d(new_ds[0].ndata["val_mask"])
)
assert F.array_equal(
new_ds.test_idx, F.nonzero_1d(new_ds[0].ndata["test_mask"])
)
ds = data.AIFBDataset()
print('train_mask' in ds[0].nodes['Personen'].data)
new_ds = data.AsNodePredDataset(ds, [0.8, 0.1, 0.1], 'Personen', verbose=True)
print("train_mask" in ds[0].nodes["Personen"].data)
new_ds = data.AsNodePredDataset(
ds, [0.8, 0.1, 0.1], "Personen", verbose=True
)
assert len(new_ds) == 1
assert new_ds[0].ntypes == ds[0].ntypes
assert new_ds[0].canonical_etypes == ds[0].canonical_etypes
assert 'train_mask' in new_ds[0].nodes['Personen'].data
assert F.array_equal(new_ds.train_idx, F.nonzero_1d(
new_ds[0].nodes['Personen'].data['train_mask']))
assert F.array_equal(new_ds.val_idx, F.nonzero_1d(
new_ds[0].nodes['Personen'].data['val_mask']))
assert F.array_equal(new_ds.test_idx, F.nonzero_1d(
new_ds[0].nodes['Personen'].data['test_mask']))
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
assert "train_mask" in new_ds[0].nodes["Personen"].data
assert F.array_equal(
new_ds.train_idx,
F.nonzero_1d(new_ds[0].nodes["Personen"].data["train_mask"]),
)
assert F.array_equal(
new_ds.val_idx,
F.nonzero_1d(new_ds[0].nodes["Personen"].data["val_mask"]),
)
assert F.array_equal(
new_ds.test_idx,
F.nonzero_1d(new_ds[0].nodes["Personen"].data["test_mask"]),
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred2():
# test proper reprocessing
# create
ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1])
assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(ds[0].num_nodes() * 0.8)
ds = data.AsNodePredDataset(
data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.8
)
assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8)
# read from cache
ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1])
assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(ds[0].num_nodes() * 0.8)
ds = data.AsNodePredDataset(
data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.8
)
assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8)
# invalid cache, re-read
ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.1, 0.1, 0.8])
assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(ds[0].num_nodes() * 0.1)
ds = data.AsNodePredDataset(
data.AmazonCoBuyComputerDataset(), [0.1, 0.1, 0.8]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.1
)
assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.1)
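# Requesting a different split_ratio invalidates the cached split and forces
# a re-split, which the 0.1 train fraction above confirms.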
# create
ds = data.AsNodePredDataset(data.AIFBDataset(), [0.8, 0.1, 0.1], 'Personen', verbose=True)
assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(ds[0].num_nodes('Personen') * 0.8)
assert len(ds.train_idx) == int(ds[0].num_nodes('Personen') * 0.8)
ds = data.AsNodePredDataset(
data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True
)
assert F.sum(
F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.8)
assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8)
# read from cache
ds = data.AsNodePredDataset(data.AIFBDataset(), [0.8, 0.1, 0.1], 'Personen', verbose=True)
assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(ds[0].num_nodes('Personen') * 0.8)
assert len(ds.train_idx) == int(ds[0].num_nodes('Personen') * 0.8)
ds = data.AsNodePredDataset(
data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True
)
assert F.sum(
F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.8)
assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8)
# invalid cache, re-read
ds = data.AsNodePredDataset(data.AIFBDataset(), [0.1, 0.1, 0.8], 'Personen', verbose=True)
assert F.sum(F.astype(ds[0].nodes['Personen'].data['train_mask'], F.int32), 0) == int(ds[0].num_nodes('Personen') * 0.1)
assert len(ds.train_idx) == int(ds[0].num_nodes('Personen') * 0.1)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason="ogb only supports pytorch")
ds = data.AsNodePredDataset(
data.AIFBDataset(), [0.1, 0.1, 0.8], "Personen", verbose=True
)
assert F.sum(
F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.1)
assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.1)
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred_ogb():
from ogb.nodeproppred import DglNodePropPredDataset
ds = data.AsNodePredDataset(DglNodePropPredDataset("ogbn-arxiv"), split_ratio=None, verbose=True)
ds = data.AsNodePredDataset(
DglNodePropPredDataset("ogbn-arxiv"), split_ratio=None, verbose=True
)
split = DglNodePropPredDataset("ogbn-arxiv").get_idx_split()
train_idx, val_idx, test_idx = split['train'], split['valid'], split['test']
train_idx, val_idx, test_idx = split["train"], split["valid"], split["test"]
assert F.array_equal(ds.train_idx, F.tensor(train_idx))
assert F.array_equal(ds.val_idx, F.tensor(val_idx))
assert F.array_equal(ds.test_idx, F.tensor(test_idx))
# force generate new split
ds = data.AsNodePredDataset(DglNodePropPredDataset("ogbn-arxiv"), split_ratio=[0.7, 0.2, 0.1], verbose=True)
ds = data.AsNodePredDataset(
DglNodePropPredDataset("ogbn-arxiv"),
split_ratio=[0.7, 0.2, 0.1],
verbose=True,
)
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_linkpred():
# create
ds = data.AsLinkPredDataset(data.CoraGraphDataset(), split_ratio=[0.8, 0.1, 0.1], neg_ratio=1, verbose=True)
ds = data.AsLinkPredDataset(
data.CoraGraphDataset(),
split_ratio=[0.8, 0.1, 0.1],
neg_ratio=1,
verbose=True,
)
# Cora has 10556 edges; a 10% test split comes out to 1057 edges here
assert ds.test_edges[0][0].shape[0] == 1057
# negative sampling offers no exact-count guarantee, so the assert uses a relaxed range
assert 1000 <= ds.test_edges[1][0].shape[0] <= 1057
# read from cache
ds = data.AsLinkPredDataset(data.CoraGraphDataset(), split_ratio=[0.7, 0.1, 0.2], neg_ratio=2, verbose=True)
ds = data.AsLinkPredDataset(
data.CoraGraphDataset(),
split_ratio=[0.7, 0.1, 0.2],
neg_ratio=2,
verbose=True,
)
assert ds.test_edges[0][0].shape[0] == 2112
# negative sampling is not guaranteed to hit a ratio of exactly 2, so the assert uses a relaxed range
assert 4000 < ds.test_edges[1][0].shape[0] <= 4224
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason="ogb only supports pytorch")
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
def test_as_linkpred_ogb():
from ogb.linkproppred import DglLinkPropPredDataset
ds = data.AsLinkPredDataset(DglLinkPropPredDataset("ogbl-collab"), split_ratio=None, verbose=True)
ds = data.AsLinkPredDataset(
DglLinkPropPredDataset("ogbl-collab"), split_ratio=None, verbose=True
)
# original dataset has 46329 test edges
assert ds.test_edges[0][0].shape[0] == 46329
# force generate new split
ds = data.AsLinkPredDataset(DglLinkPropPredDataset("ogbl-collab"), split_ratio=[0.7, 0.2, 0.1], verbose=True)
ds = data.AsLinkPredDataset(
DglLinkPropPredDataset("ogbl-collab"),
split_ratio=[0.7, 0.2, 0.1],
verbose=True,
)
assert ds.test_edges[0][0].shape[0] == 235812
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred_csvdataset():
with tempfile.TemporaryDirectory() as test_dir:
......@@ -1408,13 +1818,13 @@ def test_as_nodepred_csvdataset():
meta_yaml_path = os.path.join(test_dir, "meta.yaml")
edges_csv_path = os.path.join(test_dir, "test_edges.csv")
nodes_csv_path = os.path.join(test_dir, "test_nodes.csv")
meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name',
'node_data': [{'file_name': os.path.basename(nodes_csv_path)
}],
'edge_data': [{'file_name': os.path.basename(edges_csv_path)
}],
}
with open(meta_yaml_path, 'w') as f:
meta_yaml_data = {
"version": "1.0.0",
"dataset_name": "default_name",
"node_data": [{"file_name": os.path.basename(nodes_csv_path)}],
"edge_data": [{"file_name": os.path.basename(edges_csv_path)}],
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100
num_edges = 500
......@@ -1422,37 +1832,49 @@ def test_as_nodepred_csvdataset():
num_classes = num_nodes
feat_ndata = np.random.rand(num_nodes, num_dims)
label_ndata = np.arange(num_classes)
df = pd.DataFrame({'node_id': np.arange(num_nodes),
'label': label_ndata,
'feat': [line.tolist() for line in feat_ndata],
})
df = pd.DataFrame(
{
"node_id": np.arange(num_nodes),
"label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
}
)
df.to_csv(nodes_csv_path, index=False)
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges),
'dst_id': np.random.randint(num_nodes, size=num_edges),
})
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
df.to_csv(edges_csv_path, index=False)
ds = data.CSVDataset(test_dir, force_reload=True)
assert 'feat' in ds[0].ndata
assert 'label' in ds[0].ndata
assert 'train_mask' not in ds[0].ndata
assert not hasattr(ds[0], 'num_classes')
new_ds = data.AsNodePredDataset(ds, split_ratio=[0.8, 0.1, 0.1], force_reload=True)
assert "feat" in ds[0].ndata
assert "label" in ds[0].ndata
assert "train_mask" not in ds[0].ndata
assert not hasattr(ds[0], "num_classes")
new_ds = data.AsNodePredDataset(
ds, split_ratio=[0.8, 0.1, 0.1], force_reload=True
)
assert new_ds.num_classes == num_classes
assert 'feat' in new_ds[0].ndata
assert 'label' in new_ds[0].ndata
assert 'train_mask' in new_ds[0].ndata
assert "feat" in new_ds[0].ndata
assert "label" in new_ds[0].ndata
assert "train_mask" in new_ds[0].ndata
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_graphpred():
ds = data.GINDataset(name='MUTAG', self_loop=True)
ds = data.GINDataset(name="MUTAG", self_loop=True)
new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 188
assert new_ds.num_tasks == 1
assert new_ds.num_classes == 2
ds = data.FakeNewsDataset('politifact', 'profile')
ds = data.FakeNewsDataset("politifact", "profile")
new_ds = data.AsGraphPredDataset(ds, verbose=True)
assert len(new_ds) == 314
assert new_ds.num_tasks == 1
......@@ -1464,25 +1886,25 @@ def test_as_graphpred():
assert new_ds.num_tasks == 14
assert new_ds.num_classes is None
ds = data.QM9Dataset(label_keys=['mu', 'gap'])
ds = data.QM9Dataset(label_keys=["mu", "gap"])
new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 130831
assert new_ds.num_tasks == 2
assert new_ds.num_classes is None
ds = data.QM9EdgeDataset(label_keys=['mu', 'alpha'])
ds = data.QM9EdgeDataset(label_keys=["mu", "alpha"])
new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 130831
assert new_ds.num_tasks == 2
assert new_ds.num_classes is None
ds = data.TUDataset('DD')
ds = data.TUDataset("DD")
new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 1178
assert new_ds.num_tasks == 1
assert new_ds.num_classes == 2
ds = data.LegacyTUDataset('DD')
ds = data.LegacyTUDataset("DD")
new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
assert len(new_ds) == 1178
assert new_ds.num_tasks == 1
......@@ -1494,25 +1916,41 @@ def test_as_graphpred():
assert new_ds.num_tasks == 1
assert new_ds.num_classes == 2
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_graphpred_reprocess():
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.8, 0.1, 0.1])
......@@ -1524,40 +1962,52 @@ def test_as_graphpred_reprocess():
ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.1, 0.1, 0.8])
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.QM9Dataset(label_keys=['mu', 'gap']), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.QM9Dataset(label_keys=['mu', 'gap']), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.QM9Dataset(label_keys=['mu', 'gap']), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(
data.QM9Dataset(label_keys=["mu", "gap"]), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.QM9EdgeDataset(label_keys=['mu', 'alpha']), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.QM9EdgeDataset(label_keys=['mu', 'alpha']), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(
data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.QM9EdgeDataset(label_keys=['mu', 'alpha']), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(
data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.TUDataset('DD'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1])
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.TUDataset('DD'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1])
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.TUDataset('DD'), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.1, 0.1, 0.8])
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.LegacyTUDataset('DD'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1])
assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache
ds = data.AsGraphPredDataset(data.LegacyTUDataset('DD'), [0.8, 0.1, 0.1])
ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1])
assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read
ds = data.AsGraphPredDataset(data.LegacyTUDataset('DD'), [0.1, 0.1, 0.8])
ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.1, 0.1, 0.8])
assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.8, 0.1, 0.1])
......@@ -1569,18 +2019,27 @@ def test_as_graphpred_reprocess():
ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.1, 0.1, 0.8])
assert len(ds.train_idx) == int(len(ds) * 0.1)
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason="ogb only supports pytorch")
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
def test_as_graphpred_ogb():
from ogb.graphproppred import DglGraphPropPredDataset
ds = data.AsGraphPredDataset(DglGraphPropPredDataset('ogbg-molhiv'),
split_ratio=None, verbose=True)
ds = data.AsGraphPredDataset(
DglGraphPropPredDataset("ogbg-molhiv"), split_ratio=None, verbose=True
)
assert len(ds.train_idx) == 32901
# force generate new split
ds = data.AsGraphPredDataset(DglGraphPropPredDataset('ogbg-molhiv'),
split_ratio=[0.6, 0.2, 0.2], verbose=True)
ds = data.AsGraphPredDataset(
DglGraphPropPredDataset("ogbg-molhiv"),
split_ratio=[0.6, 0.2, 0.2],
verbose=True,
)
assert len(ds.train_idx) == 24676
if __name__ == "__main__":
test_minigc()
test_gin()
test_data_hash()
......
import unittest

import backend as F
from test_utils import parametrize_idtype

import dgl
from dgl.dataloading import (
    NeighborSampler,
    as_edge_prediction_sampler,
    negative_sampler,
)
def create_test_graph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers
@@ -14,12 +20,16 @@ def create_test_graph(idtype):
# ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')])
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.device == F.ctx()
return g
@@ -28,14 +38,15 @@ def create_test_graph(idtype):
@parametrize_idtype
def test_edge_prediction_sampler(idtype):
g = create_test_graph(idtype)
sampler = NeighborSampler([10, 10])
sampler = as_edge_prediction_sampler(
sampler, negative_sampler=negative_sampler.Uniform(1)
)
seeds = F.copy_to(F.arange(0, 2, dtype=idtype), ctx=F.ctx())
# just a smoke test to make sure we don't fail internal assertions
result = sampler.sample(g, {"follows": seeds})
if __name__ == "__main__":
test_edge_prediction_sampler()
import itertools
import math
import unittest
from collections import Counter

import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype

import dgl
import dgl.function as fn
from dgl import DGLError
from dgl.ops import edge_softmax

rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2
def create_test_heterograph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers
@@ -27,37 +31,57 @@ def create_test_heterograph(idtype):
# ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')])
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1, 2, 1, 1], [0, 0, 1, 1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]),
("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]),
},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.device == F.ctx()
return g
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
def test_edge_softmax_unidirectional():
g = dgl.heterograph(
{
("A", "AB", "B"): (
[1, 2, 3, 1, 2, 3, 1, 2, 3],
[0, 0, 0, 1, 1, 1, 2, 2, 2],
),
("B", "BB", "B"): (
[0, 1, 2, 0, 1, 2, 0, 1, 2],
[0, 0, 0, 1, 1, 1, 2, 2, 2],
),
}
)
g = g.to(F.ctx())
g.edges["AB"].data["x"] = F.ones(9) * 2
g.edges["BB"].data["x"] = F.ones(9)
result = dgl.ops.edge_softmax(
g, {"AB": g.edges["AB"].data["x"], "BB": g.edges["BB"].data["x"]}
)
ab = result["A", "AB", "B"]
bb = result["B", "BB", "B"]
e2 = F.zeros_like(ab) + math.exp(2) / ((math.exp(2) + math.exp(1)) * 3)
e1 = F.zeros_like(bb) + math.exp(1) / ((math.exp(2) + math.exp(1)) * 3)
assert F.allclose(ab, e2)
assert F.allclose(bb, e1)
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@pytest.mark.parametrize("g", get_cases(["clique"]))
@pytest.mark.parametrize("norm_by", ["src", "dst"])
# @pytest.mark.parametrize('shp', edge_softmax_shapes)
@parametrize_idtype
def test_edge_softmax(g, norm_by, idtype):
@@ -65,20 +89,20 @@ def test_edge_softmax(g, norm_by, idtype):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_edges("plays"), feat_size))
x2 = F.randn((g.num_edges("follows"), feat_size))
x3 = F.randn((g.num_edges("develops"), feat_size))
x4 = F.randn((g.num_edges("wishes"), feat_size))
F.attach_grad(F.clone(x1))
F.attach_grad(F.clone(x2))
F.attach_grad(F.clone(x3))
F.attach_grad(F.clone(x4))
g["plays"].edata["eid"] = x1
g["follows"].edata["eid"] = x2
g["develops"].edata["eid"] = x3
g["wishes"].edata["eid"] = x4
#################################################################
# edge_softmax() on homogeneous graph
@@ -89,12 +113,12 @@ def test_edge_softmax(g, norm_by, idtype):
hm_x = F.cat((x3, x2, x1, x4), 0)
hm_e = F.attach_grad(F.clone(hm_x))
score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by)
hm_g.edata["score"] = score_hm
ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes)
r1 = ht_g.edata["score"][("user", "plays", "game")]
r2 = ht_g.edata["score"][("user", "follows", "user")]
r3 = ht_g.edata["score"][("developer", "develops", "game")]
r4 = ht_g.edata["score"][("user", "wishes", "game")]
F.backward(F.reduce_sum(r1) + F.reduce_sum(r2))
grad_edata_hm = F.grad(hm_e)
@@ -106,18 +130,22 @@ def test_edge_softmax(g, norm_by, idtype):
e2 = F.attach_grad(F.clone(x2))
e3 = F.attach_grad(F.clone(x3))
e4 = F.attach_grad(F.clone(x4))
e = {
("user", "follows", "user"): e2,
("user", "plays", "game"): e1,
("user", "wishes", "game"): e4,
("developer", "develops", "game"): e3,
}
with F.record_grad():
score = edge_softmax(g, e, norm_by=norm_by)
r5 = score[("user", "plays", "game")]
r6 = score[("user", "follows", "user")]
r7 = score[("developer", "develops", "game")]
r8 = score[("user", "wishes", "game")]
F.backward(F.reduce_sum(r5) + F.reduce_sum(r6))
grad_edata_ht = F.cat(
(F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0
)
# correctness check
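    # (identical scores and gradients between the homogeneous graph and its
    # heterogeneous counterpart)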
assert F.allclose(r1, r5)
assert F.allclose(r2, r6)
@@ -125,5 +153,6 @@ def test_edge_softmax(g, norm_by, idtype):
assert F.allclose(r4, r8)
assert F.allclose(grad_edata_hm, grad_edata_ht)
if __name__ == "__main__":
test_edge_softmax_unidirectional()
import os
import unittest

import backend as F
import numpy as np
import pytest

import dgl
@unittest.skipIf(os.name == "nt", reason="Cython only works on linux")
def test_cython():
import dgl._ffi._cy3.core
@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback(arg):
def cb(x):
return x + 1
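    # round-trip through the FFI: the C++ side invokes the Python callback
    # with `arg` and returns its result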
ret = dgl._api_internal._TestPythonCallback(cb, arg)
assert ret == arg + 1
@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array(dtype):
def cb(x):
return F.to_dgl_nd(F.from_dgl_nd(x) + 1)
arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
ret = F.from_dgl_nd(
dgl._api_internal._TestPythonCallback(cb, F.to_dgl_nd(arg))
)
assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)
@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback_thread(arg):
def cb(x):
return x + 1
ret = dgl._api_internal._TestPythonCallbackThread(cb, arg)
assert ret == arg + 1
@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array_thread(dtype):
def cb(x):
return F.to_dgl_nd(F.from_dgl_nd(x) + 1)
arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
ret = F.from_dgl_nd(
dgl._api_internal._TestPythonCallbackThread(cb, F.to_dgl_nd(arg))
)
assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)
import unittest

import backend as F
import numpy as np
from test_utils import parametrize_idtype

import dgl
from dgl.utils import Filter
def test_graph_filter():
g = dgl.DGLGraph().to(F.ctx())
g.add_nodes(4)
g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
n_repr = np.zeros((4, 5))
e_repr = np.zeros((4, 5))
@@ -17,11 +20,11 @@ def test_graph_filter():
n_repr = F.copy_to(F.zerocopy_from_numpy(n_repr), F.ctx())
e_repr = F.copy_to(F.zerocopy_from_numpy(e_repr), F.ctx())
g.ndata["a"] = n_repr
g.edata["a"] = e_repr
def predicate(r):
return F.max(r.data["a"], 1) > 0
# full node filter
n_idx = g.filter_nodes(predicate)
@@ -39,28 +42,35 @@ def test_graph_filter():
e_idx = g.filter_edges(predicate, [0, 1])
assert set(F.zerocopy_to_numpy(e_idx)) == {1}
@unittest.skipIf(
F._default_context_str == "cpu", reason="CPU not yet supported"
)
@parametrize_idtype
def test_array_filter(idtype):
f = Filter(
F.copy_to(F.tensor([0, 1, 9, 4, 6, 5, 7], dtype=idtype), F.ctx())
)
x = F.copy_to(F.tensor([0, 3, 9, 11], dtype=idtype), F.ctx())
y = F.copy_to(
F.tensor([0, 19, 0, 28, 3, 9, 11, 4, 5], dtype=idtype), F.ctx()
)
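    # find_included_indices returns the positions in the query tensor whose
    # values appear in the filter set, find_excluded_indices the complement;
    # e.g. of x = [0, 3, 9, 11], only 0 and 9 are in f (positions 0 and 2)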
xi_act = f.find_included_indices(x)
xi_exp = F.copy_to(F.tensor([0, 2], dtype=idtype), F.ctx())
assert F.array_equal(xi_act, xi_exp)
xe_act = f.find_excluded_indices(x)
xe_exp = F.copy_to(F.tensor([1, 3], dtype=idtype), F.ctx())
assert F.array_equal(xe_act, xe_exp)
yi_act = f.find_included_indices(y)
yi_exp = F.copy_to(F.tensor([0, 2, 5, 7, 8], dtype=idtype), F.ctx())
assert F.array_equal(yi_act, yi_exp)
ye_act = f.find_excluded_indices(y)
ye_exp = F.copy_to(F.tensor([1, 3, 4, 6], dtype=idtype), F.ctx())
assert F.array_equal(ye_act, ye_exp)
if __name__ == "__main__":
test_graph_filter()
test_array_filter()
import pickle
import unittest

import backend as F
import numpy as np
from test_utils import parametrize_idtype

import dgl
import dgl.ndarray as nd
from dgl.frame import Column
def test_column_subcolumn():
data = F.copy_to(
F.tensor(
[
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 9.0, 0.0],
[3.0, 2.0, 1.0, 0.0],
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 4.0, 0.0],
]
),
F.ctx(),
)
original = Column(data)
# subcolumn from cpu context
@@ -28,22 +38,32 @@ def test_column_subcolumn():
assert len(l2) == i2.shape[0]
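    # taking a subcolumn of a subcolumn must be equivalent to gathering the
    # composed indices from the original data, i.e. l2.data == data[i1[i2]]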
i1i2 = F.copy_to(F.gather_row(i1, F.copy_to(i2, F.context(i1))), F.ctx())
assert F.array_equal(l2.data, F.gather_row(data, i1i2))
# next subcolumn also from target context
i3 = F.copy_to(F.tensor([1], dtype=F.int64), F.ctx())
l3 = l2.subcolumn(i3)
assert len(l3) == i3.shape[0]
i1i2i3 = F.copy_to(
F.gather_row(i1i2, F.copy_to(i3, F.context(i1i2))), F.ctx()
)
assert F.array_equal(l3.data, F.gather_row(data, i1i2i3))
def test_serialize_deserialize_plain():
data = F.copy_to(
F.tensor(
[
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 9.0, 0.0],
[3.0, 2.0, 1.0, 0.0],
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 4.0, 0.0],
]
),
F.ctx(),
)
original = Column(data)
serial = pickle.dumps(original)
@@ -52,12 +72,20 @@ def test_serialize_deserialize_plain():
assert F.array_equal(new.data, original.data)
def test_serialize_deserialize_subcolumn():
data = F.copy_to(
F.tensor(
[
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 9.0, 0.0],
[3.0, 2.0, 1.0, 0.0],
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 4.0, 0.0],
]
),
F.ctx(),
)
original = Column(data)
# subcolumn from cpu context
@@ -69,12 +97,20 @@ def test_serialize_deserialize_subcolumn():
assert F.array_equal(new.data, l1.data)
def test_serialize_deserialize_dtype():
data = F.copy_to(
F.tensor(
[
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 9.0, 0.0],
[3.0, 2.0, 1.0, 0.0],
[1.0, 1.0, 1.0, 1.0],
[0.0, 2.0, 4.0, 0.0],
]
),
F.ctx(),
)
original = Column(data)
original = original.astype(F.int64)
......
import time
import unittest
from timeit import default_timer

import backend as F
import numpy as np
import pytest
from test_utils import get_cases, parametrize_idtype

import dgl
import dgl.function as fn
iters = 5
n_edge_scale = 1
......
import unittest

import backend as F
import numpy as np

import dgl


@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU random choice not implemented"
)
def test_rand_graph():
g = dgl.rand_graph(10000, 100000)
assert g.number_of_nodes() == 10000
@@ -18,5 +23,6 @@ def test_rand_graph():
assert F.array_equal(u1, u2)
assert F.array_equal(v1, v2)
if __name__ == "__main__":
test_rand_graph()
import unittest

import backend as F
import numpy as np

import dgl
import dgl.ndarray as nd
from dgl.utils import toindex


@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support inplace update",
)
def test_dlpack():
# test dlpack conversion.
def nd2th():
ans = np.array(
[[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
)
x = nd.array(np.zeros((3, 4), dtype=np.float32))
dl = x.to_dlpack()
y = F.zerocopy_from_dlpack(dl)
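        # x and y now share the same buffer (zero-copy), so the writes to y
        # in the elided lines below are expected to be visible in x.asnumpy()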
@@ -21,9 +27,9 @@ def test_dlpack():
assert np.allclose(x.asnumpy(), ans)
def th2nd():
ans = np.array(
[[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
)
x = F.zeros((3, 4))
dl = F.zerocopy_to_dlpack(x)
y = nd.from_dlpack(dl)
@@ -37,7 +43,7 @@ def test_dlpack():
ans = np.array([0, 2])
y = x[:2, 0]
# Uncomment this line and comment the one below to observe error
# dl = dlpack.to_dlpack(y)
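    # (the slice y = x[:2, 0] is a strided, non-contiguous view; presumably
    # the raw to_dlpack path rejects it, while zerocopy_to_dlpack handles it)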
dl = F.zerocopy_to_dlpack(y)
z = nd.from_dlpack(dl)
print(x)
......