Unverified Commit 98325b10 authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4691)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent c24e285a
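
For context (not part of the commit itself): a formatting pass like this one is normally produced by running Black over the source tree from the command line. A minimal sketch of reproducing the same normalization programmatically, assuming the black package is installed; the snippet and its input string are illustrative only.

    import black

    # Black rewrites single quotes to double quotes and re-wraps long calls,
    # which is the kind of change shown throughout the diff below.
    src = "NodeSpace = namedtuple('NodeSpace', ['data'])\n"
    print(black.format_str(src, mode=black.Mode()))
    # prints: NodeSpace = namedtuple("NodeSpace", ["data"])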
"""Views of DGLGraph.""" """Views of DGLGraph."""
from __future__ import absolute_import from __future__ import absolute_import
from collections import namedtuple, defaultdict from collections import defaultdict, namedtuple
from collections.abc import MutableMapping from collections.abc import MutableMapping
from .base import ALL, DGLError
from . import backend as F from . import backend as F
from .base import ALL, DGLError
from .frame import LazyFeature from .frame import LazyFeature
NodeSpace = namedtuple('NodeSpace', ['data']) NodeSpace = namedtuple("NodeSpace", ["data"])
EdgeSpace = namedtuple('EdgeSpace', ['data']) EdgeSpace = namedtuple("EdgeSpace", ["data"])
class HeteroNodeView(object): class HeteroNodeView(object):
"""A NodeView class to act as G.nodes for a DGLHeteroGraph.""" """A NodeView class to act as G.nodes for a DGLHeteroGraph."""
__slots__ = ['_graph', '_typeid_getter']
__slots__ = ["_graph", "_typeid_getter"]
def __init__(self, graph, typeid_getter): def __init__(self, graph, typeid_getter):
self._graph = graph self._graph = graph
@@ -23,8 +24,9 @@ class HeteroNodeView(object):
def __getitem__(self, key): def __getitem__(self, key):
if isinstance(key, slice): if isinstance(key, slice):
# slice # slice
if not (key.start is None and key.stop is None if not (
and key.step is None): key.start is None and key.stop is None and key.step is None
):
raise DGLError('Currently only full slice ":" is supported') raise DGLError('Currently only full slice ":" is supported')
nodes = ALL nodes = ALL
ntype = None ntype = None
@@ -38,20 +40,25 @@ class HeteroNodeView(object):
ntype = None ntype = None
ntid = self._typeid_getter(ntype) ntid = self._typeid_getter(ntype)
return NodeSpace( return NodeSpace(
data=HeteroNodeDataView( data=HeteroNodeDataView(self._graph, ntype, ntid, nodes)
self._graph, ntype, ntid, nodes)) )
def __call__(self, ntype=None): def __call__(self, ntype=None):
"""Return the nodes.""" """Return the nodes."""
ntid = self._typeid_getter(ntype) ntid = self._typeid_getter(ntype)
ret = F.arange(0, self._graph._graph.number_of_nodes(ntid), ret = F.arange(
dtype=self._graph.idtype, ctx=self._graph.device) 0,
self._graph._graph.number_of_nodes(ntid),
dtype=self._graph.idtype,
ctx=self._graph.device,
)
return ret return ret
class HeteroNodeDataView(MutableMapping): class HeteroNodeDataView(MutableMapping):
"""The data view class when G.ndata[ntype] is called.""" """The data view class when G.ndata[ntype] is called."""
__slots__ = ['_graph', '_ntype', '_ntid', '_nodes']
__slots__ = ["_graph", "_ntype", "_ntid", "_nodes"]
def __init__(self, graph, ntype, ntid, nodes): def __init__(self, graph, ntype, ntid, nodes):
self._graph = graph self._graph = graph
@@ -63,9 +70,9 @@ class HeteroNodeDataView(MutableMapping):
if isinstance(self._ntype, list): if isinstance(self._ntype, list):
ret = {} ret = {}
for (i, ntype) in enumerate(self._ntype): for (i, ntype) in enumerate(self._ntype):
value = self._graph._get_n_repr( value = self._graph._get_n_repr(self._ntid[i], self._nodes).get(
self._ntid[i], self._nodes).get( key, None
key, None) )
if value is not None: if value is not None:
ret[ntype] = value ret[ntype] = value
return ret return ret
@@ -76,17 +83,19 @@ class HeteroNodeDataView(MutableMapping):
if isinstance(val, LazyFeature): if isinstance(val, LazyFeature):
self._graph._node_frames[self._ntid][key] = val self._graph._node_frames[self._ntid][key] = val
elif isinstance(self._ntype, list): elif isinstance(self._ntype, list):
assert isinstance(val, dict), \ assert isinstance(val, dict), (
'Current HeteroNodeDataView has multiple node types, ' \ "Current HeteroNodeDataView has multiple node types, "
'please passing the node type and the corresponding data through a dict.' "please passing the node type and the corresponding data through a dict."
)
for (ntype, data) in val.items(): for (ntype, data) in val.items():
ntid = self._graph.get_ntype_id(ntype) ntid = self._graph.get_ntype_id(ntype)
self._graph._set_n_repr(ntid, self._nodes, {key: data}) self._graph._set_n_repr(ntid, self._nodes, {key: data})
else: else:
assert isinstance(val, dict) is False, \ assert isinstance(val, dict) is False, (
'The HeteroNodeDataView has only one node type. ' \ "The HeteroNodeDataView has only one node type. "
'please pass a tensor directly' "please pass a tensor directly"
)
self._graph._set_n_repr(self._ntid, self._nodes, {key: val}) self._graph._set_n_repr(self._ntid, self._nodes, {key: val})
def __delitem__(self, key): def __delitem__(self, key):
@@ -108,8 +117,10 @@ class HeteroNodeDataView(MutableMapping):
else: else:
ret = self._graph._get_n_repr(self._ntid, self._nodes) ret = self._graph._get_n_repr(self._ntid, self._nodes)
if as_dict: if as_dict:
ret = {key: ret[key] ret = {
for key in self._graph._node_frames[self._ntid]} key: ret[key]
for key in self._graph._node_frames[self._ntid]
}
return ret return ret
def __len__(self): def __len__(self):
@@ -130,7 +141,8 @@ class HeteroNodeDataView(MutableMapping):
class HeteroEdgeView(object): class HeteroEdgeView(object):
"""A EdgeView class to act as G.edges for a DGLHeteroGraph.""" """A EdgeView class to act as G.edges for a DGLHeteroGraph."""
__slots__ = ['_graph']
__slots__ = ["_graph"]
def __init__(self, graph): def __init__(self, graph):
self._graph = graph self._graph = graph
@@ -138,8 +150,9 @@ class HeteroEdgeView(object):
def __getitem__(self, key): def __getitem__(self, key):
if isinstance(key, slice): if isinstance(key, slice):
# slice # slice
if not (key.start is None and key.stop is None if not (
and key.step is None): key.start is None and key.stop is None and key.step is None
):
raise DGLError('Currently only full slice ":" is supported') raise DGLError('Currently only full slice ":" is supported')
edges = ALL edges = ALL
etype = None etype = None
@@ -168,23 +181,26 @@ class HeteroEdgeView(object):
class HeteroEdgeDataView(MutableMapping): class HeteroEdgeDataView(MutableMapping):
"""The data view class when G.edata[etype] is called.""" """The data view class when G.edata[etype] is called."""
__slots__ = ['_graph', '_etype', '_etid', '_edges']
__slots__ = ["_graph", "_etype", "_etid", "_edges"]
def __init__(self, graph, etype, edges): def __init__(self, graph, etype, edges):
self._graph = graph self._graph = graph
self._etype = etype self._etype = etype
self._etid = [self._graph.get_etype_id(t) for t in etype] \ self._etid = (
if isinstance(etype, list) \ [self._graph.get_etype_id(t) for t in etype]
if isinstance(etype, list)
else self._graph.get_etype_id(etype) else self._graph.get_etype_id(etype)
)
self._edges = edges self._edges = edges
def __getitem__(self, key): def __getitem__(self, key):
if isinstance(self._etype, list): if isinstance(self._etype, list):
ret = {} ret = {}
for (i, etype) in enumerate(self._etype): for (i, etype) in enumerate(self._etype):
value = self._graph._get_e_repr( value = self._graph._get_e_repr(self._etid[i], self._edges).get(
self._etid[i], self._edges).get( key, None
key, None) )
if value is not None: if value is not None:
ret[etype] = value ret[etype] = value
return ret return ret
@@ -195,17 +211,19 @@ class HeteroEdgeDataView(MutableMapping):
if isinstance(val, LazyFeature): if isinstance(val, LazyFeature):
self._graph._edge_frames[self._etid][key] = val self._graph._edge_frames[self._etid][key] = val
elif isinstance(self._etype, list): elif isinstance(self._etype, list):
assert isinstance(val, dict), \ assert isinstance(val, dict), (
'Current HeteroEdgeDataView has multiple edge types, ' \ "Current HeteroEdgeDataView has multiple edge types, "
'please pass the edge type and the corresponding data through a dict.' "please pass the edge type and the corresponding data through a dict."
)
for (etype, data) in val.items(): for (etype, data) in val.items():
etid = self._graph.get_etype_id(etype) etid = self._graph.get_etype_id(etype)
self._graph._set_e_repr(etid, self._edges, {key: data}) self._graph._set_e_repr(etid, self._edges, {key: data})
else: else:
assert isinstance(val, dict) is False, \ assert isinstance(val, dict) is False, (
'The HeteroEdgeDataView has only one edge type. ' \ "The HeteroEdgeDataView has only one edge type. "
'please pass a tensor directly' "please pass a tensor directly"
)
self._graph._set_e_repr(self._etid, self._edges, {key: val}) self._graph._set_e_repr(self._etid, self._edges, {key: val})
def __delitem__(self, key): def __delitem__(self, key):
@@ -227,8 +245,10 @@ class HeteroEdgeDataView(MutableMapping):
else: else:
ret = self._graph._get_e_repr(self._etid, self._edges) ret = self._graph._get_e_repr(self._etid, self._edges)
if as_dict: if as_dict:
ret = {key: ret[key] ret = {
for key in self._graph._edge_frames[self._etid]} key: ret[key]
for key in self._graph._edge_frames[self._etid]
}
return ret return ret
def __len__(self): def __len__(self):
...
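
For context (again not part of the diff): a minimal sketch of how the node/edge views reformatted above are used, adapted from the heterograph constructed in the tests later in this commit; the feature names and tensor shapes here are illustrative only.

    import dgl
    import torch

    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2, 1], [0, 0, 1, 1]),
            ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
        }
    )
    # HeteroNodeView / HeteroNodeDataView: g.nodes[ntype].data acts like a dict of tensors.
    g.nodes["user"].data["h"] = torch.randn(g.num_nodes("user"), 2)
    # HeteroEdgeDataView: per-relation edge features via g[etype].edata.
    g["plays"].edata["w"] = torch.ones(g.num_edges("plays"), 1)
    print(g.nodes["user"].data["h"].shape, g["plays"].edata["w"].shape)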
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import sys import glob
import os import os
import platform import platform
import sysconfig
import shutil import shutil
import glob import sys
import sysconfig
from setuptools import find_packages from setuptools import find_packages
from setuptools.dist import Distribution from setuptools.dist import Distribution
# need to use distutils.core for correct placement of cython dll # need to use distutils.core for correct placement of cython dll
if '--inplace' in sys.argv: if "--inplace" in sys.argv:
from distutils.core import setup from distutils.core import setup
from distutils.extension import Extension from distutils.extension import Extension
else: else:
@@ -31,34 +31,35 @@ def get_lib_path():
"""Get library path, name and version""" """Get library path, name and version"""
# We can not import `libinfo.py` in setup.py directly since __init__.py # We can not import `libinfo.py` in setup.py directly since __init__.py
# Will be invoked which introduces dependences # Will be invoked which introduces dependences
libinfo_py = os.path.join(CURRENT_DIR, './dgl/_ffi/libinfo.py') libinfo_py = os.path.join(CURRENT_DIR, "./dgl/_ffi/libinfo.py")
libinfo = {'__file__': libinfo_py} libinfo = {"__file__": libinfo_py}
exec( exec(
compile(open(libinfo_py, "rb").read(), libinfo_py, 'exec'), compile(open(libinfo_py, "rb").read(), libinfo_py, "exec"),
libinfo,
libinfo, libinfo,
libinfo) )
version = libinfo['__version__'] version = libinfo["__version__"]
lib_path = libinfo['find_lib_path']() lib_path = libinfo["find_lib_path"]()
libs = [lib_path[0]] libs = [lib_path[0]]
return libs, version return libs, version
def get_ta_lib_pattern(): def get_ta_lib_pattern():
if sys.platform.startswith('linux'): if sys.platform.startswith("linux"):
ta_lib_pattern = 'libtensoradapter_*.so' ta_lib_pattern = "libtensoradapter_*.so"
elif sys.platform.startswith('darwin'): elif sys.platform.startswith("darwin"):
ta_lib_pattern = 'libtensoradapter_*.dylib' ta_lib_pattern = "libtensoradapter_*.dylib"
elif sys.platform.startswith('win'): elif sys.platform.startswith("win"):
ta_lib_pattern = 'tensoradapter_*.dll' ta_lib_pattern = "tensoradapter_*.dll"
else: else:
raise NotImplementedError('Unsupported system: %s' % sys.platform) raise NotImplementedError("Unsupported system: %s" % sys.platform)
return ta_lib_pattern return ta_lib_pattern
LIBS, VERSION = get_lib_path() LIBS, VERSION = get_lib_path()
BACKENDS = ['pytorch'] BACKENDS = ["pytorch"]
TA_LIB_PATTERN = get_ta_lib_pattern() TA_LIB_PATTERN = get_ta_lib_pattern()
@@ -78,11 +79,9 @@ def cleanup():
for backend in BACKENDS: for backend in BACKENDS:
for ta_path in glob.glob( for ta_path in glob.glob(
os.path.join( os.path.join(
CURRENT_DIR, CURRENT_DIR, "dgl", "tensoradapter", backend, TA_LIB_PATTERN
"dgl", )
"tensoradapter", ):
backend,
TA_LIB_PATTERN)):
try: try:
os.remove(ta_path) os.remove(ta_path)
except BaseException: except BaseException:
@@ -91,17 +90,21 @@ def cleanup():
def config_cython(): def config_cython():
"""Try to configure cython and return cython configuration""" """Try to configure cython and return cython configuration"""
if sys.platform.startswith('win'): if sys.platform.startswith("win"):
print("WARNING: Cython is not supported on Windows, will compile without cython module") print(
"WARNING: Cython is not supported on Windows, will compile without cython module"
)
return [] return []
sys_cflags = sysconfig.get_config_var("CFLAGS") sys_cflags = sysconfig.get_config_var("CFLAGS")
if "i386" in sys_cflags and "x86_64" in sys_cflags: if "i386" in sys_cflags and "x86_64" in sys_cflags:
print( print(
"WARNING: Cython library may not be compiled correctly with both i386 and x64") "WARNING: Cython library may not be compiled correctly with both i386 and x64"
)
return [] return []
try: try:
from Cython.Build import cythonize from Cython.Build import cythonize
# from setuptools.extension import Extension # from setuptools.extension import Extension
if sys.version_info >= (3, 0): if sys.version_info >= (3, 0):
subdir = "_cy3" subdir = "_cy3"
@@ -109,32 +112,38 @@ def config_cython():
subdir = "_cy2" subdir = "_cy2"
ret = [] ret = []
path = "dgl/_ffi/_cython" path = "dgl/_ffi/_cython"
library_dirs = ['dgl', '../build/Release', '../build'] library_dirs = ["dgl", "../build/Release", "../build"]
libraries = ['dgl'] libraries = ["dgl"]
for fn in os.listdir(path): for fn in os.listdir(path):
if not fn.endswith(".pyx"): if not fn.endswith(".pyx"):
continue continue
ret.append(Extension( ret.append(
"dgl._ffi.%s.%s" % (subdir, fn[:-4]), Extension(
["dgl/_ffi/_cython/%s" % fn], "dgl._ffi.%s.%s" % (subdir, fn[:-4]),
include_dirs=["../include/", ["dgl/_ffi/_cython/%s" % fn],
"../third_party/dmlc-core/include", include_dirs=[
"../third_party/dlpack/include", "../include/",
], "../third_party/dmlc-core/include",
library_dirs=library_dirs, "../third_party/dlpack/include",
libraries=libraries, ],
# Crashes without this flag with GCC 5.3.1 library_dirs=library_dirs,
extra_compile_args=["-std=c++11"], libraries=libraries,
language="c++")) # Crashes without this flag with GCC 5.3.1
extra_compile_args=["-std=c++11"],
language="c++",
)
)
return cythonize(ret, force=True) return cythonize(ret, force=True)
except ImportError: except ImportError:
print("WARNING: Cython is not installed, will compile without cython module") print(
"WARNING: Cython is not installed, will compile without cython module"
)
return [] return []
include_libs = False include_libs = False
wheel_include_libs = False wheel_include_libs = False
if "bdist_wheel" in sys.argv or os.getenv('CONDA_BUILD'): if "bdist_wheel" in sys.argv or os.getenv("CONDA_BUILD"):
wheel_include_libs = True wheel_include_libs = True
elif "clean" in sys.argv: elif "clean" in sys.argv:
cleanup() cleanup()
@@ -147,78 +156,76 @@ setup_kwargs = {}
if wheel_include_libs: if wheel_include_libs:
with open("MANIFEST.in", "w") as fo: with open("MANIFEST.in", "w") as fo:
for path in LIBS: for path in LIBS:
shutil.copy(path, os.path.join(CURRENT_DIR, 'dgl')) shutil.copy(path, os.path.join(CURRENT_DIR, "dgl"))
dir_, libname = os.path.split(path) dir_, libname = os.path.split(path)
fo.write("include dgl/%s\n" % libname) fo.write("include dgl/%s\n" % libname)
for backend in BACKENDS: for backend in BACKENDS:
for ta_path in glob.glob( for ta_path in glob.glob(
os.path.join( os.path.join(dir_, "tensoradapter", backend, TA_LIB_PATTERN)
dir_, ):
"tensoradapter",
backend,
TA_LIB_PATTERN)):
ta_name = os.path.basename(ta_path) ta_name = os.path.basename(ta_path)
os.makedirs( os.makedirs(
os.path.join( os.path.join(CURRENT_DIR, "dgl", "tensoradapter", backend),
CURRENT_DIR, exist_ok=True,
'dgl', )
'tensoradapter',
backend),
exist_ok=True)
shutil.copy( shutil.copy(
os.path.join(dir_, 'tensoradapter', backend, ta_name), os.path.join(dir_, "tensoradapter", backend, ta_name),
os.path.join(CURRENT_DIR, 'dgl', 'tensoradapter', backend)) os.path.join(CURRENT_DIR, "dgl", "tensoradapter", backend),
)
fo.write( fo.write(
"include dgl/tensoradapter/%s/%s\n" % "include dgl/tensoradapter/%s/%s\n" % (backend, ta_name)
(backend, ta_name)) )
setup_kwargs = { setup_kwargs = {"include_package_data": True}
"include_package_data": True
}
# For source tree setup # For source tree setup
# Conda build also includes the binary library # Conda build also includes the binary library
if include_libs: if include_libs:
rpath = [os.path.relpath(path, CURRENT_DIR) for path in LIBS] rpath = [os.path.relpath(path, CURRENT_DIR) for path in LIBS]
data_files = [('dgl', rpath)] data_files = [("dgl", rpath)]
for path in LIBS: for path in LIBS:
for backend in BACKENDS: for backend in BACKENDS:
data_files.append(( data_files.append(
'dgl/tensoradapter/%s' % backend, (
glob.glob(os.path.join( "dgl/tensoradapter/%s" % backend,
os.path.dirname(os.path.relpath(path, CURRENT_DIR)), glob.glob(
'tensoradapter', backend, TA_LIB_PATTERN)))) os.path.join(
setup_kwargs = { os.path.dirname(os.path.relpath(path, CURRENT_DIR)),
"include_package_data": True, "tensoradapter",
"data_files": data_files backend,
} TA_LIB_PATTERN,
)
),
)
)
setup_kwargs = {"include_package_data": True, "data_files": data_files}
setup( setup(
name='dgl' + os.getenv('DGL_PACKAGE_SUFFIX', ''), name="dgl" + os.getenv("DGL_PACKAGE_SUFFIX", ""),
version=VERSION, version=VERSION,
description='Deep Graph Library', description="Deep Graph Library",
zip_safe=False, zip_safe=False,
maintainer='DGL Team', maintainer="DGL Team",
maintainer_email='wmjlyjemaine@gmail.com', maintainer_email="wmjlyjemaine@gmail.com",
packages=find_packages(), packages=find_packages(),
install_requires=[ install_requires=[
'numpy>=1.14.0', "numpy>=1.14.0",
'scipy>=1.1.0', "scipy>=1.1.0",
'networkx>=2.1', "networkx>=2.1",
'requests>=2.19.0', "requests>=2.19.0",
'tqdm', "tqdm",
'psutil>=5.8.0', "psutil>=5.8.0",
], ],
url='https://github.com/dmlc/dgl', url="https://github.com/dmlc/dgl",
distclass=BinaryDistribution, distclass=BinaryDistribution,
ext_modules=config_cython(), ext_modules=config_cython(),
classifiers=[ classifiers=[
'Development Status :: 3 - Alpha', "Development Status :: 3 - Alpha",
'Programming Language :: Python :: 3', "Programming Language :: Python :: 3",
'License :: OSI Approved :: Apache Software License', "License :: OSI Approved :: Apache Software License",
], ],
license='APACHE', license="APACHE",
**setup_kwargs **setup_kwargs
) )
...
@@ -8,10 +8,11 @@ List of affected files:
""" """
import os import os
import re import re
# current version # current version
# We use the version of the incoming release for code # We use the version of the incoming release for code
# that is under development # that is under development
__version__ = "0.10" + os.getenv('DGL_PRERELEASE', '') __version__ = "0.10" + os.getenv("DGL_PRERELEASE", "")
print(__version__) print(__version__)
# Implementations # Implementations
@@ -47,22 +48,24 @@ def main():
curr_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
proj_root = os.path.abspath(os.path.join(curr_dir, "..")) proj_root = os.path.abspath(os.path.join(curr_dir, ".."))
# python path # python path
update(os.path.join(proj_root, "python", "dgl", "_ffi", "libinfo.py"), update(
r"(?<=__version__ = \")[.0-9a-z]+", __version__) os.path.join(proj_root, "python", "dgl", "_ffi", "libinfo.py"),
r"(?<=__version__ = \")[.0-9a-z]+",
__version__,
)
# C++ header # C++ header
update( update(
os.path.join( os.path.join(proj_root, "include", "dgl", "runtime", "c_runtime_api.h"),
proj_root, '(?<=DGL_VERSION ")[.0-9a-z]+',
"include", __version__,
"dgl", )
"runtime",
"c_runtime_api.h"),
"(?<=DGL_VERSION \")[.0-9a-z]+",
__version__)
# conda # conda
for path in ["dgl"]: for path in ["dgl"]:
update(os.path.join(proj_root, "conda", path, "meta.yaml"), update(
"(?<=version: \")[.0-9a-z]+", __version__) os.path.join(proj_root, "conda", path, "meta.yaml"),
'(?<=version: ")[.0-9a-z]+',
__version__,
)
if __name__ == "__main__": if __name__ == "__main__":
...
import torch
import os import os
import torch
cmake_prefix_path = getattr( cmake_prefix_path = getattr(
torch.utils, torch.utils,
"cmake_prefix_path", "cmake_prefix_path",
os.path.join(os.path.dirname(torch.__file__), "share", "cmake")) os.path.join(os.path.dirname(torch.__file__), "share", "cmake"),
version = torch.__version__.split('+')[0] )
print(';'.join([cmake_prefix_path, version])) version = torch.__version__.split("+")[0]
print(";".join([cmake_prefix_path, version]))
from dgl.backend import *
from dgl.nn import *
from . import backend_unittest
import os
import importlib import importlib
import os
import sys import sys
import numpy as np import numpy as np
mod = importlib.import_module('.%s' % backend_name, __name__) from dgl.backend import *
from dgl.nn import *
from . import backend_unittest
mod = importlib.import_module(".%s" % backend_name, __name__)
thismod = sys.modules[__name__] thismod = sys.modules[__name__]
for api in backend_unittest.__dict__.keys(): for api in backend_unittest.__dict__.keys():
if api.startswith('__'): if api.startswith("__"):
continue continue
elif callable(mod.__dict__[api]): elif callable(mod.__dict__[api]):
# Tensor APIs used in unit tests MUST be supported across all backends # Tensor APIs used in unit tests MUST be supported across all backends
@@ -26,39 +29,51 @@ _arange = arange
_full = full _full = full
_full_1d = full_1d _full_1d = full_1d
_softmax = softmax _softmax = softmax
_default_context_str = os.getenv('DGLTESTDEV', 'cpu') _default_context_str = os.getenv("DGLTESTDEV", "cpu")
_context_dict = { _context_dict = {
'cpu': cpu(), "cpu": cpu(),
'gpu': cuda(), "gpu": cuda(),
} }
_default_context = _context_dict[_default_context_str] _default_context = _context_dict[_default_context_str]
def ctx(): def ctx():
return _default_context return _default_context
def gpu_ctx(): def gpu_ctx():
return (_default_context_str == 'gpu') return _default_context_str == "gpu"
def zeros(shape, dtype=float32, ctx=_default_context): def zeros(shape, dtype=float32, ctx=_default_context):
return _zeros(shape, dtype, ctx) return _zeros(shape, dtype, ctx)
def ones(shape, dtype=float32, ctx=_default_context): def ones(shape, dtype=float32, ctx=_default_context):
return _ones(shape, dtype, ctx) return _ones(shape, dtype, ctx)
def randn(shape): def randn(shape):
return copy_to(_randn(shape), _default_context) return copy_to(_randn(shape), _default_context)
def tensor(data, dtype=None): def tensor(data, dtype=None):
return copy_to(_tensor(data, dtype), _default_context) return copy_to(_tensor(data, dtype), _default_context)
def arange(start, stop, dtype=int64, ctx=None): def arange(start, stop, dtype=int64, ctx=None):
return _arange(start, stop, dtype, ctx if ctx is not None else _default_context) return _arange(
start, stop, dtype, ctx if ctx is not None else _default_context
)
def full(shape, fill_value, dtype, ctx=_default_context): def full(shape, fill_value, dtype, ctx=_default_context):
return _full(shape, fill_value, dtype, ctx) return _full(shape, fill_value, dtype, ctx)
def full_1d(length, fill_value, dtype, ctx=_default_context): def full_1d(length, fill_value, dtype, ctx=_default_context):
return _full_1d(length, fill_value, dtype, ctx) return _full_1d(length, fill_value, dtype, ctx)
def softmax(x, dim): def softmax(x, dim):
return _softmax(x, dim) return _softmax(x, dim)
@@ -5,102 +5,127 @@ unit testing, other than the ones used in the framework itself.
############################################################################### ###############################################################################
# Tensor, data type and context interfaces # Tensor, data type and context interfaces
def cuda(): def cuda():
"""Context object for CUDA.""" """Context object for CUDA."""
pass pass
def is_cuda_available(): def is_cuda_available():
"""Check whether CUDA is available.""" """Check whether CUDA is available."""
pass pass
############################################################################### ###############################################################################
# Tensor functions on feature data # Tensor functions on feature data
# -------------------------------- # --------------------------------
# These functions are performance critical, so it's better to have efficient # These functions are performance critical, so it's better to have efficient
# implementation in each framework. # implementation in each framework.
def array_equal(a, b): def array_equal(a, b):
"""Check whether the two tensors are *exactly* equal.""" """Check whether the two tensors are *exactly* equal."""
pass pass
def allclose(a, b, rtol=1e-4, atol=1e-4): def allclose(a, b, rtol=1e-4, atol=1e-4):
"""Check whether the two tensors are numerically close to each other.""" """Check whether the two tensors are numerically close to each other."""
pass pass
def randn(shape): def randn(shape):
"""Generate a tensor with elements from standard normal distribution.""" """Generate a tensor with elements from standard normal distribution."""
pass pass
def full(shape, fill_value, dtype, ctx): def full(shape, fill_value, dtype, ctx):
pass pass
def narrow_row_set(x, start, stop, new): def narrow_row_set(x, start, stop, new):
"""Set a slice of the given tensor to a new value.""" """Set a slice of the given tensor to a new value."""
pass pass
def sparse_to_numpy(x): def sparse_to_numpy(x):
"""Convert a sparse tensor to a numpy array.""" """Convert a sparse tensor to a numpy array."""
pass pass
def clone(x): def clone(x):
pass pass
def reduce_sum(x): def reduce_sum(x):
"""Sums all the elements into a single scalar.""" """Sums all the elements into a single scalar."""
pass pass
def softmax(x, dim): def softmax(x, dim):
"""Softmax Operation on Tensors""" """Softmax Operation on Tensors"""
pass pass
def spmm(x, y): def spmm(x, y):
"""Sparse dense matrix multiply""" """Sparse dense matrix multiply"""
pass pass
def add(a, b): def add(a, b):
"""Compute a + b""" """Compute a + b"""
pass pass
def sub(a, b): def sub(a, b):
"""Compute a - b""" """Compute a - b"""
pass pass
def mul(a, b): def mul(a, b):
"""Compute a * b""" """Compute a * b"""
pass pass
def div(a, b): def div(a, b):
"""Compute a / b""" """Compute a / b"""
pass pass
def sum(x, dim, keepdims=False): def sum(x, dim, keepdims=False):
"""Computes the sum of array elements over given axes""" """Computes the sum of array elements over given axes"""
pass pass
def max(x, dim): def max(x, dim):
"""Computes the max of array elements over given axes""" """Computes the max of array elements over given axes"""
pass pass
def min(x, dim): def min(x, dim):
"""Computes the min of array elements over given axes""" """Computes the min of array elements over given axes"""
pass pass
def prod(x, dim): def prod(x, dim):
"""Computes the prod of array elements over given axes""" """Computes the prod of array elements over given axes"""
pass pass
def matmul(a, b): def matmul(a, b):
"""Compute Matrix Multiplication between a and b""" """Compute Matrix Multiplication between a and b"""
pass pass
def dot(a, b): def dot(a, b):
"""Compute Dot between a and b""" """Compute Dot between a and b"""
pass pass
def abs(a): def abs(a):
"""Compute the absolute value of a""" """Compute the absolute value of a"""
pass pass
############################################################################### ###############################################################################
# Tensor functions used *only* on index tensor # Tensor functions used *only* on index tensor
# ---------------- # ----------------
...
from __future__ import absolute_import from __future__ import absolute_import
import numpy as np
import mxnet as mx import mxnet as mx
import mxnet.ndarray as nd import mxnet.ndarray as nd
import numpy as np
def cuda(): def cuda():
return mx.gpu() return mx.gpu()
def is_cuda_available(): def is_cuda_available():
# TODO: Does MXNet have a convenient function to test GPU availability/compilation? # TODO: Does MXNet have a convenient function to test GPU availability/compilation?
try: try:
@@ -15,65 +17,86 @@ def is_cuda_available():
except mx.MXNetError: except mx.MXNetError:
return False return False
def array_equal(a, b): def array_equal(a, b):
return nd.equal(a, b).asnumpy().all() return nd.equal(a, b).asnumpy().all()
def allclose(a, b, rtol=1e-4, atol=1e-4): def allclose(a, b, rtol=1e-4, atol=1e-4):
return np.allclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol) return np.allclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol)
def randn(shape): def randn(shape):
return nd.random.randn(*shape) return nd.random.randn(*shape)
def full(shape, fill_value, dtype, ctx): def full(shape, fill_value, dtype, ctx):
return nd.full(shape, fill_value, dtype=dtype, ctx=ctx) return nd.full(shape, fill_value, dtype=dtype, ctx=ctx)
def narrow_row_set(x, start, stop, new): def narrow_row_set(x, start, stop, new):
x[start:stop] = new x[start:stop] = new
def sparse_to_numpy(x): def sparse_to_numpy(x):
return x.asscipy().todense().A return x.asscipy().todense().A
def clone(x): def clone(x):
return x.copy() return x.copy()
def reduce_sum(x): def reduce_sum(x):
return x.sum() return x.sum()
def softmax(x, dim): def softmax(x, dim):
return nd.softmax(x, axis=dim) return nd.softmax(x, axis=dim)
def spmm(x, y): def spmm(x, y):
return nd.dot(x, y) return nd.dot(x, y)
def add(a, b): def add(a, b):
return a + b return a + b
def sub(a, b): def sub(a, b):
return a - b return a - b
def mul(a, b): def mul(a, b):
return a * b return a * b
def div(a, b): def div(a, b):
return a / b return a / b
def sum(x, dim, keepdims=False): def sum(x, dim, keepdims=False):
return x.sum(dim, keepdims=keepdims) return x.sum(dim, keepdims=keepdims)
def max(x, dim): def max(x, dim):
return x.max(dim) return x.max(dim)
def min(x, dim): def min(x, dim):
return x.min(dim) return x.min(dim)
def prod(x, dim): def prod(x, dim):
return x.prod(dim) return x.prod(dim)
def matmul(a, b): def matmul(a, b):
return nd.dot(a, b) return nd.dot(a, b)
def dot(a, b): def dot(a, b):
return nd.sum(mul(a, b), axis=-1) return nd.sum(mul(a, b), axis=-1)
def abs(a): def abs(a):
return nd.abs(a) return nd.abs(a)
@@ -2,72 +2,94 @@ from __future__ import absolute_import
import torch as th import torch as th
def cuda(): def cuda():
return th.device('cuda:0') return th.device("cuda:0")
def is_cuda_available(): def is_cuda_available():
return th.cuda.is_available() return th.cuda.is_available()
def array_equal(a, b): def array_equal(a, b):
return th.equal(a.cpu(), b.cpu()) return th.equal(a.cpu(), b.cpu())
def allclose(a, b, rtol=1e-4, atol=1e-4): def allclose(a, b, rtol=1e-4, atol=1e-4):
return th.allclose(a.float().cpu(), return th.allclose(a.float().cpu(), b.float().cpu(), rtol=rtol, atol=atol)
b.float().cpu(), rtol=rtol, atol=atol)
def randn(shape): def randn(shape):
return th.randn(*shape) return th.randn(*shape)
def full(shape, fill_value, dtype, ctx): def full(shape, fill_value, dtype, ctx):
return th.full(shape, fill_value, dtype=dtype, device=ctx) return th.full(shape, fill_value, dtype=dtype, device=ctx)
def narrow_row_set(x, start, stop, new): def narrow_row_set(x, start, stop, new):
x[start:stop] = new x[start:stop] = new
def sparse_to_numpy(x): def sparse_to_numpy(x):
return x.to_dense().numpy() return x.to_dense().numpy()
def clone(x): def clone(x):
return x.clone() return x.clone()
def reduce_sum(x): def reduce_sum(x):
return x.sum() return x.sum()
def softmax(x, dim): def softmax(x, dim):
return th.softmax(x, dim) return th.softmax(x, dim)
def spmm(x, y): def spmm(x, y):
return th.spmm(x, y) return th.spmm(x, y)
def add(a, b): def add(a, b):
return a + b return a + b
def sub(a, b): def sub(a, b):
return a - b return a - b
def mul(a, b): def mul(a, b):
return a * b return a * b
def div(a, b): def div(a, b):
return a / b return a / b
def sum(x, dim, keepdims=False): def sum(x, dim, keepdims=False):
return x.sum(dim, keepdims=keepdims) return x.sum(dim, keepdims=keepdims)
def max(x, dim): def max(x, dim):
return x.max(dim)[0] return x.max(dim)[0]
def min(x, dim): def min(x, dim):
return x.min(dim)[0] return x.min(dim)[0]
def prod(x, dim): def prod(x, dim):
return x.prod(dim) return x.prod(dim)
def matmul(a, b): def matmul(a, b):
return a @ b return a @ b
def dot(a, b): def dot(a, b):
return sum(mul(a, b), dim=-1) return sum(mul(a, b), dim=-1)
def abs(a): def abs(a):
return a.abs() return a.abs()
@@ -6,7 +6,7 @@ from scipy.sparse import coo_matrix
def cuda(): def cuda():
return '/gpu:0' return "/gpu:0"
def is_cuda_available(): def is_cuda_available():
@@ -18,8 +18,12 @@ def array_equal(a, b):
def allclose(a, b, rtol=1e-4, atol=1e-4): def allclose(a, b, rtol=1e-4, atol=1e-4):
return np.allclose(tf.convert_to_tensor(a).numpy(), return np.allclose(
tf.convert_to_tensor(b).numpy(), rtol=rtol, atol=atol) tf.convert_to_tensor(a).numpy(),
tf.convert_to_tensor(b).numpy(),
rtol=rtol,
atol=atol,
)
def randn(shape): def randn(shape):
@@ -97,5 +101,6 @@ def matmul(a, b):
def dot(a, b): def dot(a, b):
return sum(mul(a, b), dim=-1) return sum(mul(a, b), dim=-1)
def abs(a): def abs(a):
return tf.abs(a) return tf.abs(a)
import dgl
import dgl.function as fn
from collections import Counter
import numpy as np
import scipy.sparse as ssp
import itertools import itertools
import unittest
from collections import Counter
from itertools import product from itertools import product
import backend as F import backend as F
import networkx as nx import networkx as nx
import unittest, pytest import numpy as np
from dgl import DGLError import pytest
import scipy.sparse as ssp
import test_utils import test_utils
from test_utils import parametrize_idtype, get_cases
from scipy.sparse import rand from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {'sum': fn.sum, 'max': fn.max, 'min': fn.min, 'mean': fn.mean} rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {'sum': 0, 'max': float("-inf")} fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2 feat_size = 2
@unittest.skipIf(dgl.backend.backend_name != 'pytorch', reason='Only support PyTorch for now')
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
def create_test_heterograph(idtype): def create_test_heterograph(idtype):
# test heterograph from the docstring, plus a user -- wishes -- game relation # test heterograph from the docstring, plus a user -- wishes -- game relation
# 3 users, 2 games, 2 developers # 3 users, 2 games, 2 developers
@@ -28,12 +33,16 @@ def create_test_heterograph(idtype):
# ('user', 'wishes', 'game'), # ('user', 'wishes', 'game'),
# ('developer', 'develops', 'game')]) # ('developer', 'develops', 'game')])
g = dgl.heterograph({ g = dgl.heterograph(
('user', 'follows', 'user'): ([0, 1, 2, 1], [0, 0, 1, 1]), {
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "follows", "user"): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 1, 1], [0, 0, 1]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
('developer', 'develops', 'game'): ([0, 1, 0], [0, 1, 1]), ("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]),
}, idtype=idtype, device=F.ctx()) ("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]),
},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype assert g.idtype == idtype
assert g.device == F.ctx() assert g.device == F.ctx()
return g return g
@@ -45,49 +54,53 @@ def test_unary_copy_u(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes('user'), feat_size)) x1 = F.randn((g.num_nodes("user"), feat_size))
x2 = F.randn((g.num_nodes('developer'), feat_size)) x2 = F.randn((g.num_nodes("developer"), feat_size))
F.attach_grad(x1) F.attach_grad(x1)
F.attach_grad(x2) F.attach_grad(x2)
g.nodes['user'].data['h'] = x1 g.nodes["user"].data["h"] = x1
g.nodes['developer'].data['h'] = x2 g.nodes["developer"].data["h"] = x2
################################################################# #################################################################
# apply_edges() is called on each relation type separately # apply_edges() is called on each relation type separately
################################################################# #################################################################
with F.record_grad(): with F.record_grad():
[g.apply_edges(fn.copy_u('h', 'm'), etype = rel) [
for rel in g.canonical_etypes] g.apply_edges(fn.copy_u("h", "m"), etype=rel)
r1 = g['plays'].edata['m'] for rel in g.canonical_etypes
]
r1 = g["plays"].edata["m"]
F.backward(r1, F.ones(r1.shape)) F.backward(r1, F.ones(r1.shape))
n_grad1 = F.grad(g.ndata['h']['user']) n_grad1 = F.grad(g.ndata["h"]["user"])
# TODO (Israt): clear not working # TODO (Israt): clear not working
g.edata['m'].clear() g.edata["m"].clear()
################################################################# #################################################################
# apply_edges() is called on all relation types # apply_edges() is called on all relation types
################################################################# #################################################################
g.apply_edges(fn.copy_u('h', 'm')) g.apply_edges(fn.copy_u("h", "m"))
r2 = g['plays'].edata['m'] r2 = g["plays"].edata["m"]
F.backward(r2, F.ones(r2.shape)) F.backward(r2, F.ones(r2.shape))
n_grad2 = F.grad(g.nodes['user'].data['h']) n_grad2 = F.grad(g.nodes["user"].data["h"])
# correctness check # correctness check
def _print_error(a, b): def _print_error(a, b):
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())): for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y): if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y)) print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2): if not F.allclose(r1, r2):
_print_error(r1, r2) _print_error(r1, r2)
assert F.allclose(r1, r2) assert F.allclose(r1, r2)
if not F.allclose(n_grad1, n_grad2): if not F.allclose(n_grad1, n_grad2):
print('node grad') print("node grad")
_print_error(n_grad1, n_grad2) _print_error(n_grad1, n_grad2)
assert(F.allclose(n_grad1, n_grad2)) assert F.allclose(n_grad1, n_grad2)
_test(fn.copy_u) _test(fn.copy_u)
@@ -99,51 +112,55 @@ def test_unary_copy_e(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
feat_size = 2 feat_size = 2
x1 = F.randn((4,feat_size)) x1 = F.randn((4, feat_size))
x2 = F.randn((4,feat_size)) x2 = F.randn((4, feat_size))
x3 = F.randn((3,feat_size)) x3 = F.randn((3, feat_size))
x4 = F.randn((3,feat_size)) x4 = F.randn((3, feat_size))
F.attach_grad(x1) F.attach_grad(x1)
F.attach_grad(x2) F.attach_grad(x2)
F.attach_grad(x3) F.attach_grad(x3)
F.attach_grad(x4) F.attach_grad(x4)
g['plays'].edata['eid'] = x1 g["plays"].edata["eid"] = x1
g['follows'].edata['eid'] = x2 g["follows"].edata["eid"] = x2
g['develops'].edata['eid'] = x3 g["develops"].edata["eid"] = x3
g['wishes'].edata['eid'] = x4 g["wishes"].edata["eid"] = x4
################################################################# #################################################################
# apply_edges() is called on each relation type separately # apply_edges() is called on each relation type separately
################################################################# #################################################################
with F.record_grad(): with F.record_grad():
[g.apply_edges(fn.copy_e('eid', 'm'), etype = rel) [
for rel in g.canonical_etypes] g.apply_edges(fn.copy_e("eid", "m"), etype=rel)
r1 = g['develops'].edata['m'] for rel in g.canonical_etypes
]
r1 = g["develops"].edata["m"]
F.backward(r1, F.ones(r1.shape)) F.backward(r1, F.ones(r1.shape))
e_grad1 = F.grad(g['develops'].edata['eid']) e_grad1 = F.grad(g["develops"].edata["eid"])
################################################################# #################################################################
# apply_edges() is called on all relation types # apply_edges() is called on all relation types
################################################################# #################################################################
g.apply_edges(fn.copy_e('eid', 'm')) g.apply_edges(fn.copy_e("eid", "m"))
r2 = g['develops'].edata['m'] r2 = g["develops"].edata["m"]
F.backward(r2, F.ones(r2.shape)) F.backward(r2, F.ones(r2.shape))
e_grad2 = F.grad(g['develops'].edata['eid']) e_grad2 = F.grad(g["develops"].edata["eid"])
# # correctness check # # correctness check
def _print_error(a, b): def _print_error(a, b):
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())): for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y): if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y)) print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2): if not F.allclose(r1, r2):
_print_error(r1, r2) _print_error(r1, r2)
assert F.allclose(r1, r2) assert F.allclose(r1, r2)
if not F.allclose(e_grad1, e_grad2): if not F.allclose(e_grad1, e_grad2):
print('edge grad') print("edge grad")
_print_error(e_grad1, e_grad2) _print_error(e_grad1, e_grad2)
assert(F.allclose(e_grad1, e_grad2)) assert F.allclose(e_grad1, e_grad2)
_test(fn.copy_e) _test(fn.copy_e)
@@ -154,14 +171,14 @@ def test_binary_op(idtype):
g = create_test_heterograph(idtype) g = create_test_heterograph(idtype)
n1 = F.randn((g.num_nodes('user'), feat_size)) n1 = F.randn((g.num_nodes("user"), feat_size))
n2 = F.randn((g.num_nodes('developer'), feat_size)) n2 = F.randn((g.num_nodes("developer"), feat_size))
n3 = F.randn((g.num_nodes('game'), feat_size)) n3 = F.randn((g.num_nodes("game"), feat_size))
x1 = F.randn((g.num_edges('plays'),feat_size)) x1 = F.randn((g.num_edges("plays"), feat_size))
x2 = F.randn((g.num_edges('follows'),feat_size)) x2 = F.randn((g.num_edges("follows"), feat_size))
x3 = F.randn((g.num_edges('develops'),feat_size)) x3 = F.randn((g.num_edges("develops"), feat_size))
x4 = F.randn((g.num_edges('wishes'),feat_size)) x4 = F.randn((g.num_edges("wishes"), feat_size))
builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs) builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
builtin_msg = getattr(fn, builtin_msg_name) builtin_msg = getattr(fn, builtin_msg_name)
@@ -173,25 +190,27 @@ def test_binary_op(idtype):
F.attach_grad(n1) F.attach_grad(n1)
F.attach_grad(n2) F.attach_grad(n2)
F.attach_grad(n3) F.attach_grad(n3)
g.nodes['user'].data['h'] = n1 g.nodes["user"].data["h"] = n1
g.nodes['developer'].data['h'] = n2 g.nodes["developer"].data["h"] = n2
g.nodes['game'].data['h'] = n3 g.nodes["game"].data["h"] = n3
F.attach_grad(x1) F.attach_grad(x1)
F.attach_grad(x2) F.attach_grad(x2)
F.attach_grad(x3) F.attach_grad(x3)
F.attach_grad(x4) F.attach_grad(x4)
g['plays'].edata['h'] = x1 g["plays"].edata["h"] = x1
g['follows'].edata['h'] = x2 g["follows"].edata["h"] = x2
g['develops'].edata['h'] = x3 g["develops"].edata["h"] = x3
g['wishes'].edata['h'] = x4 g["wishes"].edata["h"] = x4
with F.record_grad(): with F.record_grad():
[g.apply_edges(builtin_msg('h', 'h', 'm'), etype = rel) [
for rel in g.canonical_etypes] g.apply_edges(builtin_msg("h", "h", "m"), etype=rel)
r1 = g['plays'].edata['m'] for rel in g.canonical_etypes
]
r1 = g["plays"].edata["m"]
loss = F.sum(r1.view(-1), 0) loss = F.sum(r1.view(-1), 0)
F.backward(loss) F.backward(loss)
n_grad1 = F.grad(g.nodes['game'].data['h']) n_grad1 = F.grad(g.nodes["game"].data["h"])
################################################################# #################################################################
# apply_edges() is called on all relation types # apply_edges() is called on all relation types
@@ -200,38 +219,40 @@ def test_binary_op(idtype):
F.attach_grad(n1) F.attach_grad(n1)
F.attach_grad(n2) F.attach_grad(n2)
F.attach_grad(n3) F.attach_grad(n3)
g.nodes['user'].data['h'] = n1 g.nodes["user"].data["h"] = n1
g.nodes['developer'].data['h'] = n2 g.nodes["developer"].data["h"] = n2
g.nodes['game'].data['h'] = n3 g.nodes["game"].data["h"] = n3
F.attach_grad(x1) F.attach_grad(x1)
F.attach_grad(x2) F.attach_grad(x2)
F.attach_grad(x3) F.attach_grad(x3)
F.attach_grad(x4) F.attach_grad(x4)
g['plays'].edata['h'] = x1 g["plays"].edata["h"] = x1
g['follows'].edata['h'] = x2 g["follows"].edata["h"] = x2
g['develops'].edata['h'] = x3 g["develops"].edata["h"] = x3
g['wishes'].edata['h'] = x4 g["wishes"].edata["h"] = x4
with F.record_grad(): with F.record_grad():
g.apply_edges(builtin_msg('h', 'h', 'm')) g.apply_edges(builtin_msg("h", "h", "m"))
r2 = g['plays'].edata['m'] r2 = g["plays"].edata["m"]
loss = F.sum(r2.view(-1), 0) loss = F.sum(r2.view(-1), 0)
F.backward(loss) F.backward(loss)
n_grad2 = F.grad(g.nodes['game'].data['h']) n_grad2 = F.grad(g.nodes["game"].data["h"])
# correctness check # correctness check
def _print_error(a, b): def _print_error(a, b):
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())): for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y): if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y)) print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2): if not F.allclose(r1, r2):
_print_error(r1, r2) _print_error(r1, r2)
assert F.allclose(r1, r2) assert F.allclose(r1, r2)
if n_grad1 is not None or n_grad2 is not None: if n_grad1 is not None or n_grad2 is not None:
if not F.allclose(n_grad1, n_grad2): if not F.allclose(n_grad1, n_grad2):
print('node grad') print("node grad")
_print_error(n_grad1, n_grad2) _print_error(n_grad1, n_grad2)
assert(F.allclose(n_grad1, n_grad2)) assert F.allclose(n_grad1, n_grad2)
target = ["u", "v", "e"] target = ["u", "v", "e"]
for lhs, rhs in product(target, target): for lhs, rhs in product(target, target):
@@ -242,6 +263,6 @@ def test_binary_op(idtype):
_test(lhs, rhs, binary_op) _test(lhs, rhs, binary_op)
if __name__ == '__main__': if __name__ == "__main__":
test_unary_copy_u() test_unary_copy_u()
test_unary_copy_e() test_unary_copy_e()
import backend as F
import os import os
import unittest import unittest
import backend as F
def test_set_default_backend(): def test_set_default_backend():
default_dir = os.path.join(os.path.expanduser('~'), '.dgl_unit_test') default_dir = os.path.join(os.path.expanduser("~"), ".dgl_unit_test")
F.set_default_backend(default_dir, 'pytorch') F.set_default_backend(default_dir, "pytorch")
# make sure the config file was created # make sure the config file was created
assert os.path.exists(os.path.join(default_dir, 'config.json')) assert os.path.exists(os.path.join(default_dir, "config.json"))
import gzip
import os
import tempfile
import unittest import unittest
import backend as F import backend as F
import numpy as np import numpy as np
import gzip
import tempfile
import os
import pandas as pd import pandas as pd
import yaml
import pytest import pytest
import yaml
import dgl import dgl
import dgl.data as data import dgl.data as data
from dgl import DGLError from dgl import DGLError
import dgl
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_minigc(): def test_minigc():
ds = data.MiniGCDataset(16, 10, 20) ds = data.MiniGCDataset(16, 10, 20)
@@ -24,35 +29,45 @@ def test_minigc():
g2 = ds[0][0] g2 = ds[0][0]
assert g2.num_edges() - g1.num_edges() == g1.num_nodes() assert g2.num_edges() - g1.num_edges() == g1.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gin(): def test_gin():
ds_n_graphs = { ds_n_graphs = {
'MUTAG': 188, "MUTAG": 188,
'IMDBBINARY': 1000, "IMDBBINARY": 1000,
'IMDBMULTI': 1500, "IMDBMULTI": 1500,
'PROTEINS': 1113, "PROTEINS": 1113,
'PTC': 344, "PTC": 344,
} }
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
for name, n_graphs in ds_n_graphs.items(): for name, n_graphs in ds_n_graphs.items():
ds = data.GINDataset(name, self_loop=False, degree_as_nlabel=False) ds = data.GINDataset(name, self_loop=False, degree_as_nlabel=False)
assert len(ds) == n_graphs, (len(ds), name) assert len(ds) == n_graphs, (len(ds), name)
g1 = ds[0][0] g1 = ds[0][0]
ds = data.GINDataset(name, self_loop=False, degree_as_nlabel=False, transform=transform) ds = data.GINDataset(
name, self_loop=False, degree_as_nlabel=False, transform=transform
)
g2 = ds[0][0] g2 = ds[0][0]
assert g2.num_edges() - g1.num_edges() == g1.num_nodes() assert g2.num_edges() - g1.num_edges() == g1.num_nodes()
assert ds.num_classes == ds.gclasses assert ds.num_classes == ds.gclasses
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_fraud(): def test_fraud():
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
g = data.FraudDataset('amazon')[0] g = data.FraudDataset("amazon")[0]
assert g.num_nodes() == 11944 assert g.num_nodes() == 11944
num_edges1 = g.num_edges() num_edges1 = g.num_edges()
g2 = data.FraudDataset('amazon', transform=transform)[0] g2 = data.FraudDataset("amazon", transform=transform)[0]
# 3 edge types # 3 edge types
assert g2.num_edges() - num_edges1 == g.num_nodes() * 3 assert g2.num_edges() - num_edges1 == g.num_nodes() * 3
@@ -68,55 +83,69 @@ def test_fraud():
# 3 edge types # 3 edge types
assert g2.num_edges() - g.num_edges() == g.num_nodes() * 3 assert g2.num_edges() - g.num_edges() == g.num_nodes() * 3
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_fakenews(): def test_fakenews():
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
ds = data.FakeNewsDataset('politifact', 'bert') ds = data.FakeNewsDataset("politifact", "bert")
assert len(ds) == 314 assert len(ds) == 314
g = ds[0][0] g = ds[0][0]
g2 = data.FakeNewsDataset('politifact', 'bert', transform=transform)[0][0] g2 = data.FakeNewsDataset("politifact", "bert", transform=transform)[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
ds = data.FakeNewsDataset('gossipcop', 'profile') ds = data.FakeNewsDataset("gossipcop", "profile")
assert len(ds) == 5464 assert len(ds) == 5464
g = ds[0][0] g = ds[0][0]
g2 = data.FakeNewsDataset('gossipcop', 'profile', transform=transform)[0][0] g2 = data.FakeNewsDataset("gossipcop", "profile", transform=transform)[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_tudataset_regression(): def test_tudataset_regression():
ds = data.TUDataset('ZINC_test', force_reload=True) ds = data.TUDataset("ZINC_test", force_reload=True)
assert ds.num_classes == ds.num_labels assert ds.num_classes == ds.num_labels
assert len(ds) == 5000 assert len(ds) == 5000
g = ds[0][0] g = ds[0][0]
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
ds = data.TUDataset('ZINC_test', force_reload=True, transform=transform) ds = data.TUDataset("ZINC_test", force_reload=True, transform=transform)
g2 = ds[0][0] g2 = ds[0][0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_data_hash(): def test_data_hash():
class HashTestDataset(data.DGLDataset): class HashTestDataset(data.DGLDataset):
def __init__(self, hash_key=()): def __init__(self, hash_key=()):
super(HashTestDataset, self).__init__( super(HashTestDataset, self).__init__("hashtest", hash_key=hash_key)
'hashtest', hash_key=hash_key)
def _load(self): def _load(self):
pass pass
a = HashTestDataset((True, 0, '1', (1, 2, 3))) a = HashTestDataset((True, 0, "1", (1, 2, 3)))
b = HashTestDataset((True, 0, '1', (1, 2, 3))) b = HashTestDataset((True, 0, "1", (1, 2, 3)))
c = HashTestDataset((True, 0, '1', (1, 2, 4))) c = HashTestDataset((True, 0, "1", (1, 2, 4)))
assert a.hash == b.hash assert a.hash == b.hash
assert a.hash != c.hash assert a.hash != c.hash
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.") @unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_citation_graph(): def test_citation_graph():
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
@@ -149,7 +178,10 @@ def test_citation_graph():
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.") @unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gnn_benchmark(): def test_gnn_benchmark():
transform = dgl.AddSelfLoop(allow_duplicate=True) transform = dgl.AddSelfLoop(allow_duplicate=True)
...@@ -200,7 +232,10 @@ def test_gnn_benchmark(): ...@@ -200,7 +232,10 @@ def test_gnn_benchmark():
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.") @unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_reddit(): def test_reddit():
# RedditDataset # RedditDataset
...@@ -214,14 +249,18 @@ def test_reddit(): ...@@ -214,14 +249,18 @@ def test_reddit():
g2 = data.RedditDataset(transform=transform)[0] g2 = data.RedditDataset(transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_explain_syn(): def test_explain_syn():
dataset = data.BAShapeDataset() dataset = data.BAShapeDataset()
assert dataset.num_classes == 4 assert dataset.num_classes == 4
g = dataset[0] g = dataset[0]
assert 'label' in g.ndata assert "label" in g.ndata
assert 'feat' in g.ndata assert "feat" in g.ndata
g1 = data.BAShapeDataset(force_reload=True, seed=0)[0] g1 = data.BAShapeDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges() src1, dst1 = g1.edges()
...@@ -233,8 +272,8 @@ def test_explain_syn(): ...@@ -233,8 +272,8 @@ def test_explain_syn():
dataset = data.BACommunityDataset() dataset = data.BACommunityDataset()
assert dataset.num_classes == 8 assert dataset.num_classes == 8
g = dataset[0] g = dataset[0]
assert 'label' in g.ndata assert "label" in g.ndata
assert 'feat' in g.ndata assert "feat" in g.ndata
g1 = data.BACommunityDataset(force_reload=True, seed=0)[0] g1 = data.BACommunityDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges() src1, dst1 = g1.edges()
...@@ -246,8 +285,8 @@ def test_explain_syn(): ...@@ -246,8 +285,8 @@ def test_explain_syn():
dataset = data.TreeCycleDataset() dataset = data.TreeCycleDataset()
assert dataset.num_classes == 2 assert dataset.num_classes == 2
g = dataset[0] g = dataset[0]
assert 'label' in g.ndata assert "label" in g.ndata
assert 'feat' in g.ndata assert "feat" in g.ndata
g1 = data.TreeCycleDataset(force_reload=True, seed=0)[0] g1 = data.TreeCycleDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges() src1, dst1 = g1.edges()
...@@ -259,8 +298,8 @@ def test_explain_syn(): ...@@ -259,8 +298,8 @@ def test_explain_syn():
dataset = data.TreeGridDataset() dataset = data.TreeGridDataset()
assert dataset.num_classes == 2 assert dataset.num_classes == 2
g = dataset[0] g = dataset[0]
assert 'label' in g.ndata assert "label" in g.ndata
assert 'feat' in g.ndata assert "feat" in g.ndata
g1 = data.TreeGridDataset(force_reload=True, seed=0)[0] g1 = data.TreeGridDataset(force_reload=True, seed=0)[0]
src1, dst1 = g1.edges() src1, dst1 = g1.edges()
...@@ -272,9 +311,13 @@ def test_explain_syn(): ...@@ -272,9 +311,13 @@ def test_explain_syn():
dataset = data.BA2MotifDataset() dataset = data.BA2MotifDataset()
assert dataset.num_classes == 2 assert dataset.num_classes == 2
g, label = dataset[0] g, label = dataset[0]
assert 'feat' in g.ndata assert "feat" in g.ndata
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_wiki_cs(): def test_wiki_cs():
g = data.WikiCSDataset()[0] g = data.WikiCSDataset()[0]
...@@ -287,6 +330,7 @@ def test_wiki_cs(): ...@@ -287,6 +330,7 @@ def test_wiki_cs():
g2 = data.WikiCSDataset(transform=transform)[0] g2 = data.WikiCSDataset(transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skip(reason="Dataset too large to download for the latest CI.") @unittest.skip(reason="Dataset too large to download for the latest CI.")
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_yelp(): def test_yelp():
...@@ -300,7 +344,11 @@ def test_yelp(): ...@@ -300,7 +344,11 @@ def test_yelp():
g2 = data.YelpDataset(reorder=True, transform=transform)[0] g2 = data.YelpDataset(reorder=True, transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_flickr(): def test_flickr():
g = data.FlickrDataset(reorder=True)[0] g = data.FlickrDataset(reorder=True)[0]
...@@ -313,15 +361,19 @@ def test_flickr(): ...@@ -313,15 +361,19 @@ def test_flickr():
g2 = data.FlickrDataset(reorder=True, transform=transform)[0] g2 = data.FlickrDataset(reorder=True, transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(F._default_context_str == 'gpu', reason="Datasets don't need to be tested on GPU.")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_extract_archive(): def test_extract_archive():
# gzip # gzip
with tempfile.TemporaryDirectory() as src_dir: with tempfile.TemporaryDirectory() as src_dir:
gz_file = 'gz_archive' gz_file = "gz_archive"
gz_path = os.path.join(src_dir, gz_file + '.gz') gz_path = os.path.join(src_dir, gz_file + ".gz")
content = b"test extract archive gzip" content = b"test extract archive gzip"
with gzip.open(gz_path, 'wb') as f: with gzip.open(gz_path, "wb") as f:
f.write(content) f.write(content)
with tempfile.TemporaryDirectory() as dst_dir: with tempfile.TemporaryDirectory() as dst_dir:
data.utils.extract_archive(gz_path, dst_dir, overwrite=True) data.utils.extract_archive(gz_path, dst_dir, overwrite=True)
...@@ -329,7 +381,12 @@ def test_extract_archive(): ...@@ -329,7 +381,12 @@ def test_extract_archive():
def _test_construct_graphs_node_ids(): def _test_construct_graphs_node_ids():
from dgl.data.csv_dataset_base import NodeData, EdgeData, DGLGraphConstructor from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
num_nodes = 100 num_nodes = 100
num_edges = 1000 num_edges = 1000
...@@ -341,8 +398,7 @@ def _test_construct_graphs_node_ids(): ...@@ -341,8 +398,7 @@ def _test_construct_graphs_node_ids():
edge_data = EdgeData(src_ids, dst_ids, {}) edge_data = EdgeData(src_ids, dst_ids, {})
expect_except = False expect_except = False
try: try:
_, _ = DGLGraphConstructor.construct_graphs( _, _ = DGLGraphConstructor.construct_graphs(node_data, edge_data)
node_data, edge_data)
except: except:
expect_except = True expect_except = True
assert expect_except assert expect_except
...@@ -354,27 +410,31 @@ def _test_construct_graphs_node_ids(): ...@@ -354,27 +410,31 @@ def _test_construct_graphs_node_ids():
src_ids = np.random.choice(node_ids, size=num_edges) src_ids = np.random.choice(node_ids, size=num_edges)
dst_ids = np.random.choice(node_ids, size=num_edges) dst_ids = np.random.choice(node_ids, size=num_edges)
node_feat = np.random.rand(num_nodes, 3) node_feat = np.random.rand(num_nodes, 3)
node_data = NodeData(node_ids, {'feat':node_feat}) node_data = NodeData(node_ids, {"feat": node_feat})
edge_data = EdgeData(src_ids, dst_ids, {}) edge_data = EdgeData(src_ids, dst_ids, {})
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data) node_data, edge_data
)
assert len(graphs) == 1 assert len(graphs) == 1
assert len(data_dict) == 0 assert len(data_dict) == 0
g = graphs[0] g = graphs[0]
assert g.is_homogeneous assert g.is_homogeneous
assert g.num_nodes() == len(node_ids) assert g.num_nodes() == len(node_ids)
assert g.num_edges() == len(src_ids) assert g.num_edges() == len(src_ids)
assert F.array_equal(F.tensor(node_feat[idx], dtype=F.float32), g.ndata['feat']) assert F.array_equal(
F.tensor(node_feat[idx], dtype=F.float32), g.ndata["feat"]
)
# node IDs are mixed with numeric and non-numeric values # node IDs are mixed with numeric and non-numeric values
# homogeneous graph # homogeneous graph
node_ids = [1, 2, 3, 'a'] node_ids = [1, 2, 3, "a"]
src_ids = [1, 2, 3] src_ids = [1, 2, 3]
dst_ids = ['a', 1, 2] dst_ids = ["a", 1, 2]
node_data = NodeData(node_ids, {}) node_data = NodeData(node_ids, {})
edge_data = EdgeData(src_ids, dst_ids, {}) edge_data = EdgeData(src_ids, dst_ids, {})
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data) node_data, edge_data
)
assert len(graphs) == 1 assert len(graphs) == 1
assert len(data_dict) == 0 assert len(data_dict) == 0
g = graphs[0] g = graphs[0]
...@@ -384,49 +444,63 @@ def _test_construct_graphs_node_ids(): ...@@ -384,49 +444,63 @@ def _test_construct_graphs_node_ids():
# heterogeneous graph # heterogeneous graph
node_ids_user = [1, 2, 3] node_ids_user = [1, 2, 3]
node_ids_item = ['a', 'b', 'c'] node_ids_item = ["a", "b", "c"]
src_ids = node_ids_user src_ids = node_ids_user
dst_ids = node_ids_item dst_ids = node_ids_item
node_data_user = NodeData(node_ids_user, {}, type='user') node_data_user = NodeData(node_ids_user, {}, type="user")
node_data_item = NodeData(node_ids_item, {}, type='item') node_data_item = NodeData(node_ids_item, {}, type="item")
edge_data = EdgeData(src_ids, dst_ids, {}, type=('user', 'like', 'item')) edge_data = EdgeData(src_ids, dst_ids, {}, type=("user", "like", "item"))
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
[node_data_user, node_data_item], edge_data) [node_data_user, node_data_item], edge_data
)
assert len(graphs) == 1 assert len(graphs) == 1
assert len(data_dict) == 0 assert len(data_dict) == 0
g = graphs[0] g = graphs[0]
assert not g.is_homogeneous assert not g.is_homogeneous
assert g.num_nodes('user') == len(node_ids_user) assert g.num_nodes("user") == len(node_ids_user)
assert g.num_nodes('item') == len(node_ids_item) assert g.num_nodes("item") == len(node_ids_item)
assert g.num_edges() == len(src_ids) assert g.num_edges() == len(src_ids)
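# A minimal sketch of what the heterogeneous case above boils down to once the
# string node IDs are remapped to consecutive per-type integers (the concrete
# ID values 0..2 below are assumed for illustration, not produced by the loader):
import dgl
g_ref = dgl.heterograph(
    {("user", "like", "item"): ([0, 1, 2], [0, 1, 2])}
)
assert not g_ref.is_homogeneous
assert g_ref.num_nodes("user") == 3 and g_ref.num_nodes("item") == 3
assert g_ref.num_edges() == 3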
def _test_construct_graphs_homo(): def _test_construct_graphs_homo():
from dgl.data.csv_dataset_base import NodeData, EdgeData, DGLGraphConstructor from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
# node_id could be non-sorted, non-numeric. # node_id could be non-sorted, non-numeric.
num_nodes = 100 num_nodes = 100
num_edges = 1000 num_edges = 1000
num_dims = 3 num_dims = 3
node_ids = np.random.choice( node_ids = np.random.choice(
np.arange(num_nodes*2), size=num_nodes, replace=False) np.arange(num_nodes * 2), size=num_nodes, replace=False
)
assert len(node_ids) == num_nodes assert len(node_ids) == num_nodes
# to be non-sorted # to be non-sorted
np.random.shuffle(node_ids) np.random.shuffle(node_ids)
# to be non-numeric # to be non-numeric
node_ids = ['id_{}'.format(id) for id in node_ids] node_ids = ["id_{}".format(id) for id in node_ids]
t_ndata = {'feat': np.random.rand(num_nodes, num_dims), t_ndata = {
'label': np.random.randint(2, size=num_nodes)} "feat": np.random.rand(num_nodes, num_dims),
"label": np.random.randint(2, size=num_nodes),
}
_, u_indices = np.unique(node_ids, return_index=True) _, u_indices = np.unique(node_ids, return_index=True)
ndata = {'feat': t_ndata['feat'][u_indices], ndata = {
'label': t_ndata['label'][u_indices]} "feat": t_ndata["feat"][u_indices],
"label": t_ndata["label"][u_indices],
}
node_data = NodeData(node_ids, t_ndata) node_data = NodeData(node_ids, t_ndata)
src_ids = np.random.choice(node_ids, size=num_edges) src_ids = np.random.choice(node_ids, size=num_edges)
dst_ids = np.random.choice(node_ids, size=num_edges) dst_ids = np.random.choice(node_ids, size=num_edges)
edata = {'feat': np.random.rand( edata = {
num_edges, num_dims), 'label': np.random.randint(2, size=num_edges)} "feat": np.random.rand(num_edges, num_dims),
"label": np.random.randint(2, size=num_edges),
}
edge_data = EdgeData(src_ids, dst_ids, edata) edge_data = EdgeData(src_ids, dst_ids, edata)
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data) node_data, edge_data
)
assert len(graphs) == 1 assert len(graphs) == 1
assert len(data_dict) == 0 assert len(data_dict) == 0
g = graphs[0] g = graphs[0]
...@@ -439,63 +513,81 @@ def _test_construct_graphs_homo(): ...@@ -439,63 +513,81 @@ def _test_construct_graphs_homo():
assert key in rhs assert key in rhs
assert F.dtype(rhs[key]) != F.float64 assert F.dtype(rhs[key]) != F.float64
assert F.array_equal( assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]) F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
assert_data(ndata, g.ndata) assert_data(ndata, g.ndata)
assert_data(edata, g.edata) assert_data(edata, g.edata)
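# Note on the dtype checks above: features parsed from CSV arrive as float64
# numpy arrays and the constructor is expected to downcast them (e.g. to
# float32), which is why values are compared via
# F.tensor(value, dtype=F.dtype(rhs[key])) instead of a direct equality check.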
def _test_construct_graphs_hetero(): def _test_construct_graphs_hetero():
from dgl.data.csv_dataset_base import NodeData, EdgeData, DGLGraphConstructor from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
NodeData,
)
# node_id/src_id/dst_id could be non-sorted, duplicated, non-numeric. # node_id/src_id/dst_id could be non-sorted, duplicated, non-numeric.
num_nodes = 100 num_nodes = 100
num_edges = 1000 num_edges = 1000
num_dims = 3 num_dims = 3
ntypes = ['user', 'item'] ntypes = ["user", "item"]
node_data = [] node_data = []
node_ids_dict = {} node_ids_dict = {}
ndata_dict = {} ndata_dict = {}
for ntype in ntypes: for ntype in ntypes:
node_ids = np.random.choice( node_ids = np.random.choice(
np.arange(num_nodes*2), size=num_nodes, replace=False) np.arange(num_nodes * 2), size=num_nodes, replace=False
)
assert len(node_ids) == num_nodes assert len(node_ids) == num_nodes
# to be non-sorted # to be non-sorted
np.random.shuffle(node_ids) np.random.shuffle(node_ids)
# to be non-numeric # to be non-numeric
node_ids = ['id_{}'.format(id) for id in node_ids] node_ids = ["id_{}".format(id) for id in node_ids]
t_ndata = {'feat': np.random.rand(num_nodes, num_dims), t_ndata = {
'label': np.random.randint(2, size=num_nodes)} "feat": np.random.rand(num_nodes, num_dims),
"label": np.random.randint(2, size=num_nodes),
}
_, u_indices = np.unique(node_ids, return_index=True) _, u_indices = np.unique(node_ids, return_index=True)
ndata = {'feat': t_ndata['feat'][u_indices], ndata = {
'label': t_ndata['label'][u_indices]} "feat": t_ndata["feat"][u_indices],
"label": t_ndata["label"][u_indices],
}
node_data.append(NodeData(node_ids, t_ndata, type=ntype)) node_data.append(NodeData(node_ids, t_ndata, type=ntype))
node_ids_dict[ntype] = node_ids node_ids_dict[ntype] = node_ids
ndata_dict[ntype] = ndata ndata_dict[ntype] = ndata
etypes = [('user', 'follow', 'user'), ('user', 'like', 'item')] etypes = [("user", "follow", "user"), ("user", "like", "item")]
edge_data = [] edge_data = []
edata_dict = {} edata_dict = {}
for src_type, e_type, dst_type in etypes: for src_type, e_type, dst_type in etypes:
src_ids = np.random.choice(node_ids_dict[src_type], size=num_edges) src_ids = np.random.choice(node_ids_dict[src_type], size=num_edges)
dst_ids = np.random.choice(node_ids_dict[dst_type], size=num_edges) dst_ids = np.random.choice(node_ids_dict[dst_type], size=num_edges)
edata = {'feat': np.random.rand( edata = {
num_edges, num_dims), 'label': np.random.randint(2, size=num_edges)} "feat": np.random.rand(num_edges, num_dims),
edge_data.append(EdgeData(src_ids, dst_ids, edata, "label": np.random.randint(2, size=num_edges),
type=(src_type, e_type, dst_type))) }
edge_data.append(
EdgeData(src_ids, dst_ids, edata, type=(src_type, e_type, dst_type))
)
edata_dict[(src_type, e_type, dst_type)] = edata edata_dict[(src_type, e_type, dst_type)] = edata
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data) node_data, edge_data
)
assert len(graphs) == 1 assert len(graphs) == 1
assert len(data_dict) == 0 assert len(data_dict) == 0
g = graphs[0] g = graphs[0]
assert not g.is_homogeneous assert not g.is_homogeneous
assert g.num_nodes() == num_nodes*len(ntypes) assert g.num_nodes() == num_nodes * len(ntypes)
assert g.num_edges() == num_edges*len(etypes) assert g.num_edges() == num_edges * len(etypes)
def assert_data(lhs, rhs): def assert_data(lhs, rhs):
for key, value in lhs.items(): for key, value in lhs.items():
assert key in rhs assert key in rhs
assert F.dtype(rhs[key]) != F.float64 assert F.dtype(rhs[key]) != F.float64
assert F.array_equal( assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]) F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
for ntype in g.ntypes: for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes assert g.num_nodes(ntype) == num_nodes
assert_data(ndata_dict[ntype], g.nodes[ntype].data) assert_data(ndata_dict[ntype], g.nodes[ntype].data)
...@@ -505,7 +597,13 @@ def _test_construct_graphs_hetero(): ...@@ -505,7 +597,13 @@ def _test_construct_graphs_hetero():
def _test_construct_graphs_multiple(): def _test_construct_graphs_multiple():
from dgl.data.csv_dataset_base import NodeData, EdgeData, GraphData, DGLGraphConstructor from dgl.data.csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
GraphData,
NodeData,
)
num_nodes = 100 num_nodes = 100
num_edges = 1000 num_edges = 1000
num_graphs = 10 num_graphs = 10
...@@ -518,35 +616,48 @@ def _test_construct_graphs_multiple(): ...@@ -518,35 +616,48 @@ def _test_construct_graphs_multiple():
u_indices = np.array([], dtype=np.int) u_indices = np.array([], dtype=np.int)
for i in range(num_graphs): for i in range(num_graphs):
l_node_ids = np.random.choice( l_node_ids = np.random.choice(
np.arange(num_nodes*2), size=num_nodes, replace=False) np.arange(num_nodes * 2), size=num_nodes, replace=False
)
node_ids = np.append(node_ids, l_node_ids) node_ids = np.append(node_ids, l_node_ids)
_, l_u_indices = np.unique(l_node_ids, return_index=True) _, l_u_indices = np.unique(l_node_ids, return_index=True)
u_indices = np.append(u_indices, l_u_indices) u_indices = np.append(u_indices, l_u_indices)
ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i)) ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i))
src_ids = np.append(src_ids, np.random.choice( src_ids = np.append(
l_node_ids, size=num_edges)) src_ids, np.random.choice(l_node_ids, size=num_edges)
dst_ids = np.append(dst_ids, np.random.choice( )
l_node_ids, size=num_edges)) dst_ids = np.append(
dst_ids, np.random.choice(l_node_ids, size=num_edges)
)
egraph_ids = np.append(egraph_ids, np.full(num_edges, i)) egraph_ids = np.append(egraph_ids, np.full(num_edges, i))
ndata = {'feat': np.random.rand(num_nodes*num_graphs, num_dims), ndata = {
'label': np.random.randint(2, size=num_nodes*num_graphs)} "feat": np.random.rand(num_nodes * num_graphs, num_dims),
ngraph_ids = ['graph_{}'.format(id) for id in ngraph_ids] "label": np.random.randint(2, size=num_nodes * num_graphs),
}
ngraph_ids = ["graph_{}".format(id) for id in ngraph_ids]
node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids) node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids)
egraph_ids = ['graph_{}'.format(id) for id in egraph_ids] egraph_ids = ["graph_{}".format(id) for id in egraph_ids]
edata = {'feat': np.random.rand( edata = {
num_edges*num_graphs, num_dims), 'label': np.random.randint(2, size=num_edges*num_graphs)} "feat": np.random.rand(num_edges * num_graphs, num_dims),
"label": np.random.randint(2, size=num_edges * num_graphs),
}
edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids) edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids)
gdata = {'feat': np.random.rand(num_graphs, num_dims), gdata = {
'label': np.random.randint(2, size=num_graphs)} "feat": np.random.rand(num_graphs, num_dims),
graph_ids = ['graph_{}'.format(id) for id in np.arange(num_graphs)] "label": np.random.randint(2, size=num_graphs),
}
graph_ids = ["graph_{}".format(id) for id in np.arange(num_graphs)]
graph_data = GraphData(graph_ids, gdata) graph_data = GraphData(graph_ids, gdata)
graphs, data_dict = DGLGraphConstructor.construct_graphs( graphs, data_dict = DGLGraphConstructor.construct_graphs(
node_data, edge_data, graph_data) node_data, edge_data, graph_data
)
assert len(graphs) == num_graphs assert len(graphs) == num_graphs
assert len(data_dict) == len(gdata) assert len(data_dict) == len(gdata)
for k, v in data_dict.items(): for k, v in data_dict.items():
assert F.dtype(v) != F.float64 assert F.dtype(v) != F.float64
assert F.array_equal(F.reshape(F.tensor(gdata[k], dtype=F.dtype(v)), (len(graphs), -1)), v) assert F.array_equal(
F.reshape(F.tensor(gdata[k], dtype=F.dtype(v)), (len(graphs), -1)),
v,
)
for i, g in enumerate(graphs): for i, g in enumerate(graphs):
assert g.is_homogeneous assert g.is_homogeneous
assert g.num_nodes() == num_nodes assert g.num_nodes() == num_nodes
...@@ -555,22 +666,25 @@ def _test_construct_graphs_multiple(): ...@@ -555,22 +666,25 @@ def _test_construct_graphs_multiple():
def assert_data(lhs, rhs, size, node=False): def assert_data(lhs, rhs, size, node=False):
for key, value in lhs.items(): for key, value in lhs.items():
assert key in rhs assert key in rhs
value = value[i*size:(i+1)*size] value = value[i * size : (i + 1) * size]
if node: if node:
indices = u_indices[i*size:(i+1)*size] indices = u_indices[i * size : (i + 1) * size]
value = value[indices] value = value[indices]
assert F.dtype(rhs[key]) != F.float64 assert F.dtype(rhs[key]) != F.float64
assert F.array_equal( assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]) F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key]
)
assert_data(ndata, g.ndata, num_nodes, node=True) assert_data(ndata, g.ndata, num_nodes, node=True)
assert_data(edata, g.edata, num_edges) assert_data(edata, g.edata, num_edges)
# Graph IDs found in node/edge CSV but not in graph CSV # Graph IDs found in node/edge CSV but not in graph CSV
graph_data = GraphData(np.arange(num_graphs-2), {}) graph_data = GraphData(np.arange(num_graphs - 2), {})
expect_except = False expect_except = False
try: try:
_, _ = DGLGraphConstructor.construct_graphs( _, _ = DGLGraphConstructor.construct_graphs(
node_data, edge_data, graph_data) node_data, edge_data, graph_data
)
except: except:
expect_except = True expect_except = True
assert expect_except assert expect_except
...@@ -578,6 +692,7 @@ def _test_construct_graphs_multiple(): ...@@ -578,6 +692,7 @@ def _test_construct_graphs_multiple():
def _test_DefaultDataParser(): def _test_DefaultDataParser():
from dgl.data.csv_dataset_base import DefaultDataParser from dgl.data.csv_dataset_base import DefaultDataParser
# common csv # common csv
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv") csv_path = os.path.join(test_dir, "nodes.csv")
...@@ -587,21 +702,28 @@ def _test_DefaultDataParser(): ...@@ -587,21 +702,28 @@ def _test_DefaultDataParser():
node_id = np.arange(num_nodes) node_id = np.arange(num_nodes)
label = np.random.randint(num_labels, size=num_nodes) label = np.random.randint(num_labels, size=num_nodes)
feat = np.random.rand(num_nodes, num_dims) feat = np.random.rand(num_nodes, num_dims)
df = pd.DataFrame({'node_id': node_id, 'label': label, df = pd.DataFrame(
'feat': [line.tolist() for line in feat], {
}) "node_id": node_id,
"label": label,
"feat": [line.tolist() for line in feat],
}
)
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
dp = DefaultDataParser() dp = DefaultDataParser()
df = pd.read_csv(csv_path) df = pd.read_csv(csv_path)
dt = dp(df) dt = dp(df)
assert np.array_equal(node_id, dt['node_id']) assert np.array_equal(node_id, dt["node_id"])
assert np.array_equal(label, dt['label']) assert np.array_equal(label, dt["label"])
assert np.array_equal(feat, dt['feat']) assert np.array_equal(feat, dt["feat"])
# string consists of non-numeric values # string consists of non-numeric values
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv") csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame({'label': ['a', 'b', 'c'], df = pd.DataFrame(
}) {
"label": ["a", "b", "c"],
}
)
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
dp = DefaultDataParser() dp = DefaultDataParser()
df = pd.read_csv(csv_path) df = pd.read_csv(csv_path)
...@@ -614,8 +736,11 @@ def _test_DefaultDataParser(): ...@@ -614,8 +736,11 @@ def _test_DefaultDataParser():
# csv has index column which is ignored as it's unnamed # csv has index column which is ignored as it's unnamed
# csv has an index column, which is ignored as it is unnamed # csv has an index column, which is ignored as it is unnamed
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv") csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame({'label': [1, 2, 3], df = pd.DataFrame(
}) {
"label": [1, 2, 3],
}
)
df.to_csv(csv_path) df.to_csv(csv_path)
dp = DefaultDataParser() dp = DefaultDataParser()
df = pd.read_csv(csv_path) df = pd.read_csv(csv_path)
...@@ -625,69 +750,96 @@ def _test_DefaultDataParser(): ...@@ -625,69 +750,96 @@ def _test_DefaultDataParser():
def _test_load_yaml_with_sanity_check(): def _test_load_yaml_with_sanity_check():
from dgl.data.csv_dataset_base import load_yaml_with_sanity_check from dgl.data.csv_dataset_base import load_yaml_with_sanity_check
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
yaml_path = os.path.join(test_dir, 'meta.yaml') yaml_path = os.path.join(test_dir, "meta.yaml")
# workable but usually meaningless # workable but usually meaningless

yaml_data = {'dataset_name': 'default', yaml_data = {
'node_data': [], 'edge_data': []} "dataset_name": "default",
with open(yaml_path, 'w') as f: "node_data": [],
"edge_data": [],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path) meta = load_yaml_with_sanity_check(yaml_path)
assert meta.version == '1.0.0' assert meta.version == "1.0.0"
assert meta.dataset_name == 'default' assert meta.dataset_name == "default"
assert meta.separator == ',' assert meta.separator == ","
assert len(meta.node_data) == 0 assert len(meta.node_data) == 0
assert len(meta.edge_data) == 0 assert len(meta.edge_data) == 0
assert meta.graph_data is None assert meta.graph_data is None
# minimum with required fields only # minimum with required fields only
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}], yaml_data = {
'edge_data': [{'file_name': 'edges.csv'}], "version": "1.0.0",
} "dataset_name": "default",
with open(yaml_path, 'w') as f: "node_data": [{"file_name": "nodes.csv"}],
"edge_data": [{"file_name": "edges.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path) meta = load_yaml_with_sanity_check(yaml_path)
for ndata in meta.node_data: for ndata in meta.node_data:
assert ndata.file_name == 'nodes.csv' assert ndata.file_name == "nodes.csv"
assert ndata.ntype == '_V' assert ndata.ntype == "_V"
assert ndata.graph_id_field == 'graph_id' assert ndata.graph_id_field == "graph_id"
assert ndata.node_id_field == 'node_id' assert ndata.node_id_field == "node_id"
for edata in meta.edge_data: for edata in meta.edge_data:
assert edata.file_name == 'edges.csv' assert edata.file_name == "edges.csv"
assert edata.etype == ['_V', '_E', '_V'] assert edata.etype == ["_V", "_E", "_V"]
assert edata.graph_id_field == 'graph_id' assert edata.graph_id_field == "graph_id"
assert edata.src_id_field == 'src_id' assert edata.src_id_field == "src_id"
assert edata.dst_id_field == 'dst_id' assert edata.dst_id_field == "dst_id"
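# For reference, the "required fields only" dict above corresponds to a meta
# YAML roughly like the following sketch (field order is not significant):
#
#   version: 1.0.0
#   dataset_name: default
#   node_data:
#   - file_name: nodes.csv
#   edge_data:
#   - file_name: edges.csv
#
# Everything else (ntype "_V", etype ["_V", "_E", "_V"], the graph_id/node_id/
# src_id/dst_id field names) falls back to the defaults asserted above.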
# optional fields are specified # optional fields are specified
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', yaml_data = {
'separator': '|', "version": "1.0.0",
'node_data': [{'file_name': 'nodes.csv', 'ntype': 'user', 'graph_id_field': 'xxx', 'node_id_field': 'xxx'}], "dataset_name": "default",
'edge_data': [{'file_name': 'edges.csv', 'etype': ['user', 'follow', 'user'], 'graph_id_field':'xxx', 'src_id_field':'xxx', 'dst_id_field':'xxx'}], "separator": "|",
'graph_data': {'file_name': 'graph.csv', 'graph_id_field': 'xxx'} "node_data": [
} {
with open(yaml_path, 'w') as f: "file_name": "nodes.csv",
"ntype": "user",
"graph_id_field": "xxx",
"node_id_field": "xxx",
}
],
"edge_data": [
{
"file_name": "edges.csv",
"etype": ["user", "follow", "user"],
"graph_id_field": "xxx",
"src_id_field": "xxx",
"dst_id_field": "xxx",
}
],
"graph_data": {"file_name": "graph.csv", "graph_id_field": "xxx"},
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
meta = load_yaml_with_sanity_check(yaml_path) meta = load_yaml_with_sanity_check(yaml_path)
assert len(meta.node_data) == 1 assert len(meta.node_data) == 1
ndata = meta.node_data[0] ndata = meta.node_data[0]
assert ndata.ntype == 'user' assert ndata.ntype == "user"
assert ndata.graph_id_field == 'xxx' assert ndata.graph_id_field == "xxx"
assert ndata.node_id_field == 'xxx' assert ndata.node_id_field == "xxx"
assert len(meta.edge_data) == 1 assert len(meta.edge_data) == 1
edata = meta.edge_data[0] edata = meta.edge_data[0]
assert edata.etype == ['user', 'follow', 'user'] assert edata.etype == ["user", "follow", "user"]
assert edata.graph_id_field == 'xxx' assert edata.graph_id_field == "xxx"
assert edata.src_id_field == 'xxx' assert edata.src_id_field == "xxx"
assert edata.dst_id_field == 'xxx' assert edata.dst_id_field == "xxx"
assert meta.graph_data is not None assert meta.graph_data is not None
assert meta.graph_data.file_name == 'graph.csv' assert meta.graph_data.file_name == "graph.csv"
assert meta.graph_data.graph_id_field == 'xxx' assert meta.graph_data.graph_id_field == "xxx"
# some required fields are missing # some required fields are missing
yaml_data = {'dataset_name': 'default', yaml_data = {
'node_data': [], 'edge_data': []} "dataset_name": "default",
"node_data": [],
"edge_data": [],
}
for field in yaml_data.keys(): for field in yaml_data.keys():
ydata = {k: v for k, v in yaml_data.items()} ydata = {k: v for k, v in yaml_data.items()}
ydata.pop(field) ydata.pop(field)
with open(yaml_path, 'w') as f: with open(yaml_path, "w") as f:
yaml.dump(ydata, f, sort_keys=False) yaml.dump(ydata, f, sort_keys=False)
expect_except = False expect_except = False
try: try:
...@@ -696,10 +848,13 @@ def _test_load_yaml_with_sanity_check(): ...@@ -696,10 +848,13 @@ def _test_load_yaml_with_sanity_check():
expect_except = True expect_except = True
assert expect_except assert expect_except
# inapplicable version # inapplicable version
yaml_data = {'version': '0.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes_0.csv'}], yaml_data = {
'edge_data': [{'file_name': 'edges_0.csv'}], "version": "0.0.0",
} "dataset_name": "default",
with open(yaml_path, 'w') as f: "node_data": [{"file_name": "nodes_0.csv"}],
"edge_data": [{"file_name": "edges_0.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False expect_except = False
try: try:
...@@ -708,10 +863,16 @@ def _test_load_yaml_with_sanity_check(): ...@@ -708,10 +863,16 @@ def _test_load_yaml_with_sanity_check():
expect_except = True expect_except = True
assert expect_except assert expect_except
# duplicate node types # duplicate node types
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}, {'file_name': 'nodes.csv'}], yaml_data = {
'edge_data': [{'file_name': 'edges.csv'}], "version": "1.0.0",
} "dataset_name": "default",
with open(yaml_path, 'w') as f: "node_data": [
{"file_name": "nodes.csv"},
{"file_name": "nodes.csv"},
],
"edge_data": [{"file_name": "edges.csv"}],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False expect_except = False
try: try:
...@@ -720,10 +881,16 @@ def _test_load_yaml_with_sanity_check(): ...@@ -720,10 +881,16 @@ def _test_load_yaml_with_sanity_check():
expect_except = True expect_except = True
assert expect_except assert expect_except
# duplicate edge types # duplicate edge types
yaml_data = {'version': '1.0.0', 'dataset_name': 'default', 'node_data': [{'file_name': 'nodes.csv'}], yaml_data = {
'edge_data': [{'file_name': 'edges.csv'}, {'file_name': 'edges.csv'}], "version": "1.0.0",
} "dataset_name": "default",
with open(yaml_path, 'w') as f: "node_data": [{"file_name": "nodes.csv"}],
"edge_data": [
{"file_name": "edges.csv"},
{"file_name": "edges.csv"},
],
}
with open(yaml_path, "w") as f:
yaml.dump(yaml_data, f, sort_keys=False) yaml.dump(yaml_data, f, sort_keys=False)
expect_except = False expect_except = False
try: try:
...@@ -734,190 +901,218 @@ def _test_load_yaml_with_sanity_check(): ...@@ -734,190 +901,218 @@ def _test_load_yaml_with_sanity_check():
def _test_load_node_data_from_csv(): def _test_load_node_data_from_csv():
from dgl.data.csv_dataset_base import MetaNode, NodeData, DefaultDataParser from dgl.data.csv_dataset_base import DefaultDataParser, MetaNode, NodeData
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
num_nodes = 100 num_nodes = 100
# minimum # minimum
df = pd.DataFrame({'node_id': np.arange(num_nodes)}) df = pd.DataFrame({"node_id": np.arange(num_nodes)})
csv_path = os.path.join(test_dir, 'nodes.csv') csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path) meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv( node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
meta_node, DefaultDataParser()) assert np.array_equal(df["node_id"], node_data.id)
assert np.array_equal(df['node_id'], node_data.id)
assert len(node_data.data) == 0 assert len(node_data.data) == 0
# common case # common case
df = pd.DataFrame({'node_id': np.arange(num_nodes), df = pd.DataFrame(
'label': np.random.randint(3, size=num_nodes)}) {
csv_path = os.path.join(test_dir, 'nodes.csv') "node_id": np.arange(num_nodes),
"label": np.random.randint(3, size=num_nodes),
}
)
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path) meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv( node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
meta_node, DefaultDataParser()) assert np.array_equal(df["node_id"], node_data.id)
assert np.array_equal(df['node_id'], node_data.id)
assert len(node_data.data) == 1 assert len(node_data.data) == 1
assert np.array_equal(df['label'], node_data.data['label']) assert np.array_equal(df["label"], node_data.data["label"])
assert np.array_equal(np.full(num_nodes, 0), node_data.graph_id) assert np.array_equal(np.full(num_nodes, 0), node_data.graph_id)
assert node_data.type == '_V' assert node_data.type == "_V"
# add more fields into nodes.csv # add more fields into nodes.csv
df = pd.DataFrame({'node_id': np.arange(num_nodes), 'label': np.random.randint( df = pd.DataFrame(
3, size=num_nodes), 'graph_id': np.full(num_nodes, 1)}) {
csv_path = os.path.join(test_dir, 'nodes.csv') "node_id": np.arange(num_nodes),
"label": np.random.randint(3, size=num_nodes),
"graph_id": np.full(num_nodes, 1),
}
)
csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path) meta_node = MetaNode(file_name=csv_path)
node_data = NodeData.load_from_csv( node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
meta_node, DefaultDataParser()) assert np.array_equal(df["node_id"], node_data.id)
assert np.array_equal(df['node_id'], node_data.id)
assert len(node_data.data) == 1 assert len(node_data.data) == 1
assert np.array_equal(df['label'], node_data.data['label']) assert np.array_equal(df["label"], node_data.data["label"])
assert np.array_equal(df['graph_id'], node_data.graph_id) assert np.array_equal(df["graph_id"], node_data.graph_id)
assert node_data.type == '_V' assert node_data.type == "_V"
# required header is missing # required header is missing
df = pd.DataFrame({'label': np.random.randint(3, size=num_nodes)}) df = pd.DataFrame({"label": np.random.randint(3, size=num_nodes)})
csv_path = os.path.join(test_dir, 'nodes.csv') csv_path = os.path.join(test_dir, "nodes.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_node = MetaNode(file_name=csv_path) meta_node = MetaNode(file_name=csv_path)
expect_except = False expect_except = False
try: try:
NodeData.load_from_csv( NodeData.load_from_csv(meta_node, DefaultDataParser())
meta_node, DefaultDataParser())
except: except:
expect_except = True expect_except = True
assert expect_except assert expect_except
def _test_load_edge_data_from_csv(): def _test_load_edge_data_from_csv():
from dgl.data.csv_dataset_base import MetaEdge, EdgeData, DefaultDataParser from dgl.data.csv_dataset_base import DefaultDataParser, EdgeData, MetaEdge
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
num_nodes = 100 num_nodes = 100
num_edges = 1000 num_edges = 1000
# minimum # minimum
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
'dst_id': np.random.randint(num_nodes, size=num_edges), {
}) "src_id": np.random.randint(num_nodes, size=num_edges),
csv_path = os.path.join(test_dir, 'edges.csv') "dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path) meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv( edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
meta_edge, DefaultDataParser()) assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df['src_id'], edge_data.src) assert np.array_equal(df["dst_id"], edge_data.dst)
assert np.array_equal(df['dst_id'], edge_data.dst)
assert len(edge_data.data) == 0 assert len(edge_data.data) == 0
# common case # common case
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
'dst_id': np.random.randint(num_nodes, size=num_edges), {
'label': np.random.randint(3, size=num_edges)}) "src_id": np.random.randint(num_nodes, size=num_edges),
csv_path = os.path.join(test_dir, 'edges.csv') "dst_id": np.random.randint(num_nodes, size=num_edges),
"label": np.random.randint(3, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path) meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv( edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
meta_edge, DefaultDataParser()) assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df['src_id'], edge_data.src) assert np.array_equal(df["dst_id"], edge_data.dst)
assert np.array_equal(df['dst_id'], edge_data.dst)
assert len(edge_data.data) == 1 assert len(edge_data.data) == 1
assert np.array_equal(df['label'], edge_data.data['label']) assert np.array_equal(df["label"], edge_data.data["label"])
assert np.array_equal(np.full(num_edges, 0), edge_data.graph_id) assert np.array_equal(np.full(num_edges, 0), edge_data.graph_id)
assert edge_data.type == ('_V', '_E', '_V') assert edge_data.type == ("_V", "_E", "_V")
# add more fields into edges.csv # add more fields into edges.csv
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
'dst_id': np.random.randint(num_nodes, size=num_edges), {
'graph_id': np.arange(num_edges), "src_id": np.random.randint(num_nodes, size=num_edges),
'feat': np.random.randint(3, size=num_edges), "dst_id": np.random.randint(num_nodes, size=num_edges),
'label': np.random.randint(3, size=num_edges)}) "graph_id": np.arange(num_edges),
csv_path = os.path.join(test_dir, 'edges.csv') "feat": np.random.randint(3, size=num_edges),
"label": np.random.randint(3, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path) meta_edge = MetaEdge(file_name=csv_path)
edge_data = EdgeData.load_from_csv( edge_data = EdgeData.load_from_csv(meta_edge, DefaultDataParser())
meta_edge, DefaultDataParser()) assert np.array_equal(df["src_id"], edge_data.src)
assert np.array_equal(df['src_id'], edge_data.src) assert np.array_equal(df["dst_id"], edge_data.dst)
assert np.array_equal(df['dst_id'], edge_data.dst)
assert len(edge_data.data) == 2 assert len(edge_data.data) == 2
assert np.array_equal(df['feat'], edge_data.data['feat']) assert np.array_equal(df["feat"], edge_data.data["feat"])
assert np.array_equal(df['label'], edge_data.data['label']) assert np.array_equal(df["label"], edge_data.data["label"])
assert np.array_equal(df['graph_id'], edge_data.graph_id) assert np.array_equal(df["graph_id"], edge_data.graph_id)
assert edge_data.type == ('_V', '_E', '_V') assert edge_data.type == ("_V", "_E", "_V")
# required headers are missing # required headers are missing
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
}) {
csv_path = os.path.join(test_dir, 'edges.csv') "src_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path) meta_edge = MetaEdge(file_name=csv_path)
expect_except = False expect_except = False
try: try:
EdgeData.load_from_csv( EdgeData.load_from_csv(meta_edge, DefaultDataParser())
meta_edge, DefaultDataParser())
except DGLError: except DGLError:
expect_except = True expect_except = True
assert expect_except assert expect_except
df = pd.DataFrame({'dst_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
}) {
csv_path = os.path.join(test_dir, 'edges.csv') "dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_edge = MetaEdge(file_name=csv_path) meta_edge = MetaEdge(file_name=csv_path)
expect_except = False expect_except = False
try: try:
EdgeData.load_from_csv( EdgeData.load_from_csv(meta_edge, DefaultDataParser())
meta_edge, DefaultDataParser())
except DGLError: except DGLError:
expect_except = True expect_except = True
assert expect_except assert expect_except
def _test_load_graph_data_from_csv(): def _test_load_graph_data_from_csv():
from dgl.data.csv_dataset_base import MetaGraph, GraphData, DefaultDataParser from dgl.data.csv_dataset_base import (
DefaultDataParser,
GraphData,
MetaGraph,
)
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
num_graphs = 100 num_graphs = 100
# minimum # minimum
df = pd.DataFrame({'graph_id': np.arange(num_graphs)}) df = pd.DataFrame({"graph_id": np.arange(num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv') csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path) meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv( graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
meta_graph, DefaultDataParser()) assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert np.array_equal(df['graph_id'], graph_data.graph_id)
assert len(graph_data.data) == 0 assert len(graph_data.data) == 0
# common case # common case
df = pd.DataFrame({'graph_id': np.arange(num_graphs), df = pd.DataFrame(
'label': np.random.randint(3, size=num_graphs)}) {
csv_path = os.path.join(test_dir, 'graph.csv') "graph_id": np.arange(num_graphs),
"label": np.random.randint(3, size=num_graphs),
}
)
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path) meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv( graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
meta_graph, DefaultDataParser()) assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert np.array_equal(df['graph_id'], graph_data.graph_id)
assert len(graph_data.data) == 1 assert len(graph_data.data) == 1
assert np.array_equal(df['label'], graph_data.data['label']) assert np.array_equal(df["label"], graph_data.data["label"])
# add more fields into graph.csv # add more fields into graph.csv
df = pd.DataFrame({'graph_id': np.arange(num_graphs), df = pd.DataFrame(
'feat': np.random.randint(3, size=num_graphs), {
'label': np.random.randint(3, size=num_graphs)}) "graph_id": np.arange(num_graphs),
csv_path = os.path.join(test_dir, 'graph.csv') "feat": np.random.randint(3, size=num_graphs),
"label": np.random.randint(3, size=num_graphs),
}
)
csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path) meta_graph = MetaGraph(file_name=csv_path)
graph_data = GraphData.load_from_csv( graph_data = GraphData.load_from_csv(meta_graph, DefaultDataParser())
meta_graph, DefaultDataParser()) assert np.array_equal(df["graph_id"], graph_data.graph_id)
assert np.array_equal(df['graph_id'], graph_data.graph_id)
assert len(graph_data.data) == 2 assert len(graph_data.data) == 2
assert np.array_equal(df['feat'], graph_data.data['feat']) assert np.array_equal(df["feat"], graph_data.data["feat"])
assert np.array_equal(df['label'], graph_data.data['label']) assert np.array_equal(df["label"], graph_data.data["label"])
# required header is missing # required header is missing
df = pd.DataFrame({'label': np.random.randint(3, size=num_graphs)}) df = pd.DataFrame({"label": np.random.randint(3, size=num_graphs)})
csv_path = os.path.join(test_dir, 'graph.csv') csv_path = os.path.join(test_dir, "graph.csv")
df.to_csv(csv_path, index=False) df.to_csv(csv_path, index=False)
meta_graph = MetaGraph(file_name=csv_path) meta_graph = MetaGraph(file_name=csv_path)
expect_except = False expect_except = False
try: try:
GraphData.load_from_csv( GraphData.load_from_csv(meta_graph, DefaultDataParser())
meta_graph, DefaultDataParser())
except DGLError: except DGLError:
expect_except = True expect_except = True
assert expect_except assert expect_except
...@@ -931,40 +1126,56 @@ def _test_CSVDataset_single(): ...@@ -931,40 +1126,56 @@ def _test_CSVDataset_single():
edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv") edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv")
nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv") nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv") nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name', meta_yaml_data = {
'node_data': [{'file_name': os.path.basename(nodes_csv_path_0), "version": "1.0.0",
'ntype': 'user', "dataset_name": "default_name",
}, "node_data": [
{'file_name': os.path.basename(nodes_csv_path_1), {
'ntype': 'item', "file_name": os.path.basename(nodes_csv_path_0),
}], "ntype": "user",
'edge_data': [{'file_name': os.path.basename(edges_csv_path_0), },
'etype': ['user', 'follow', 'user'], {
}, "file_name": os.path.basename(nodes_csv_path_1),
{'file_name': os.path.basename(edges_csv_path_1), "ntype": "item",
'etype': ['user', 'like', 'item'], },
}], ],
} "edge_data": [
with open(meta_yaml_path, 'w') as f: {
"file_name": os.path.basename(edges_csv_path_0),
"etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False) yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100 num_nodes = 100
num_edges = 500 num_edges = 500
num_dims = 3 num_dims = 3
feat_ndata = np.random.rand(num_nodes, num_dims) feat_ndata = np.random.rand(num_nodes, num_dims)
label_ndata = np.random.randint(2, size=num_nodes) label_ndata = np.random.randint(2, size=num_nodes)
df = pd.DataFrame({'node_id': np.arange(num_nodes), df = pd.DataFrame(
'label': label_ndata, {
'feat': [line.tolist() for line in feat_ndata], "node_id": np.arange(num_nodes),
}) "label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
}
)
df.to_csv(nodes_csv_path_0, index=False) df.to_csv(nodes_csv_path_0, index=False)
df.to_csv(nodes_csv_path_1, index=False) df.to_csv(nodes_csv_path_1, index=False)
feat_edata = np.random.rand(num_edges, num_dims) feat_edata = np.random.rand(num_edges, num_dims)
label_edata = np.random.randint(2, size=num_edges) label_edata = np.random.randint(2, size=num_edges)
df = pd.DataFrame({'src_id': np.random.randint(num_nodes, size=num_edges), df = pd.DataFrame(
'dst_id': np.random.randint(num_nodes, size=num_edges), {
'label': label_edata, "src_id": np.random.randint(num_nodes, size=num_edges),
'feat': [line.tolist() for line in feat_edata], "dst_id": np.random.randint(num_nodes, size=num_edges),
}) "label": label_edata,
"feat": [line.tolist() for line in feat_edata],
}
)
df.to_csv(edges_csv_path_0, index=False) df.to_csv(edges_csv_path_0, index=False)
df.to_csv(edges_csv_path_1, index=False) df.to_csv(edges_csv_path_1, index=False)
...@@ -974,24 +1185,29 @@ def _test_CSVDataset_single(): ...@@ -974,24 +1185,29 @@ def _test_CSVDataset_single():
# remove original node data file to verify reload from cached files # remove original node data file to verify reload from cached files
os.remove(nodes_csv_path_0) os.remove(nodes_csv_path_0)
assert not os.path.exists(nodes_csv_path_0) assert not os.path.exists(nodes_csv_path_0)
csv_dataset = data.CSVDataset( csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload)
test_dir, force_reload=force_reload)
assert len(csv_dataset) == 1 assert len(csv_dataset) == 1
g = csv_dataset[0] g = csv_dataset[0]
assert not g.is_homogeneous assert not g.is_homogeneous
assert csv_dataset.has_cache() assert csv_dataset.has_cache()
for ntype in g.ntypes: for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes assert g.num_nodes(ntype) == num_nodes
assert F.array_equal(F.tensor(feat_ndata, dtype=F.float32), assert F.array_equal(
g.nodes[ntype].data['feat']) F.tensor(feat_ndata, dtype=F.float32),
assert np.array_equal(label_ndata, g.nodes[ntype].data["feat"],
F.asnumpy(g.nodes[ntype].data['label'])) )
assert np.array_equal(
label_ndata, F.asnumpy(g.nodes[ntype].data["label"])
)
for etype in g.etypes: for etype in g.etypes:
assert g.num_edges(etype) == num_edges assert g.num_edges(etype) == num_edges
assert F.array_equal(F.tensor(feat_edata, dtype=F.float32), assert F.array_equal(
g.edges[etype].data['feat']) F.tensor(feat_edata, dtype=F.float32),
assert np.array_equal(label_edata, g.edges[etype].data["feat"],
F.asnumpy(g.edges[etype].data['label'])) )
assert np.array_equal(
label_edata, F.asnumpy(g.edges[etype].data["label"])
)
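# A minimal usage sketch for the single-graph layout exercised above: point
# CSVDataset at the directory holding the meta YAML and the CSV files it
# references. The path below is a placeholder and the PyTorch backend is
# assumed for the feature tensors.
from dgl.data import CSVDataset
dataset = CSVDataset("/path/to/test_dir")   # parses the meta YAML, builds and caches the graph
g = dataset[0]                              # one heterogeneous graph, as asserted above
print(g.ntypes, g.etypes)
print(g.nodes["user"].data["feat"].shape)   # (num_nodes, num_dims) float32 features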
def _test_CSVDataset_multiple(): def _test_CSVDataset_multiple():
...@@ -1003,52 +1219,87 @@ def _test_CSVDataset_multiple(): ...@@ -1003,52 +1219,87 @@ def _test_CSVDataset_multiple():
nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv") nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv") nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
graph_csv_path = os.path.join(test_dir, "test_graph.csv") graph_csv_path = os.path.join(test_dir, "test_graph.csv")
meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name', meta_yaml_data = {
'node_data': [{'file_name': os.path.basename(nodes_csv_path_0), "version": "1.0.0",
'ntype': 'user', "dataset_name": "default_name",
}, "node_data": [
{'file_name': os.path.basename(nodes_csv_path_1), {
'ntype': 'item', "file_name": os.path.basename(nodes_csv_path_0),
}], "ntype": "user",
'edge_data': [{'file_name': os.path.basename(edges_csv_path_0), },
'etype': ['user', 'follow', 'user'], {
}, "file_name": os.path.basename(nodes_csv_path_1),
{'file_name': os.path.basename(edges_csv_path_1), "ntype": "item",
'etype': ['user', 'like', 'item'], },
}], ],
'graph_data': {'file_name': os.path.basename(graph_csv_path)} "edge_data": [
} {
with open(meta_yaml_path, 'w') as f: "file_name": os.path.basename(edges_csv_path_0),
"etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
"graph_data": {"file_name": os.path.basename(graph_csv_path)},
}
with open(meta_yaml_path, "w") as f:
yaml.dump(meta_yaml_data, f, sort_keys=False) yaml.dump(meta_yaml_data, f, sort_keys=False)
num_nodes = 100 num_nodes = 100
num_edges = 500 num_edges = 500
num_graphs = 10 num_graphs = 10
num_dims = 3 num_dims = 3
feat_ndata = np.random.rand(num_nodes*num_graphs, num_dims) feat_ndata = np.random.rand(num_nodes * num_graphs, num_dims)
label_ndata = np.random.randint(2, size=num_nodes*num_graphs) label_ndata = np.random.randint(2, size=num_nodes * num_graphs)
df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]), df = pd.DataFrame(
'label': label_ndata, {
'feat': [line.tolist() for line in feat_ndata], "node_id": np.hstack(
'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)]) [np.arange(num_nodes) for _ in range(num_graphs)]
}) ),
"label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
"graph_id": np.hstack(
[np.full(num_nodes, i) for i in range(num_graphs)]
),
}
)
df.to_csv(nodes_csv_path_0, index=False) df.to_csv(nodes_csv_path_0, index=False)
df.to_csv(nodes_csv_path_1, index=False) df.to_csv(nodes_csv_path_1, index=False)
feat_edata = np.random.rand(num_edges*num_graphs, num_dims) feat_edata = np.random.rand(num_edges * num_graphs, num_dims)
label_edata = np.random.randint(2, size=num_edges*num_graphs) label_edata = np.random.randint(2, size=num_edges * num_graphs)
df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]), df = pd.DataFrame(
'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]), {
'label': label_edata, "src_id": np.hstack(
'feat': [line.tolist() for line in feat_edata], [
'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)]) np.random.randint(num_nodes, size=num_edges)
}) for _ in range(num_graphs)
]
),
"dst_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"label": label_edata,
"feat": [line.tolist() for line in feat_edata],
"graph_id": np.hstack(
[np.full(num_edges, i) for i in range(num_graphs)]
),
}
)
        df.to_csv(edges_csv_path_0, index=False)
        df.to_csv(edges_csv_path_1, index=False)
        feat_gdata = np.random.rand(num_graphs, num_dims)
        label_gdata = np.random.randint(2, size=num_graphs)
        df = pd.DataFrame(
            {
                "label": label_gdata,
                "feat": [line.tolist() for line in feat_gdata],
"graph_id": np.arange(num_graphs),
}
)
        df.to_csv(graph_csv_path, index=False)
        # load CSVDataset with default node/edge/gdata_parser
@@ -1057,31 +1308,47 @@ def _test_CSVDataset_multiple():
        # remove original node data file to verify reload from cached files
        os.remove(nodes_csv_path_0)
        assert not os.path.exists(nodes_csv_path_0)
        csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload)
        assert len(csv_dataset) == num_graphs
        assert csv_dataset.has_cache()
        assert len(csv_dataset.data) == 2
        assert "feat" in csv_dataset.data
        assert "label" in csv_dataset.data
        assert F.array_equal(
            F.tensor(feat_gdata, dtype=F.float32), csv_dataset.data["feat"]
)
        for i, (g, g_data) in enumerate(csv_dataset):
            assert not g.is_homogeneous
            assert F.asnumpy(g_data["label"]) == label_gdata[i]
            assert F.array_equal(
                g_data["feat"], F.tensor(feat_gdata[i], dtype=F.float32)
            )
            for ntype in g.ntypes:
                assert g.num_nodes(ntype) == num_nodes
                assert F.array_equal(
                    F.tensor(
                        feat_ndata[i * num_nodes : (i + 1) * num_nodes],
                        dtype=F.float32,
),
g.nodes[ntype].data["feat"],
)
assert np.array_equal(
label_ndata[i * num_nodes : (i + 1) * num_nodes],
F.asnumpy(g.nodes[ntype].data["label"]),
)
            for etype in g.etypes:
                assert g.num_edges(etype) == num_edges
                assert F.array_equal(
                    F.tensor(
                        feat_edata[i * num_edges : (i + 1) * num_edges],
                        dtype=F.float32,
),
g.edges[etype].data["feat"],
)
assert np.array_equal(
label_edata[i * num_edges : (i + 1) * num_edges],
F.asnumpy(g.edges[etype].data["label"]),
)
def _test_CSVDataset_customized_data_parser():
@@ -1093,45 +1360,76 @@ def _test_CSVDataset_customized_data_parser():
        nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
        nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
        graph_csv_path = os.path.join(test_dir, "test_graph.csv")
        meta_yaml_data = {
            "dataset_name": "default_name",
            "node_data": [
                {
                    "file_name": os.path.basename(nodes_csv_path_0),
                    "ntype": "user",
                },
                {
                    "file_name": os.path.basename(nodes_csv_path_1),
                    "ntype": "item",
                },
            ],
            "edge_data": [
                {
                    "file_name": os.path.basename(edges_csv_path_0),
                    "etype": ["user", "follow", "user"],
},
{
"file_name": os.path.basename(edges_csv_path_1),
"etype": ["user", "like", "item"],
},
],
"graph_data": {"file_name": os.path.basename(graph_csv_path)},
}
with open(meta_yaml_path, "w") as f:
            yaml.dump(meta_yaml_data, f, sort_keys=False)
        num_nodes = 100
        num_edges = 500
        num_graphs = 10
        label_ndata = np.random.randint(2, size=num_nodes * num_graphs)
        df = pd.DataFrame(
            {
                "node_id": np.hstack(
                    [np.arange(num_nodes) for _ in range(num_graphs)]
),
"label": label_ndata,
"graph_id": np.hstack(
[np.full(num_nodes, i) for i in range(num_graphs)]
),
}
)
        df.to_csv(nodes_csv_path_0, index=False)
        df.to_csv(nodes_csv_path_1, index=False)
        label_edata = np.random.randint(2, size=num_edges * num_graphs)
        df = pd.DataFrame(
            {
                "src_id": np.hstack(
                    [
                        np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"dst_id": np.hstack(
[
np.random.randint(num_nodes, size=num_edges)
for _ in range(num_graphs)
]
),
"label": label_edata,
"graph_id": np.hstack(
[np.full(num_edges, i) for i in range(num_graphs)]
),
}
)
        df.to_csv(edges_csv_path_0, index=False)
        df.to_csv(edges_csv_path_1, index=False)
        label_gdata = np.random.randint(2, size=num_graphs)
        df = pd.DataFrame(
            {"label": label_gdata, "graph_id": np.arange(num_graphs)}
        )
        df.to_csv(graph_csv_path, index=False)

        class CustDataParser:
@@ -1139,69 +1437,86 @@ def _test_CSVDataset_customized_data_parser():
                data = {}
                for header in df:
                    dt = df[header].to_numpy().squeeze()
                    if header == "label":
                        dt += 2
                    data[header] = dt
                return data
        # load CSVDataset with customized node/edge/gdata_parser
        # specify via dict[ntype/etype, callable]
        csv_dataset = data.CSVDataset(
            test_dir,
            force_reload=True,
            ndata_parser={"user": CustDataParser()},
edata_parser={("user", "like", "item"): CustDataParser()},
gdata_parser=CustDataParser(),
)
        assert len(csv_dataset) == num_graphs
        assert len(csv_dataset.data) == 1
        assert "label" in csv_dataset.data
        for i, (g, g_data) in enumerate(csv_dataset):
            assert not g.is_homogeneous
            assert F.asnumpy(g_data) == label_gdata[i] + 2
            for ntype in g.ntypes:
                assert g.num_nodes(ntype) == num_nodes
                offset = 2 if ntype == "user" else 0
                assert np.array_equal(
                    label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset,
F.asnumpy(g.nodes[ntype].data["label"]),
)
            for etype in g.etypes:
                assert g.num_edges(etype) == num_edges
                offset = 2 if etype == "like" else 0
                assert np.array_equal(
                    label_edata[i * num_edges : (i + 1) * num_edges] + offset,
                    F.asnumpy(g.edges[etype].data["label"]),
                )
        # specify via callable
        csv_dataset = data.CSVDataset(
            test_dir,
            force_reload=True,
ndata_parser=CustDataParser(),
edata_parser=CustDataParser(),
gdata_parser=CustDataParser(),
)
        assert len(csv_dataset) == num_graphs
        assert len(csv_dataset.data) == 1
        assert "label" in csv_dataset.data
        for i, (g, g_data) in enumerate(csv_dataset):
            assert not g.is_homogeneous
            assert F.asnumpy(g_data) == label_gdata[i] + 2
            for ntype in g.ntypes:
                assert g.num_nodes(ntype) == num_nodes
                offset = 2
                assert np.array_equal(
                    label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset,
F.asnumpy(g.nodes[ntype].data["label"]),
)
            for etype in g.etypes:
                assert g.num_edges(etype) == num_edges
                offset = 2
                assert np.array_equal(
                    label_edata[i * num_edges : (i + 1) * num_edges] + offset,
                    F.asnumpy(g.edges[etype].data["label"]),
                )


def _test_NodeEdgeGraphData():
    from dgl.data.csv_dataset_base import EdgeData, GraphData, NodeData

    # NodeData basics
    num_nodes = 100
    node_ids = np.arange(num_nodes, dtype=np.float)
    ndata = NodeData(node_ids, {})
    assert np.array_equal(ndata.id, node_ids)
    assert len(ndata.data) == 0
    assert ndata.type == "_V"
    assert np.array_equal(ndata.graph_id, np.full(num_nodes, 0))
    # NodeData more
    data = {"feat": np.random.rand(num_nodes, 3)}
    graph_id = np.arange(num_nodes)
    ndata = NodeData(node_ids, data, type="user", graph_id=graph_id)
    assert ndata.type == "user"
    assert np.array_equal(ndata.graph_id, graph_id)
    assert len(ndata.data) == len(data)
    for k, v in data.items():
@@ -1210,8 +1525,11 @@ def _test_NodeEdgeGraphData():
    # NodeData except
    expect_except = False
    try:
        NodeData(
            np.arange(num_nodes),
            {"feat": np.random.rand(num_nodes + 1, 3)},
            graph_id=np.arange(num_nodes - 1),
        )
    except:
        expect_except = True
    assert expect_except
@@ -1224,17 +1542,16 @@ def _test_NodeEdgeGraphData():
    edata = EdgeData(src_ids, dst_ids, {})
    assert np.array_equal(edata.src, src_ids)
    assert np.array_equal(edata.dst, dst_ids)
    assert edata.type == ("_V", "_E", "_V")
    assert len(edata.data) == 0
    assert np.array_equal(edata.graph_id, np.full(num_edges, 0))
    # EdgeData more
    src_ids = np.random.randint(num_nodes, size=num_edges).astype(np.float)
    dst_ids = np.random.randint(num_nodes, size=num_edges).astype(np.float)
    data = {"feat": np.random.rand(num_edges, 3)}
    etype = ("user", "like", "item")
    graph_ids = np.arange(num_edges)
    edata = EdgeData(src_ids, dst_ids, data, type=etype, graph_id=graph_ids)
    assert np.array_equal(edata.src, src_ids)
    assert np.array_equal(edata.dst, dst_ids)
    assert edata.type == etype
@@ -1246,8 +1563,12 @@ def _test_NodeEdgeGraphData():
    # EdgeData except
    expect_except = False
    try:
        EdgeData(
            np.arange(num_edges),
            np.arange(num_edges + 1),
            {"feat": np.random.rand(num_edges - 1, 3)},
            graph_id=np.arange(num_edges + 2),
        )
    except:
        expect_except = True
    assert expect_except
@@ -1260,7 +1581,7 @@ def _test_NodeEdgeGraphData():
    assert len(gdata.data) == 0
    # GraphData more
    graph_ids = np.arange(num_graphs).astype(np.float)
    data = {"feat": np.random.rand(num_graphs, 3)}
    gdata = GraphData(graph_ids, data)
    assert np.array_equal(gdata.graph_id, graph_ids)
    assert len(gdata.data) == len(data)
@@ -1269,7 +1590,10 @@ def _test_NodeEdgeGraphData():
    assert np.array_equal(gdata.data[k], v)
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_csvdataset():
    _test_NodeEdgeGraphData()
@@ -1286,121 +1610,207 @@ def test_csvdataset():
    _test_CSVDataset_multiple()
    _test_CSVDataset_customized_data_parser()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_add_nodepred_split():
    dataset = data.AmazonCoBuyComputerDataset()
    print("train_mask" in dataset[0].ndata)
    data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1])
    assert "train_mask" in dataset[0].ndata
    dataset = data.AIFBDataset()
    print("train_mask" in dataset[0].nodes["Publikationen"].data)
    data.utils.add_nodepred_split(
        dataset, [0.8, 0.1, 0.1], ntype="Publikationen"
)
assert "train_mask" in dataset[0].nodes["Publikationen"].data
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred1():
    ds = data.AmazonCoBuyComputerDataset()
    print("train_mask" in ds[0].ndata)
    new_ds = data.AsNodePredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 1
    assert new_ds[0].num_nodes() == ds[0].num_nodes()
    assert new_ds[0].num_edges() == ds[0].num_edges()
    assert "train_mask" in new_ds[0].ndata
    assert F.array_equal(
        new_ds.train_idx, F.nonzero_1d(new_ds[0].ndata["train_mask"])
    )
    assert F.array_equal(
        new_ds.val_idx, F.nonzero_1d(new_ds[0].ndata["val_mask"])
    )
assert F.array_equal(
new_ds.test_idx, F.nonzero_1d(new_ds[0].ndata["test_mask"])
)
    ds = data.AIFBDataset()
    print("train_mask" in ds[0].nodes["Personen"].data)
    new_ds = data.AsNodePredDataset(
ds, [0.8, 0.1, 0.1], "Personen", verbose=True
)
    assert len(new_ds) == 1
    assert new_ds[0].ntypes == ds[0].ntypes
    assert new_ds[0].canonical_etypes == ds[0].canonical_etypes
    assert "train_mask" in new_ds[0].nodes["Personen"].data
    assert F.array_equal(
        new_ds.train_idx,
        F.nonzero_1d(new_ds[0].nodes["Personen"].data["train_mask"]),
    )
    assert F.array_equal(
        new_ds.val_idx,
        F.nonzero_1d(new_ds[0].nodes["Personen"].data["val_mask"]),
    )
assert F.array_equal(
new_ds.test_idx,
F.nonzero_1d(new_ds[0].nodes["Personen"].data["test_mask"]),
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred2(): def test_as_nodepred2():
# test proper reprocessing # test proper reprocessing
# create # create
ds = data.AsNodePredDataset(data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1]) ds = data.AsNodePredDataset(
assert F.sum(F.astype(ds[0].ndata['train_mask'], F.int32), 0) == int(ds[0].num_nodes() * 0.8) data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.8
)
    assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8)
    # read from cache
    ds = data.AsNodePredDataset(
        data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.8
)
    assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8)
    # invalid cache, re-read
    ds = data.AsNodePredDataset(
        data.AmazonCoBuyComputerDataset(), [0.1, 0.1, 0.8]
)
assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int(
ds[0].num_nodes() * 0.1
)
    assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.1)
    # create
    ds = data.AsNodePredDataset(
        data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True
    )
assert F.sum(
F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.8)
    assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8)
    # read from cache
    ds = data.AsNodePredDataset(
        data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True
    )
assert F.sum(
F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.8)
assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8)
    # invalid cache, re-read
    ds = data.AsNodePredDataset(
        data.AIFBDataset(), [0.1, 0.1, 0.8], "Personen", verbose=True
    )
    assert F.sum(
        F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0
) == int(ds[0].num_nodes("Personen") * 0.1)
assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.1)
@unittest.skipIf(
dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred_ogb(): def test_as_nodepred_ogb():
from ogb.nodeproppred import DglNodePropPredDataset from ogb.nodeproppred import DglNodePropPredDataset
ds = data.AsNodePredDataset(DglNodePropPredDataset("ogbn-arxiv"), split_ratio=None, verbose=True)
ds = data.AsNodePredDataset(
DglNodePropPredDataset("ogbn-arxiv"), split_ratio=None, verbose=True
)
split = DglNodePropPredDataset("ogbn-arxiv").get_idx_split() split = DglNodePropPredDataset("ogbn-arxiv").get_idx_split()
train_idx, val_idx, test_idx = split['train'], split['valid'], split['test'] train_idx, val_idx, test_idx = split["train"], split["valid"], split["test"]
assert F.array_equal(ds.train_idx, F.tensor(train_idx)) assert F.array_equal(ds.train_idx, F.tensor(train_idx))
assert F.array_equal(ds.val_idx, F.tensor(val_idx)) assert F.array_equal(ds.val_idx, F.tensor(val_idx))
assert F.array_equal(ds.test_idx, F.tensor(test_idx)) assert F.array_equal(ds.test_idx, F.tensor(test_idx))
    # force generate new split
    ds = data.AsNodePredDataset(
DglNodePropPredDataset("ogbn-arxiv"),
split_ratio=[0.7, 0.2, 0.1],
verbose=True,
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_linkpred(): def test_as_linkpred():
# create # create
ds = data.AsLinkPredDataset(data.CoraGraphDataset(), split_ratio=[0.8, 0.1, 0.1], neg_ratio=1, verbose=True) ds = data.AsLinkPredDataset(
data.CoraGraphDataset(),
split_ratio=[0.8, 0.1, 0.1],
neg_ratio=1,
verbose=True,
)
    # Cora has 10556 edges, 10% test edges can be 1057
    assert ds.test_edges[0][0].shape[0] == 1057
    # negative samples, not guaranteed, so the assert is in a relaxed range
    assert 1000 <= ds.test_edges[1][0].shape[0] <= 1057
    # read from cache
    ds = data.AsLinkPredDataset(
data.CoraGraphDataset(),
split_ratio=[0.7, 0.1, 0.2],
neg_ratio=2,
verbose=True,
)
    assert ds.test_edges[0][0].shape[0] == 2112
    # negative samples, not guaranteed to be ratio 2, so the assert is in a relaxed range
    assert 4000 < ds.test_edges[1][0].shape[0] <= 4224
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
def test_as_linkpred_ogb():
    from ogb.linkproppred import DglLinkPropPredDataset

    ds = data.AsLinkPredDataset(
DglLinkPropPredDataset("ogbl-collab"), split_ratio=None, verbose=True
)
    # original dataset has 46329 test edges
    assert ds.test_edges[0][0].shape[0] == 46329
    # force generate new split
    ds = data.AsLinkPredDataset(
        DglLinkPropPredDataset("ogbl-collab"),
        split_ratio=[0.7, 0.2, 0.1],
        verbose=True,
    )
    assert ds.test_edges[0][0].shape[0] == 235812
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_nodepred_csvdataset():
    with tempfile.TemporaryDirectory() as test_dir:
@@ -1408,13 +1818,13 @@ def test_as_nodepred_csvdataset():
        meta_yaml_path = os.path.join(test_dir, "meta.yaml")
        edges_csv_path = os.path.join(test_dir, "test_edges.csv")
        nodes_csv_path = os.path.join(test_dir, "test_nodes.csv")
        meta_yaml_data = {
            "version": "1.0.0",
            "dataset_name": "default_name",
            "node_data": [{"file_name": os.path.basename(nodes_csv_path)}],
            "edge_data": [{"file_name": os.path.basename(edges_csv_path)}],
        }
        with open(meta_yaml_path, "w") as f:
            yaml.dump(meta_yaml_data, f, sort_keys=False)
        num_nodes = 100
        num_edges = 500
@@ -1422,37 +1832,49 @@ def test_as_nodepred_csvdataset():
        num_classes = num_nodes
        feat_ndata = np.random.rand(num_nodes, num_dims)
        label_ndata = np.arange(num_classes)
        df = pd.DataFrame(
            {
                "node_id": np.arange(num_nodes),
                "label": label_ndata,
"feat": [line.tolist() for line in feat_ndata],
}
)
        df.to_csv(nodes_csv_path, index=False)
        df = pd.DataFrame(
            {
                "src_id": np.random.randint(num_nodes, size=num_edges),
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
)
        df.to_csv(edges_csv_path, index=False)
        ds = data.CSVDataset(test_dir, force_reload=True)
        assert "feat" in ds[0].ndata
        assert "label" in ds[0].ndata
        assert "train_mask" not in ds[0].ndata
        assert not hasattr(ds[0], "num_classes")
        new_ds = data.AsNodePredDataset(
ds, split_ratio=[0.8, 0.1, 0.1], force_reload=True
)
        assert new_ds.num_classes == num_classes
        assert "feat" in new_ds[0].ndata
        assert "label" in new_ds[0].ndata
        assert "train_mask" in new_ds[0].ndata
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_graphpred():
    ds = data.GINDataset(name="MUTAG", self_loop=True)
    new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 188
    assert new_ds.num_tasks == 1
    assert new_ds.num_classes == 2
    ds = data.FakeNewsDataset("politifact", "profile")
    new_ds = data.AsGraphPredDataset(ds, verbose=True)
    assert len(new_ds) == 314
    assert new_ds.num_tasks == 1
@@ -1464,25 +1886,25 @@ def test_as_graphpred():
    assert new_ds.num_tasks == 14
    assert new_ds.num_classes is None
    ds = data.QM9Dataset(label_keys=["mu", "gap"])
    new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 130831
    assert new_ds.num_tasks == 2
    assert new_ds.num_classes is None
    ds = data.QM9EdgeDataset(label_keys=["mu", "alpha"])
    new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 130831
    assert new_ds.num_tasks == 2
    assert new_ds.num_classes is None
    ds = data.TUDataset("DD")
    new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 1178
    assert new_ds.num_tasks == 1
    assert new_ds.num_classes == 2
    ds = data.LegacyTUDataset("DD")
    new_ds = data.AsGraphPredDataset(ds, [0.8, 0.1, 0.1], verbose=True)
    assert len(new_ds) == 1178
    assert new_ds.num_tasks == 1
@@ -1494,25 +1916,41 @@ def test_as_graphpred():
    assert new_ds.num_tasks == 1
    assert new_ds.num_classes == 2
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_as_graphpred_reprocess(): def test_as_graphpred_reprocess():
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.8, 0.1, 0.1]) ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8) assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache # read from cache
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.8, 0.1, 0.1]) ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8) assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read # invalid cache, re-read
ds = data.AsGraphPredDataset(data.GINDataset(name='MUTAG', self_loop=True), [0.1, 0.1, 0.8]) ds = data.AsGraphPredDataset(
data.GINDataset(name="MUTAG", self_loop=True), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1) assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.8, 0.1, 0.1]) ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8) assert len(ds.train_idx) == int(len(ds) * 0.8)
# read from cache # read from cache
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.8, 0.1, 0.1]) ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1]
)
assert len(ds.train_idx) == int(len(ds) * 0.8) assert len(ds.train_idx) == int(len(ds) * 0.8)
# invalid cache, re-read # invalid cache, re-read
ds = data.AsGraphPredDataset(data.FakeNewsDataset('politifact', 'profile'), [0.1, 0.1, 0.8]) ds = data.AsGraphPredDataset(
data.FakeNewsDataset("politifact", "profile"), [0.1, 0.1, 0.8]
)
assert len(ds.train_idx) == int(len(ds) * 0.1) assert len(ds.train_idx) == int(len(ds) * 0.1)
ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.8, 0.1, 0.1]) ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.8, 0.1, 0.1])
@@ -1524,40 +1962,52 @@ def test_as_graphpred_reprocess():
    ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.1, 0.1, 0.8])
    assert len(ds.train_idx) == int(len(ds) * 0.1)
    ds = data.AsGraphPredDataset(
        data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # read from cache
    ds = data.AsGraphPredDataset(
        data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # invalid cache, re-read
    ds = data.AsGraphPredDataset(
        data.QM9Dataset(label_keys=["mu", "gap"]), [0.1, 0.1, 0.8]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.1)
    ds = data.AsGraphPredDataset(
        data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # read from cache
    ds = data.AsGraphPredDataset(
        data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # invalid cache, re-read
    ds = data.AsGraphPredDataset(
        data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.1, 0.1, 0.8]
    )
    assert len(ds.train_idx) == int(len(ds) * 0.1)
    ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1])
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # read from cache
    ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1])
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # invalid cache, re-read
    ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.1, 0.1, 0.8])
    assert len(ds.train_idx) == int(len(ds) * 0.1)
    ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1])
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # read from cache
    ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1])
    assert len(ds.train_idx) == int(len(ds) * 0.8)
    # invalid cache, re-read
    ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.1, 0.1, 0.8])
    assert len(ds.train_idx) == int(len(ds) * 0.1)
    ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.8, 0.1, 0.1])
@@ -1569,18 +2019,27 @@ def test_as_graphpred_reprocess():
    ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.1, 0.1, 0.8])
    assert len(ds.train_idx) == int(len(ds) * 0.1)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
def test_as_graphpred_ogb():
    from ogb.graphproppred import DglGraphPropPredDataset

    ds = data.AsGraphPredDataset(
        DglGraphPropPredDataset("ogbg-molhiv"), split_ratio=None, verbose=True
    )
    assert len(ds.train_idx) == 32901
    # force generate new split
    ds = data.AsGraphPredDataset(
        DglGraphPropPredDataset("ogbg-molhiv"),
        split_ratio=[0.6, 0.2, 0.2],
        verbose=True,
    )
    assert len(ds.train_idx) == 24676
if __name__ == "__main__":
    test_minigc()
    test_gin()
    test_data_hash()
...
import unittest

import backend as F
from test_utils import parametrize_idtype

import dgl
from dgl.dataloading import (
NeighborSampler,
as_edge_prediction_sampler,
negative_sampler,
)
def create_test_graph(idtype):
    # test heterograph from the docstring, plus a user -- wishes -- game relation
    # 3 users, 2 games, 2 developers
@@ -14,12 +20,16 @@ def create_test_graph(idtype):
    # ('user', 'wishes', 'game'),
    # ('developer', 'develops', 'game')])
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
            ("user", "wishes", "game"): ([0, 2], [1, 0]),
            ("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.ctx(),
)
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
@@ -28,14 +38,15 @@ def create_test_graph(idtype):
@parametrize_idtype
def test_edge_prediction_sampler(idtype):
    g = create_test_graph(idtype)
    sampler = NeighborSampler([10, 10])
    sampler = as_edge_prediction_sampler(
        sampler, negative_sampler=negative_sampler.Uniform(1)
    )
    seeds = F.copy_to(F.arange(0, 2, dtype=idtype), ctx=F.ctx())
    # just a smoke test to make sure we don't fail internal assertions
    result = sampler.sample(g, {"follows": seeds})


if __name__ == "__main__":
    test_edge_prediction_sampler()
import itertools
import math
import unittest
from collections import Counter

import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype

import dgl
import dgl.function as fn
from dgl import DGLError
from dgl.ops import edge_softmax

rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2
def create_test_heterograph(idtype):
    # test heterograph from the docstring, plus a user -- wishes -- game relation
    # 3 users, 2 games, 2 developers
@@ -27,37 +31,57 @@ def create_test_heterograph(idtype):
    # ('user', 'wishes', 'game'),
    # ('developer', 'develops', 'game')])
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 2, 1, 1], [0, 0, 1, 1, 2]),
            ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
            ("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]),
            ("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]),
},
idtype=idtype,
device=F.ctx(),
)
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
def test_edge_softmax_unidirectional():
    g = dgl.heterograph(
        {
            ("A", "AB", "B"): (
[1, 2, 3, 1, 2, 3, 1, 2, 3],
[0, 0, 0, 1, 1, 1, 2, 2, 2],
),
("B", "BB", "B"): (
[0, 1, 2, 0, 1, 2, 0, 1, 2],
[0, 0, 0, 1, 1, 1, 2, 2, 2],
),
}
)
    g = g.to(F.ctx())
    g.edges["AB"].data["x"] = F.ones(9) * 2
    g.edges["BB"].data["x"] = F.ones(9)
    result = dgl.ops.edge_softmax(
        g, {"AB": g.edges["AB"].data["x"], "BB": g.edges["BB"].data["x"]}
    )
    ab = result["A", "AB", "B"]
    bb = result["B", "BB", "B"]
    e2 = F.zeros_like(ab) + math.exp(2) / ((math.exp(2) + math.exp(1)) * 3)
    e1 = F.zeros_like(bb) + math.exp(1) / ((math.exp(2) + math.exp(1)) * 3)
    assert F.allclose(ab, e2)
    assert F.allclose(bb, e1)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@pytest.mark.parametrize("g", get_cases(["clique"]))
@pytest.mark.parametrize("norm_by", ["src", "dst"])
# @pytest.mark.parametrize('shp', edge_softmax_shapes)
@parametrize_idtype
def test_edge_softmax(g, norm_by, idtype):
@@ -65,20 +89,20 @@ def test_edge_softmax(g, norm_by, idtype):
    g = create_test_heterograph(idtype)
    x1 = F.randn((g.num_edges("plays"), feat_size))
    x2 = F.randn((g.num_edges("follows"), feat_size))
    x3 = F.randn((g.num_edges("develops"), feat_size))
    x4 = F.randn((g.num_edges("wishes"), feat_size))
    F.attach_grad(F.clone(x1))
    F.attach_grad(F.clone(x2))
    F.attach_grad(F.clone(x3))
    F.attach_grad(F.clone(x4))
    g["plays"].edata["eid"] = x1
    g["follows"].edata["eid"] = x2
    g["develops"].edata["eid"] = x3
    g["wishes"].edata["eid"] = x4
    #################################################################
    # edge_softmax() on homogeneous graph
@@ -89,12 +113,12 @@ def test_edge_softmax(g, norm_by, idtype):
    hm_x = F.cat((x3, x2, x1, x4), 0)
    hm_e = F.attach_grad(F.clone(hm_x))
    score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by)
    hm_g.edata["score"] = score_hm
    ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes)
    r1 = ht_g.edata["score"][("user", "plays", "game")]
    r2 = ht_g.edata["score"][("user", "follows", "user")]
    r3 = ht_g.edata["score"][("developer", "develops", "game")]
    r4 = ht_g.edata["score"][("user", "wishes", "game")]
    F.backward(F.reduce_sum(r1) + F.reduce_sum(r2))
    grad_edata_hm = F.grad(hm_e)
@@ -106,18 +130,22 @@ def test_edge_softmax(g, norm_by, idtype):
    e2 = F.attach_grad(F.clone(x2))
    e3 = F.attach_grad(F.clone(x3))
    e4 = F.attach_grad(F.clone(x4))
    e = {
        ("user", "follows", "user"): e2,
        ("user", "plays", "game"): e1,
        ("user", "wishes", "game"): e4,
        ("developer", "develops", "game"): e3,
    }
    with F.record_grad():
        score = edge_softmax(g, e, norm_by=norm_by)
        r5 = score[("user", "plays", "game")]
        r6 = score[("user", "follows", "user")]
        r7 = score[("developer", "develops", "game")]
        r8 = score[("user", "wishes", "game")]
        F.backward(F.reduce_sum(r5) + F.reduce_sum(r6))
        grad_edata_ht = F.cat(
(F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0
)
    # correctness check
    assert F.allclose(r1, r5)
    assert F.allclose(r2, r6)
@@ -125,5 +153,6 @@ def test_edge_softmax(g, norm_by, idtype):
    assert F.allclose(r4, r8)
    assert F.allclose(grad_edata_hm, grad_edata_ht)


if __name__ == "__main__":
    test_edge_softmax_unidirectional()
import os
import unittest

import backend as F
import numpy as np
import pytest

import dgl


@unittest.skipIf(os.name == "nt", reason="Cython only works on linux")
def test_cython():
    import dgl._ffi._cy3.core
@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback(arg):
    def cb(x):
        return x + 1

    ret = dgl._api_internal._TestPythonCallback(cb, arg)
    assert ret == arg + 1


@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array(dtype):
    def cb(x):
        return F.to_dgl_nd(F.from_dgl_nd(x) + 1)

    arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
    ret = F.from_dgl_nd(
        dgl._api_internal._TestPythonCallback(cb, F.to_dgl_nd(arg))
    )
    assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)


@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback_thread(arg):
    def cb(x):
        return x + 1

    ret = dgl._api_internal._TestPythonCallbackThread(cb, arg)
    assert ret == arg + 1


@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array_thread(dtype):
    def cb(x):
        return F.to_dgl_nd(F.from_dgl_nd(x) + 1)

    arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
    ret = F.from_dgl_nd(
        dgl._api_internal._TestPythonCallbackThread(cb, F.to_dgl_nd(arg))
    )
    assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)
import unittest

import backend as F
import numpy as np
from test_utils import parametrize_idtype

import dgl
from dgl.utils import Filter
def test_graph_filter():
    g = dgl.DGLGraph().to(F.ctx())
    g.add_nodes(4)
    g.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
    n_repr = np.zeros((4, 5))
    e_repr = np.zeros((4, 5))
@@ -17,11 +20,11 @@ def test_graph_filter():
    n_repr = F.copy_to(F.zerocopy_from_numpy(n_repr), F.ctx())
    e_repr = F.copy_to(F.zerocopy_from_numpy(e_repr), F.ctx())
    g.ndata["a"] = n_repr
    g.edata["a"] = e_repr

    def predicate(r):
        return F.max(r.data["a"], 1) > 0

    # full node filter
    n_idx = g.filter_nodes(predicate)
@@ -39,28 +42,35 @@ def test_graph_filter():
    e_idx = g.filter_edges(predicate, [0, 1])
    assert set(F.zerocopy_to_numpy(e_idx)) == {1}
@unittest.skipIf(F._default_context_str == 'cpu',
reason="CPU not yet supported") @unittest.skipIf(
F._default_context_str == "cpu", reason="CPU not yet supported"
)
@parametrize_idtype @parametrize_idtype
def test_array_filter(idtype): def test_array_filter(idtype):
f = Filter(F.copy_to(F.tensor([0,1,9,4,6,5,7], dtype=idtype), F.ctx())) f = Filter(
x = F.copy_to(F.tensor([0,3,9,11], dtype=idtype), F.ctx()) F.copy_to(F.tensor([0, 1, 9, 4, 6, 5, 7], dtype=idtype), F.ctx())
y = F.copy_to(F.tensor([0,19,0,28,3,9,11,4,5], dtype=idtype), F.ctx()) )
x = F.copy_to(F.tensor([0, 3, 9, 11], dtype=idtype), F.ctx())
y = F.copy_to(
F.tensor([0, 19, 0, 28, 3, 9, 11, 4, 5], dtype=idtype), F.ctx()
)
xi_act = f.find_included_indices(x) xi_act = f.find_included_indices(x)
xi_exp = F.copy_to(F.tensor([0,2], dtype=idtype), F.ctx()) xi_exp = F.copy_to(F.tensor([0, 2], dtype=idtype), F.ctx())
assert F.array_equal(xi_act, xi_exp) assert F.array_equal(xi_act, xi_exp)
xe_act = f.find_excluded_indices(x) xe_act = f.find_excluded_indices(x)
xe_exp = F.copy_to(F.tensor([1,3], dtype=idtype), F.ctx()) xe_exp = F.copy_to(F.tensor([1, 3], dtype=idtype), F.ctx())
assert F.array_equal(xe_act, xe_exp) assert F.array_equal(xe_act, xe_exp)
yi_act = f.find_included_indices(y) yi_act = f.find_included_indices(y)
yi_exp = F.copy_to(F.tensor([0,2,5,7,8], dtype=idtype), F.ctx()) yi_exp = F.copy_to(F.tensor([0, 2, 5, 7, 8], dtype=idtype), F.ctx())
assert F.array_equal(yi_act, yi_exp) assert F.array_equal(yi_act, yi_exp)
ye_act = f.find_excluded_indices(y) ye_act = f.find_excluded_indices(y)
ye_exp = F.copy_to(F.tensor([1,3,4,6], dtype=idtype), F.ctx()) ye_exp = F.copy_to(F.tensor([1, 3, 4, 6], dtype=idtype), F.ctx())
assert F.array_equal(ye_act, ye_exp) assert F.array_equal(ye_act, ye_exp)
if __name__ == '__main__':
if __name__ == "__main__":
test_graph_filter() test_graph_filter()
test_array_filter() test_array_filter()
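For context, `dgl.utils.Filter` as exercised above is a set-membership helper: it is built from the allowed IDs, and `find_included_indices` / `find_excluded_indices` return the positions in a query tensor whose values are, respectively, inside or outside that set. A sketch mirroring the expected values asserted in `test_array_filter` (like the test, it assumes a non-CPU context per the skip condition):

import backend as F
from dgl.utils import Filter

f = Filter(F.copy_to(F.tensor([0, 1, 9, 4, 6, 5, 7], dtype=F.int64), F.ctx()))
x = F.copy_to(F.tensor([0, 3, 9, 11], dtype=F.int64), F.ctx())

# x[0] == 0 and x[2] == 9 are in the filter; x[1] == 3 and x[3] == 11 are not.
print(F.asnumpy(f.find_included_indices(x)))  # expected: [0 2]
print(F.asnumpy(f.find_excluded_indices(x)))  # expected: [1 3]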
import pickle
import unittest

import backend as F
import numpy as np
from test_utils import parametrize_idtype

import dgl
import dgl.ndarray as nd
from dgl.frame import Column
def test_column_subcolumn():
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    # subcolumn from cpu context
@@ -28,22 +38,32 @@ def test_column_subcolumn():
    assert len(l2) == i2.shape[0]
    i1i2 = F.copy_to(F.gather_row(i1, F.copy_to(i2, F.context(i1))), F.ctx())
    assert F.array_equal(l2.data, F.gather_row(data, i1i2))

    # next subcolumn also from target context
    i3 = F.copy_to(F.tensor([1], dtype=F.int64), F.ctx())
    l3 = l2.subcolumn(i3)
    assert len(l3) == i3.shape[0]
    i1i2i3 = F.copy_to(
        F.gather_row(i1i2, F.copy_to(i3, F.context(i1i2))), F.ctx()
    )
    assert F.array_equal(l3.data, F.gather_row(data, i1i2i3))
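The assertions above state that chained `subcolumn` calls compose index selections: `l3.data` must equal the original data gathered through `i1`, then `i2`, then `i3`, regardless of which device each index tensor lives on. The same invariant in plain NumPy, as a device-free sketch rather than DGL code:

import numpy as np

data = np.arange(20).reshape(5, 4)
i1 = np.array([0, 2, 4])  # first subcolumn
i2 = np.array([1, 2])     # subcolumn of the subcolumn
i3 = np.array([1])        # and once more

# Gathering step by step equals gathering once through the composed index map.
assert np.array_equal(data[i1][i2][i3], data[i1[i2[i3]]])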
def test_serialize_deserialize_plain():
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    serial = pickle.dumps(original)
@@ -52,12 +72,20 @@ def test_serialize_deserialize_plain():
    assert F.array_equal(new.data, original.data)
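The collapsed hunk hides the deserialization step, but the visible lines pin down the property being checked: a `Column` survives a pickle round trip with its data intact. A generic sketch of that round-trip pattern (using a NumPy array stand-in; the hidden test lines are not reproduced here):

import pickle

import numpy as np

original = np.array([[1.0, 1.0], [0.0, 2.0]])
serial = pickle.dumps(original)
new = pickle.loads(serial)
assert np.array_equal(new, original)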
def test_serialize_deserialize_subcolumn():
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    # subcolumn from cpu context
@@ -69,12 +97,20 @@ def test_serialize_deserialize_subcolumn():
    assert F.array_equal(new.data, l1.data)
def test_serialize_deserialize_dtype():
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)
    original = original.astype(F.int64)
...
import time
import unittest
from timeit import default_timer

import backend as F
import numpy as np
import pytest
from test_utils import get_cases, parametrize_idtype

import dgl
import dgl.function as fn

iters = 5
n_edge_scale = 1
...
import unittest

import backend as F
import numpy as np

import dgl


@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU random choice not implemented"
)
def test_rand_graph():
    g = dgl.rand_graph(10000, 100000)
    assert g.number_of_nodes() == 10000
@@ -18,5 +23,6 @@ def test_rand_graph():
    assert F.array_equal(u1, u2)
    assert F.array_equal(v1, v2)


if __name__ == "__main__":
    test_rand_graph()
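`dgl.rand_graph(num_nodes, num_edges)` builds a random graph with the requested node and edge counts, which is what the visible assertion checks at a larger scale. A smaller usage sketch:

import dgl

g = dgl.rand_graph(100, 500)
assert g.number_of_nodes() == 100
assert g.number_of_edges() == 500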
import unittest

import backend as F
import numpy as np

import dgl
import dgl.ndarray as nd
from dgl.utils import toindex


@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support inplace update",
)
def test_dlpack():
    # test dlpack conversion.
    def nd2th():
        ans = np.array(
            [[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
        )
        x = nd.array(np.zeros((3, 4), dtype=np.float32))
        dl = x.to_dlpack()
        y = F.zerocopy_from_dlpack(dl)
@@ -21,9 +27,9 @@ def test_dlpack():
        assert np.allclose(x.asnumpy(), ans)

    def th2nd():
        ans = np.array(
            [[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
        )
        x = F.zeros((3, 4))
        dl = F.zerocopy_to_dlpack(x)
        y = nd.from_dlpack(dl)
@@ -37,7 +43,7 @@ def test_dlpack():
        ans = np.array([0, 2])
        y = x[:2, 0]
        # Uncomment this line and comment the one below to observe error
        # dl = dlpack.to_dlpack(y)
        dl = F.zerocopy_to_dlpack(y)
        z = nd.from_dlpack(dl)
        print(x)
...
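The test above moves one buffer between `dgl.ndarray` and the backend tensor type through DLPack without copying, so an update on either side is visible on the other (hence the TensorFlow skip, since that backend lacks in-place update here). A condensed sketch of the `nd2th` direction using only calls that already appear in the test; note that `backend as F` is the test-harness shim, not a public DGL module:

import numpy as np

import backend as F
import dgl.ndarray as nd

x = nd.array(np.zeros((3, 4), dtype=np.float32))
dl = x.to_dlpack()
y = F.zerocopy_from_dlpack(dl)
# x and y alias the same memory, so they agree without any copy being made.
assert np.allclose(F.asnumpy(y), x.asnumpy())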