"src/diffusers/commands/diffusers_cli.py" did not exist on "27266abc9ff8cda37a892e6ddfdd6a5caab94e66"
Unverified Commit c88fca50 authored by Tianqi Zhang (张天启), committed by GitHub

[Feature] Add edge coarsening for homogeneous undirected graphs (#2691)



* finish graph matching gpu version

* use C++ shuffle

* finish graph matching

* fix bug

* fix bug

* change name and use swap

* upt

* fix format problem

* fix format problem

* stronger test

* upt

* upt

* change python api

* upt

* upt

* format check

* upt

* upt

* fix bug
Co-authored-by: Tong He <hetong007@gmail.com>
parent 91cb3477
@@ -272,7 +272,8 @@ macro(dgl_config_cuda out_variable)
  list(APPEND DGL_LINKER_LIBS
    ${CUDA_CUDART_LIBRARY}
    ${CUDA_CUBLAS_LIBRARIES}
    ${CUDA_cusparse_LIBRARY}
    ${CUDA_CURAND_LIBRARY})
  set(${out_variable} ${DGL_CUDA_SRC})
endmacro()
@@ -40,6 +40,9 @@ macro(find_cuda use_cuda)
    find_library(CUDA_CUBLAS_LIBRARY cublas
      ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
      ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32)
    find_library(CUDA_CURAND_LIBRARY curand
      ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
      ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32)
  else(MSVC)
    #find_library(CUDA_CUDA_LIBRARY cuda
    #  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
@@ -48,6 +51,9 @@ macro(find_cuda use_cuda)
    find_library(CUDA_CUBLAS_LIBRARY cublas
      ${CUDA_TOOLKIT_ROOT_DIR}/lib64
      ${CUDA_TOOLKIT_ROOT_DIR}/lib)
    find_library(CUDA_CURAND_LIBRARY curand
      ${CUDA_TOOLKIT_ROOT_DIR}/lib64
      ${CUDA_TOOLKIT_ROOT_DIR}/lib)
  endif(MSVC)
  message(STATUS "Found CUDA_TOOLKIT_ROOT_DIR=" ${CUDA_TOOLKIT_ROOT_DIR})
  #message(STATUS "Found CUDA_CUDA_LIBRARY=" ${CUDA_CUDA_LIBRARY})
@@ -55,5 +61,6 @@ macro(find_cuda use_cuda)
    #message(STATUS "Found CUDA_NVRTC_LIBRARY=" ${CUDA_NVRTC_LIBRARY})
    #message(STATUS "Found CUDA_CUDNN_LIBRARY=" ${CUDA_CUDNN_LIBRARY})
    message(STATUS "Found CUDA_CUBLAS_LIBRARY=" ${CUDA_CUBLAS_LIBRARY})
    message(STATUS "Found CUDA_CURAND_LIBRARY=" ${CUDA_CURAND_LIBRARY})
  endif(CUDA_FOUND)
endmacro(find_cuda)
@@ -3,6 +3,7 @@
import importlib
import sys

from ..backend import backend_name

def _load_backend(mod_name):
    mod = importlib.import_module('.%s' % mod_name, __name__)
    thismod = sys.modules[__name__]
...
"""Python interfaces to DGL farthest point sampler.""" """Python interfaces to DGL farthest point sampler."""
from dgl._ffi.base import DGLError
import numpy as np
from .._ffi.function import _init_api from .._ffi.function import _init_api
from .. import backend as F from .. import backend as F
from .. import ndarray as nd
def farthest_point_sampler(data, batch_size, sample_points, dist, start_idx, result): def farthest_point_sampler(data, batch_size, sample_points, dist, start_idx, result):
"""Farthest Point Sampler r"""Farthest Point Sampler
Parameters Parameters
---------- ----------
...@@ -34,4 +38,65 @@ def farthest_point_sampler(data, batch_size, sample_points, dist, start_idx, res ...@@ -34,4 +38,65 @@ def farthest_point_sampler(data, batch_size, sample_points, dist, start_idx, res
F.zerocopy_to_dgl_ndarray(start_idx), F.zerocopy_to_dgl_ndarray(start_idx),
F.zerocopy_to_dgl_ndarray(result)) F.zerocopy_to_dgl_ndarray(result))

def _neighbor_matching(graph_idx, num_nodes, edge_weights=None, relabel_idx=True):
    """
    Description
    -----------
    The neighbor matching procedure of edge coarsening used in
    `Metis <http://cacs.usc.edu/education/cs653/Karypis-METIS-SIAMJSC98.pdf>`__
    and
    `Graclus <https://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf>`__
    for homogeneous graph coarsening. This procedure keeps picking an unmarked
    vertex and matching it with one of its unmarked neighbors (the one that
    maximizes the edge weight) until no more matches can be made.

    If no edge weight is given, this procedure randomly picks a neighbor for each
    vertex.

    The GPU implementation is based on `A GPU Algorithm for Greedy Graph Matching
    <http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf>`__.

    NOTE: The input graph must be a bi-directed (undirected) graph. Call
    :obj:`dgl.to_bidirected` if you are not sure whether your graph is bi-directed.

    Parameters
    ----------
    graph_idx : HeteroGraphIndex
        The input homogeneous graph.
    num_nodes : int
        The number of nodes in this homogeneous graph.
    edge_weights : tensor, optional
        The edge weight tensor holding a non-negative scalar weight for each edge.
        default: :obj:`None`
    relabel_idx : bool, optional
        If true, relabel the resulting node labels to have consecutive node ids.
        default: :obj:`True`

    Returns
    -------
    tensor
        A 1-D tensor in which each element indicates the cluster ID of a vertex.
    """
    edge_weight_capi = nd.NULL["int64"]
    if edge_weights is not None:
        edge_weight_capi = F.zerocopy_to_dgl_ndarray(edge_weights)
    node_label = F.full_1d(
        num_nodes, -1, getattr(F, graph_idx.dtype), F.to_backend_ctx(graph_idx.ctx))
    node_label_capi = F.zerocopy_to_dgl_ndarray_for_write(node_label)
    _CAPI_NeighborMatching(graph_idx, edge_weight_capi, node_label_capi)
    if F.reduce_sum(node_label < 0).item() != 0:
        raise DGLError("Found an unmatched node")

    # reorder node id
    # TODO: actually we can add a `return_inverse` option for the `unique`
    # function in the backend for efficiency.
    if relabel_idx:
        node_label_np = F.zerocopy_to_numpy(node_label)
        _, node_label_np = np.unique(node_label_np, return_inverse=True)
        return F.tensor(node_label_np)
    else:
        return node_label
_init_api('dgl.geometry', __name__)
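
To make the matching semantics concrete, here is a minimal usage sketch against the public wrapper built on this C API (the re-export `from dgl.geometry import neighbor_matching` is the one the tests below use; the exact pairing can vary because unweighted matching is randomized):

import dgl
import torch as th
from dgl.geometry import neighbor_matching

# a bi-directed 4-node path graph: 0 - 1 - 2 - 3
g = dgl.graph((th.tensor([0, 1, 1, 2, 2, 3]), th.tensor([1, 0, 2, 1, 3, 2])))
labels = neighbor_matching(g)  # e.g. tensor([0, 0, 1, 1]) after relabeling
# every cluster ID appears at most twice, and paired nodes are always neighbors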
"""Package for mxnet-specific Geometry modules.""" """Package for mxnet-specific Geometry modules."""
from .fps import * from .fps import *
from .edge_coarsening import *
"""Edge coarsening procedure used in Metis and Graclus, for mxnet"""
# pylint: disable=no-member, invalid-name, W0235
import dgl
import mxnet as mx
from ..capi import _neighbor_matching
__all__ = ['neighbor_matching']
class NeighborMatchingFn(mx.autograd.Function):
    r"""
    Description
    -----------
    AutoGrad function for neighbor matching
    """
    def __init__(self, gidx, num_nodes, e_weights, relabel_idx):
        super(NeighborMatchingFn, self).__init__()
        self.gidx = gidx
        self.num_nodes = num_nodes
        self.e_weights = e_weights
        self.relabel_idx = relabel_idx

    def forward(self):
        r"""
        Description
        -----------
        Perform forward computation
        """
        return _neighbor_matching(
            self.gidx, self.num_nodes, self.e_weights, self.relabel_idx)

    def backward(self):
        r"""
        Description
        -----------
        Perform backward computation
        """
        pass  # pylint: disable=unnecessary-pass
def neighbor_matching(graph, e_weights=None, relabel_idx=True):
    r"""
    Description
    -----------
    The neighbor matching procedure of edge coarsening in
    `Metis <http://cacs.usc.edu/education/cs653/Karypis-METIS-SIAMJSC98.pdf>`__
    and
    `Graclus <https://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf>`__
    for homogeneous graph coarsening. This procedure keeps picking an unmarked
    vertex and matching it with one of its unmarked neighbors (the one that
    maximizes the edge weight) until no more matches can be made.

    If no edge weight is given, this procedure randomly picks a neighbor for each
    vertex.

    The GPU implementation is based on `A GPU Algorithm for Greedy Graph Matching
    <http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf>`__.

    NOTE: The input graph must be a bi-directed (undirected) graph. Call
    :obj:`dgl.to_bidirected` if you are not sure whether your graph is bi-directed.

    Parameters
    ----------
    graph : DGLGraph
        The input homogeneous graph.
    e_weights : mxnet.NDArray, optional
        The edge weight tensor holding a non-negative scalar weight for each edge.
        default: :obj:`None`
    relabel_idx : bool, optional
        If true, relabel the resulting node labels to have consecutive node ids.
        default: :obj:`True`
    """
    assert graph.is_homogeneous, \
        "The graph used in neighbor matching must be homogeneous"
    if e_weights is not None:
        graph.edata['e_weights'] = e_weights
        graph = dgl.remove_self_loop(graph)
        e_weights = graph.edata['e_weights']
        graph.edata.pop('e_weights')
    else:
        graph = dgl.remove_self_loop(graph)
    func = NeighborMatchingFn(graph._graph, graph.num_nodes(), e_weights, relabel_idx)
    return func()
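
A hedged usage sketch for the MXNet wrapper above (the import path follows the package layout added in this commit and assumes an MXNet-backend build of DGL):

import dgl
import mxnet as mx
from dgl.geometry.mxnet import neighbor_matching  # path assumed from this commit

g = dgl.graph(([0, 1, 1, 2], [1, 0, 2, 1]))  # already bi-directed
w = mx.nd.array([0.5, 0.5, 1.0, 1.0])        # one non-negative weight per edge
labels = neighbor_matching(g, w, relabel_idx=True)  # cluster ID per node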
"""Package for mxnet-specific Geometry modules.""" """Package for pytorch-specific Geometry modules."""
from .fps import * from .fps import *
from .edge_coarsening import *
"""Edge coarsening procedure used in Metis and Graclus, for pytorch"""
# pylint: disable=no-member, invalid-name, W0613
import dgl
import torch as th
from ..capi import _neighbor_matching
__all__ = ['neighbor_matching']
class NeighborMatchingFn(th.autograd.Function):
    r"""
    Description
    -----------
    AutoGrad function for neighbor matching
    """
    @staticmethod
    def forward(ctx, gidx, num_nodes, e_weights, relabel_idx):
        r"""
        Description
        -----------
        Perform forward computation
        """
        return _neighbor_matching(gidx, num_nodes, e_weights, relabel_idx)

    @staticmethod
    def backward(ctx):
        r"""
        Description
        -----------
        Perform backward computation
        """
        pass  # pylint: disable=unnecessary-pass
def neighbor_matching(graph, e_weights=None, relabel_idx=True):
    r"""
    Description
    -----------
    The neighbor matching procedure of edge coarsening in
    `Metis <http://cacs.usc.edu/education/cs653/Karypis-METIS-SIAMJSC98.pdf>`__
    and
    `Graclus <https://www.cs.utexas.edu/users/inderjit/public_papers/multilevel_pami.pdf>`__
    for homogeneous graph coarsening. This procedure keeps picking an unmarked
    vertex and matching it with one of its unmarked neighbors (the one that
    maximizes the edge weight) until no more matches can be made.

    If no edge weight is given, this procedure randomly picks a neighbor for each
    vertex.

    The GPU implementation is based on `A GPU Algorithm for Greedy Graph Matching
    <http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf>`__.

    NOTE: The input graph must be a bi-directed (undirected) graph. Call
    :obj:`dgl.to_bidirected` if you are not sure whether your graph is bi-directed.

    Parameters
    ----------
    graph : DGLGraph
        The input homogeneous graph.
    e_weights : torch.Tensor, optional
        The edge weight tensor holding a non-negative scalar weight for each edge.
        default: :obj:`None`
    relabel_idx : bool, optional
        If true, relabel the resulting node labels to have consecutive node ids.
        default: :obj:`True`
    """
    assert graph.is_homogeneous, \
        "The graph used in neighbor matching must be homogeneous"
    if e_weights is not None:
        graph.edata['e_weights'] = e_weights
        graph = dgl.remove_self_loop(graph)
        e_weights = graph.edata['e_weights']
        graph.edata.pop('e_weights')
    else:
        graph = dgl.remove_self_loop(graph)
    return NeighborMatchingFn.apply(graph._graph, graph.num_nodes(), e_weights, relabel_idx)
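
The equivalent hedged sketch for the PyTorch wrapper (again, the import path is assumed from the package layout added in this commit):

import dgl
import torch as th
from dgl.geometry.pytorch import neighbor_matching  # path assumed from this commit

g = dgl.graph((th.tensor([0, 1, 1, 2]), th.tensor([1, 0, 2, 1])))  # bi-directed
w = th.rand(g.num_edges())        # non-negative scalar weight per edge
labels = neighbor_matching(g, w)  # relabel_idx defaults to True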
/*!
* Copyright (c) 2019 by Contributors
* \file array/check.h
* \brief DGL check utilities
*/
#ifndef DGL_ARRAY_CHECK_H_
#define DGL_ARRAY_CHECK_H_
#include <dgl/runtime/ndarray.h>
#include <dgl/array.h>
#include <vector>
#include <string>
namespace dgl {
namespace aten {
// Check whether the given arguments have the same context.
inline void CheckCtx(
const DLContext& ctx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK_EQ(ctx, arrays[i]->ctx)
<< "Expected device context " << ctx << ". But got "
<< arrays[i]->ctx << " for " << names[i] << ".";
}
}
// Check whether input tensors are contiguous.
inline void CheckContiguous(
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK(arrays[i].IsContiguous())
<< "Expect " << names[i] << " to be a contiguous tensor";
}
}
// Check whether input tensors have valid shape.
inline void CheckShape(
const std::vector<uint64_t>& gdim,
const std::vector<int>& uev_idx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK_GE(arrays[i]->ndim, 2)
<< "Expect " << names[i] << " to have ndim >= 2, "
<< "Note that for scalar feature we expand its "
<< "dimension with an additional dimension of "
<< "length one.";
CHECK_EQ(gdim[uev_idx[i]], arrays[i]->shape[0])
<< "Expect " << names[i] << " to have size "
<< gdim[uev_idx[i]] << " on the first dimension, "
<< "but got " << arrays[i]->shape[0];
}
}
} // namespace aten
} // namespace dgl
#endif // DGL_ARRAY_CHECK_H_
@@ -12,6 +12,7 @@
#include "kernel_decl.h"
#include "../c_api_common.h"
#include "./check.h"

using namespace dgl::runtime;

@@ -19,53 +20,6 @@ namespace dgl {
namespace aten {
namespace {
// Check whether the given arguments have the same context.
inline void CheckCtx(
const DLContext& ctx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK_EQ(ctx, arrays[i]->ctx)
<< "Expected device context " << ctx << ". But got "
<< arrays[i]->ctx << " for " << names[i] << ".";
}
}
// Check whether input tensors are contiguous.
inline void CheckContiguous(
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK(arrays[i].IsContiguous())
<< "Expect " << names[i] << " to be a contiguous tensor";
}
}
// Check whether input tensors have valid shape.
inline void CheckShape(
const std::vector<uint64_t>& gdim,
const std::vector<int>& uev_idx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
CHECK_GE(arrays[i]->ndim, 2)
<< "Expect " << names[i] << " to have ndim >= 2, "
<< "Note that for scalar feature we expand its "
<< "dimension with an additional dimension of "
<< "length one.";
CHECK_EQ(gdim[uev_idx[i]], arrays[i]->shape[0])
<< "Expect " << names[i] << " to have size "
<< gdim[uev_idx[i]] << " on the first dimension, "
<< "but got " << arrays[i]->shape[0];
}
}
} // namespace

/*! \brief Generalized Sparse Matrix-Matrix Multiplication. */
...
@@ -3,15 +3,69 @@
 * \file array/cpu/geometry_op_impl.cc
 * \brief Geometry operator CPU implementation
 */
#include <dgl/array.h>
#include <dgl/random.h>
#include <numeric>
#include <vector>
#include <utility>
#include "../geometry_op.h"

namespace dgl {
using runtime::NDArray;

namespace geometry {
namespace impl {
/*! \brief Knuth shuffle algorithm */
template <typename IdType>
void IndexShuffle(IdType *idxs, int64_t num_elems) {
for (int64_t i = num_elems - 1; i > 0; --i) {
int64_t j = dgl::RandomEngine::ThreadLocal()->RandInt(i);
std::swap(idxs[i], idxs[j]);
}
}
template void IndexShuffle<int32_t>(int32_t *idxs, int64_t num_elems);
template void IndexShuffle<int64_t>(int64_t *idxs, int64_t num_elems);
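
For readers less familiar with it, the loop above is the classic Fisher-Yates (Knuth) shuffle; a standalone Python sketch of the same idea (illustrative only, not part of this commit):

import random

def index_shuffle(idxs):
    # walk from the back; swap each slot with a uniformly chosen
    # slot at or before it -- O(n) time, O(1) extra space
    for i in range(len(idxs) - 1, 0, -1):
        j = random.randint(0, i)
        idxs[i], idxs[j] = idxs[j], idxs[i]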
/*! \brief Groupwise index shuffle algorithm. This function shuffles each subarray
 * delimited by the group index. The group index plays the same role as indptr in CSRMatrix.
 *
 * \param group_idxs group index array.
 * \param idxs index array to shuffle.
 * \param num_groups_idxs length of group_idxs
 * \param num_elems length of idxs
 */
template <typename IdType>
void GroupIndexShuffle(const IdType *group_idxs, IdType *idxs,
int64_t num_groups_idxs, int64_t num_elems) {
if (num_groups_idxs < 2) return; // no complete group to shuffle
CHECK_LE(group_idxs[num_groups_idxs - 1], num_elems) << "group_idxs out of range";
for (int64_t i = 0; i < num_groups_idxs - 1; ++i) {
auto subarray_len = group_idxs[i + 1] - group_idxs[i];
IndexShuffle(idxs + group_idxs[i], subarray_len);
}
}
template void GroupIndexShuffle<int32_t>(
const int32_t *group_idxs, int32_t *idxs, int64_t num_groups_idxs, int64_t num_elems);
template void GroupIndexShuffle<int64_t>(
const int64_t *group_idxs, int64_t *idxs, int64_t num_groups_idxs, int64_t num_elems);
template <typename IdType>
IdArray RandomPerm(int64_t num_nodes) {
IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0);
IndexShuffle(perm_data, num_nodes);
return perm;
}
template <typename IdType>
IdArray GroupRandomPerm(const IdType *group_idxs, int64_t num_group_idxs, int64_t num_nodes) {
IdArray perm = aten::NewIdArray(num_nodes, DLContext{kDLCPU, 0}, sizeof(IdType) * 8);
IdType* perm_data = static_cast<IdType*>(perm->data);
std::iota(perm_data, perm_data + num_nodes, 0);
GroupIndexShuffle(group_idxs, perm_data, num_group_idxs, num_nodes);
return perm;
}
/*!
 * \brief Farthest Point Sampler without the need to compute all pairs of distance.
 *
@@ -81,7 +135,6 @@ void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points,
ret_start += sample_points;
}
}

template void FarthestPointSampler<kDLCPU, float, int32_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
@@ -95,6 +148,82 @@ template void FarthestPointSampler<kDLCPU, double, int64_t>(
NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
template <DLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
const IdType *indices_data = static_cast<IdType*>(csr.indices->data);
IdType *result_data = static_cast<IdType*>(result->data);
FloatType *weight_data = static_cast<FloatType*>(weight->data);
// build node visiting order
IdArray vis_order = RandomPerm<IdType>(num_nodes);
IdType *vis_order_data = static_cast<IdType*>(vis_order->data);
for (int64_t n = 0; n < num_nodes; ++n) {
auto u = vis_order_data[n];
// if marked
if (result_data[u] >= 0) continue;
auto v_max = u;
FloatType weight_max = 0;
for (auto e = indptr_data[u]; e < indptr_data[u + 1]; ++e) {
auto v = indices_data[e];
if (result_data[v] >= 0) continue;
if (weight_data[e] >= weight_max) {
v_max = v;
weight_max = weight_data[e];
}
}
result_data[u] = std::min(u, v_max);
result_data[v_max] = result_data[u];
}
}
template void WeightedNeighborMatching<kDLCPU, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLCPU, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template <DLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_nodes = result->shape[0];
const IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
const IdType *indices_data = static_cast<IdType*>(csr.indices->data);
IdType *result_data = static_cast<IdType*>(result->data);
// build vis order
IdArray u_vis_order = RandomPerm<IdType>(num_nodes);
IdType *u_vis_order_data = static_cast<IdType*>(u_vis_order->data);
IdArray v_vis_order = GroupRandomPerm<IdType>(
indptr_data, csr.indptr->shape[0], csr.indices->shape[0]);
IdType *v_vis_order_data = static_cast<IdType*>(v_vis_order->data);
for (int64_t n = 0; n < num_nodes; ++n) {
auto u = u_vis_order_data[n];
// if marked
if (result_data[u] >= 0) continue;
result_data[u] = u;
for (auto e = indptr_data[u]; e < indptr_data[u + 1]; ++e) {
auto v = indices_data[v_vis_order_data[e]];
if (result_data[v] >= 0) continue;
result_data[u] = std::min(u, v);
result_data[v] = result_data[u];
break;
}
}
}
template void NeighborMatching<kDLCPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
template void NeighborMatching<kDLCPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
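
The control flow of the two CPU kernels above boils down to the following sequential Python sketch over a CSR graph (names are hypothetical; `result` starts as all -1, meaning unmatched):

import random

def weighted_neighbor_matching(indptr, indices, weight, result):
    order = list(range(len(result)))
    random.shuffle(order)              # random node visiting order
    for u in order:
        if result[u] >= 0:             # already matched
            continue
        v_max, w_max = u, 0.0
        for e in range(indptr[u], indptr[u + 1]):
            v = indices[e]
            if result[v] < 0 and weight[e] >= w_max:
                v_max, w_max = v, weight[e]
        # match u with its heaviest unmatched neighbor (or itself if none)
        result[u] = result[v_max] = min(u, v_max)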
} // namespace impl
} // namespace geometry
} // namespace dgl
/*!
* Copyright (c) 2019 by Contributors
* \file geometry/cuda/edge_coarsening_impl.cu
* \brief Edge coarsening CUDA implementation
*/
#include <dgl/array.h>
#include <dgl/random.h>
#include <dmlc/thread_local.h>
#include <curand.h>
#include <cstdint>
#include "../geometry_op.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../../array/cuda/utils.h"
#define BLOCKS(N, T) (((N) + (T) - 1) / (T))
namespace dgl {
namespace geometry {
namespace impl {
constexpr float BLUE_P = 0.53406;
constexpr int BLUE = -1;
constexpr int RED = -2;
constexpr int EMPTY_IDX = -1;
__device__ bool done_d;
__global__ void init_done_kernel() { done_d = true; }
template <typename IdType>
__global__ void colorize_kernel(const float *prop, int64_t num_elem, IdType *result) {
const IdType idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_elem) {
if (result[idx] < 0) { // if unmatched
result[idx] = (prop[idx] > BLUE_P) ? RED : BLUE;
done_d = false;
}
}
}
template <typename FloatType, typename IdType>
__global__ void weighted_propose_kernel(const IdType *indptr, const IdType *indices,
const FloatType *weights, int64_t num_elem,
IdType *proposal, IdType *result) {
const IdType idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_elem) {
if (result[idx] != BLUE) return;
bool has_unmatched_neighbor = false;
FloatType weight_max = 0.;
IdType v_max = EMPTY_IDX;
for (IdType i = indptr[idx]; i < indptr[idx + 1]; ++i) {
auto v = indices[i];
if (result[v] < 0)
has_unmatched_neighbor = true;
if (result[v] == RED && weights[i] >= weight_max) {
v_max = v;
weight_max = weights[i];
}
}
proposal[idx] = v_max;
if (!has_unmatched_neighbor)
result[idx] = idx;
}
}
template <typename FloatType, typename IdType>
__global__ void weighted_respond_kernel(const IdType *indptr, const IdType *indices,
const FloatType *weights, int64_t num_elem,
IdType *proposal, IdType *result) {
const IdType idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_elem) {
if (result[idx] != RED) return;
bool has_unmatched_neighbors = false;
IdType v_max = -1;
FloatType weight_max = 0.;
for (IdType i = indptr[idx]; i < indptr[idx + 1]; ++i) {
auto v = indices[i];
if (result[v] < 0) {
has_unmatched_neighbors = true;
}
if (result[v] == BLUE
&& proposal[v] == idx
&& weights[i] >= weight_max) {
v_max = v;
weight_max = weights[i];
}
}
if (v_max >= 0) {
result[v_max] = min(idx, v_max);
result[idx] = min(idx, v_max);
}
if (!has_unmatched_neighbors)
result[idx] = idx;
}
}
/*! \brief The colorize procedure. This procedure randomly marks unmarked
* nodes with BLUE(-1) and RED(-2) and checks whether the node matching
* process has finished.
*/
template<typename IdType>
bool Colorize(IdType * result_data, curandGenerator_t gen, int64_t num_nodes) {
// initialize the done signal
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
CUDA_KERNEL_CALL(init_done_kernel, 1, 1, 0, thr_entry->stream);
// generate color prop for each node
float *prop;
CUDA_CALL(cudaMalloc(reinterpret_cast<void **>(&prop), num_nodes * sizeof(float)));
CURAND_CALL(curandGenerateUniform(gen, prop, num_nodes));
cudaDeviceSynchronize(); // wait for random number generation to finish, since curand is asynchronous
// call kernel
auto num_threads = cuda::FindNumThreads(num_nodes);
auto num_blocks = cuda::FindNumBlocks<'x'>(BLOCKS(num_nodes, num_threads));
CUDA_KERNEL_CALL(colorize_kernel, num_blocks, num_threads, 0, thr_entry->stream,
prop, num_nodes, result_data);
bool done_h = false;
CUDA_CALL(cudaMemcpyFromSymbol(&done_h, done_d, sizeof(done_h), 0, cudaMemcpyDeviceToHost));
CUDA_CALL(cudaFree(prop));
return done_h;
}
/*! \brief Weighted neighbor matching procedure (GPU version).
 * This implementation is from `A GPU Algorithm for Greedy Graph Matching
 * <http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf>`__
 *
 * This algorithm has three parts: colorize, propose and respond.
 * In the colorize procedure, each unmarked node is randomly marked as BLUE or
 * RED. If all nodes are marked, the algorithm finishes and returns.
 * In the propose procedure, each BLUE node proposes to the RED neighbor
 * with the largest weight (or to a random one if no weight is given).
 * If all of its neighbors are already marked, the node is marked with its own id.
 * In the respond procedure, each RED node responds to the BLUE neighbor
 * that has proposed to it with the largest weight. If all of its neighbors
 * are marked, the node is marked with its own id. Otherwise, the (BLUE, RED)
 * pair is matched and both nodes are marked with the smaller of their two ids.
 */
template <DLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result) {
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
if (!thr_entry->curand_gen) {
uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
CURAND_CALL(curandCreateGenerator(&thr_entry->curand_gen, CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(thr_entry->curand_gen, seed));
}
// create proposal tensor
const int64_t num_nodes = result->shape[0];
IdArray proposal = aten::Full(-1, num_nodes, sizeof(IdType) * 8, result->ctx);
// get data ptrs
IdType *indptr_data = static_cast<IdType*>(csr.indptr->data);
IdType *indices_data = static_cast<IdType*>(csr.indices->data);
IdType *result_data = static_cast<IdType*>(result->data);
IdType *proposal_data = static_cast<IdType*>(proposal->data);
FloatType *weight_data = static_cast<FloatType*>(weight->data);
auto num_threads = cuda::FindNumThreads(num_nodes);
auto num_blocks = cuda::FindNumBlocks<'x'>(BLOCKS(num_nodes, num_threads));
while (!Colorize<IdType>(result_data, thr_entry->curand_gen, num_nodes)) {
CUDA_KERNEL_CALL(weighted_propose_kernel, num_blocks, num_threads, 0, thr_entry->stream,
indptr_data, indices_data, weight_data, num_nodes, proposal_data, result_data);
CUDA_KERNEL_CALL(weighted_respond_kernel, num_blocks, num_threads, 0, thr_entry->stream,
indptr_data, indices_data, weight_data, num_nodes, proposal_data, result_data);
}
}
template void WeightedNeighborMatching<kDLGPU, float, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, float, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, double, int32_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
template void WeightedNeighborMatching<kDLGPU, double, int64_t>(
const aten::CSRMatrix &csr, const NDArray weight, IdArray result);
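
At a high level, the host loop above alternates the three device kernels until Colorize reports completion. A sequential Python sketch of the same colorize/propose/respond scheme (illustrative only; BLUE, RED and BLUE_P mirror the constants defined at the top of this file):

import random

BLUE, RED, BLUE_P = -1, -2, 0.53406

def colorize(result):
    # randomly mark still-unmatched nodes; True when everyone is matched
    done = True
    for u in range(len(result)):
        if result[u] < 0:
            result[u] = RED if random.random() > BLUE_P else BLUE
            done = False
    return done

def propose(indptr, indices, weights, proposal, result):
    for u in range(len(result)):
        if result[u] != BLUE:
            continue
        has_unmatched, v_max, w_max = False, -1, 0.0
        for e in range(indptr[u], indptr[u + 1]):
            v = indices[e]
            if result[v] < 0:
                has_unmatched = True
            if result[v] == RED and weights[e] >= w_max:
                v_max, w_max = v, weights[e]
        proposal[u] = v_max               # heaviest RED neighbor, if any
        if not has_unmatched:
            result[u] = u                 # no partner left: match with itself

def respond(indptr, indices, weights, proposal, result):
    for u in range(len(result)):
        if result[u] != RED:
            continue
        has_unmatched, v_max, w_max = False, -1, 0.0
        for e in range(indptr[u], indptr[u + 1]):
            v = indices[e]
            if result[v] < 0:
                has_unmatched = True
            if result[v] == BLUE and proposal[v] == u and weights[e] >= w_max:
                v_max, w_max = v, weights[e]
        if v_max >= 0:                    # accept the heaviest proposer
            result[u] = result[v_max] = min(u, v_max)
        elif not has_unmatched:
            result[u] = u

def weighted_matching(indptr, indices, weights, result):
    proposal = [-1] * len(result)
    while not colorize(result):
        propose(indptr, indices, weights, proposal, result)
        respond(indptr, indices, weights, proposal, result)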
/*! \brief Unweighted neighbor matching procedure (GPU version).
 * Instead of directly sampling neighbors, we assign each edge
 * a random weight. We use random weights for 2 reasons:
 * 1. Random sampling for each node on GPU is expensive. Although
 *    we could perform a global group-wise (the neighborhood of each
 *    node forms a group) random permutation as in the CPU version,
 *    it still costs too much compared to directly using random weights.
 * 2. The graph is sparse, so the neighborhood of each node is small,
 *    which is suitable for a GPU implementation.
 */
template <DLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
const int64_t num_edges = csr.indices->shape[0];
// generate random weights
auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
if (!thr_entry->curand_gen) {
uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
CURAND_CALL(curandCreateGenerator(&thr_entry->curand_gen, CURAND_RNG_PSEUDO_DEFAULT));
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(thr_entry->curand_gen, seed));
}
NDArray weight = NDArray::Empty(
{num_edges}, DLDataType{kDLFloat, sizeof(float) * 8, 1}, result->ctx);
float *weight_data = static_cast<float*>(weight->data);
CURAND_CALL(curandGenerateUniform(thr_entry->curand_gen, weight_data, num_edges));
cudaDeviceSynchronize();
WeightedNeighborMatching<XPU, float, IdType>(csr, weight, result);
}
template void NeighborMatching<kDLGPU, int32_t>(const aten::CSRMatrix &csr, IdArray result);
template void NeighborMatching<kDLGPU, int64_t>(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl
} // namespace geometry
} // namespace dgl
@@ -5,8 +5,10 @@
 */
#include <dgl/array.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/base_heterograph.h>
#include "../c_api_common.h"
#include "./geometry_op.h"
#include "../array/check.h"

using namespace dgl::runtime;

@@ -31,6 +33,26 @@ void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points,
});
}
void NeighborMatching(HeteroGraphPtr graph, const NDArray weight, IdArray result) {
if (!aten::IsNullArray(weight)) {
ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "NeighborMatching", {
ATEN_FLOAT_TYPE_SWITCH(weight->dtype, FloatType, "weight", {
ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
impl::WeightedNeighborMatching<XPU, FloatType, IdType>(
graph->GetCSRMatrix(0), weight, result);
});
});
});
} else {
ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "NeighborMatching", {
ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
impl::NeighborMatching<XPU, IdType>(
graph->GetCSRMatrix(0), result);
});
});
}
}
///////////////////////// C APIs /////////////////////////

DGL_REGISTER_GLOBAL("geometry._CAPI_FarthestPointSampler")
@@ -45,5 +67,31 @@ DGL_REGISTER_GLOBAL("geometry._CAPI_FarthestPointSampler")
FarthestPointSampler(data, batch_size, sample_points, dist, start_idx, result);
});
DGL_REGISTER_GLOBAL("geometry._CAPI_NeighborMatching")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef graph = args[0];
const NDArray weight = args[1];
IdArray result = args[2];
// sanity check
aten::CheckCtx(graph->Context(), {weight, result}, {"edge_weight", "result"});
aten::CheckContiguous({weight, result}, {"edge_weight", "result"});
CHECK_EQ(graph->NumEdgeTypes(), 1) << "A homogeneous graph has only one edge type.";
CHECK_EQ(result->ndim, 1) << "result should be a 1D tensor.";
auto pair = graph->meta_graph()->FindEdge(0);
const dgl_type_t node_type = pair.first;
CHECK_EQ(graph->NumVertices(node_type), result->shape[0])
<< "The number of nodes should be the same as the length of the result tensor.";
if (!aten::IsNullArray(weight)) {
CHECK_EQ(weight->ndim, 1) << "weight should be a 1D tensor.";
CHECK_EQ(graph->NumEdges(0), weight->shape[0])
<< "The number of edges in the graph should be the same "
<< "as the length of the edge weight tensor.";
}
// call implementation
NeighborMatching(graph.sptr(), weight, result);
});
} // namespace geometry
} // namespace dgl
@@ -16,6 +16,22 @@ template <DLDeviceType XPU, typename FloatType, typename IdType>
void FarthestPointSampler(NDArray array, int64_t batch_size, int64_t sample_points,
NDArray dist, IdArray start_idx, IdArray result);
/*! \brief Implementation of the weighted neighbor matching process of edge coarsening used
 * in Metis and Graclus for homogeneous graph coarsening. This procedure keeps
 * picking an unmarked vertex and matching it with one of its unmarked neighbors
 * (the one that maximizes the edge weight) until no more matches can be made.
 */
template <DLDeviceType XPU, typename FloatType, typename IdType>
void WeightedNeighborMatching(const aten::CSRMatrix &csr, const NDArray weight, IdArray result);

/*! \brief Implementation of the (unweighted) neighbor matching process of edge coarsening used
 * in Metis and Graclus for homogeneous graph coarsening. This procedure keeps
 * picking an unmarked vertex and matching it with one of its unmarked neighbors
 * at random until no more matches can be made.
 */
template <DLDeviceType XPU, typename IdType>
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result);
} // namespace impl
} // namespace geometry
} // namespace dgl
...
@@ -10,6 +10,7 @@
#include "./binary_reduce_impl_decl.h"
#include "./utils.h"
#include "../c_api_common.h"
#include "../array/check.h"
#include "../graph/unit_graph.h"
#include "./csr_interface.h"

@@ -175,20 +176,6 @@ std::string IdArrayToStr(IdArray arr) {
return oss.str();
}
// Check whether the given arguments have the same context.
inline void CheckCtx(
const DLContext& ctx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (aten::IsNullArray(arrays[i]))
continue;
CHECK_EQ(ctx, arrays[i]->ctx)
<< "Expected device context " << ctx << ". But got "
<< arrays[i]->ctx << " for " << names[i] << ".";
}
}
// Check whether the given arguments use the same number of bits.
inline void CheckIdArray(
const uint8_t bits,
@@ -303,7 +290,7 @@ void BinaryOpReduce(
NDArray out_mapping) {
const auto& ctx = graph.Context();
// sanity check
aten::CheckCtx(ctx,
{lhs_data, rhs_data, out_data, lhs_mapping, rhs_mapping, out_mapping},
{"lhs_data", "rhs_data", "out_data", "lhs_mapping", "rhs_mapping", "out_mapping"});
CheckIdArray(graph.NumBits(),
@@ -392,7 +379,7 @@ void BackwardLhsBinaryOpReduce(
NDArray grad_lhs_data) {
const auto& ctx = graph.Context();
// sanity check
aten::CheckCtx(ctx,
{lhs_data, rhs_data, out_data, grad_out_data, grad_lhs_data,
lhs_mapping, rhs_mapping, out_mapping},
{"lhs_data", "rhs_data", "out_data", "grad_out_data", "grad_lhs_data",
@@ -469,7 +456,7 @@ void BackwardRhsBinaryOpReduce(
NDArray grad_rhs_data) {
const auto& ctx = graph.Context();
// sanity check
aten::CheckCtx(ctx,
{lhs_data, rhs_data, out_data, grad_out_data, grad_rhs_data,
lhs_mapping, rhs_mapping, out_mapping},
{"lhs_data", "rhs_data", "out_data", "grad_out_data", "grad_rhs_data",
@@ -539,7 +526,7 @@ void CopyReduce(
NDArray in_mapping, NDArray out_mapping) {
const auto& ctx = graph.Context();
// sanity check
aten::CheckCtx(ctx,
{in_data, out_data, in_mapping, out_mapping},
{"in_data", "out_data", "in_mapping", "out_mapping"});
CheckIdArray(graph.NumBits(),
@@ -582,7 +569,7 @@ void BackwardCopyReduce(
NDArray grad_in_data) {
const auto& ctx = graph.Context();
// sanity check
aten::CheckCtx(ctx,
{in_data, out_data, grad_out_data, grad_in_data, in_mapping, out_mapping},
{"in_data", "out_data", "grad_out_data", "grad_in_data", "in_mapping", "out_mapping"});
CheckIdArray(graph.NumBits(),
...
@@ -10,6 +10,10 @@
#include <dgl/random.h>
#include <dgl/array.h>

#ifdef DGL_USE_CUDA
#include "../runtime/cuda/cuda_common.h"
#endif // DGL_USE_CUDA

using namespace dgl::runtime;

namespace dgl {

@@ -18,8 +22,18 @@ DGL_REGISTER_GLOBAL("rng._CAPI_SetSeed")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
const int seed = args[0];
#pragma omp parallel for
for (int i = 0; i < omp_get_max_threads(); ++i) {
RandomEngine::ThreadLocal()->SetSeed(seed);
#ifdef DGL_USE_CUDA
auto* thr_entry = CUDAThreadEntry::ThreadLocal();
if (!thr_entry->curand_gen) {
CURAND_CALL(curandCreateGenerator(&thr_entry->curand_gen, CURAND_RNG_PSEUDO_DEFAULT));
}
CURAND_CALL(curandSetPseudoRandomGeneratorSeed(
thr_entry->curand_gen,
static_cast<uint64_t>(seed + GetThreadId())));
#endif // DGL_USE_CUDA
}
});

DGL_REGISTER_GLOBAL("rng._CAPI_Choice")
...
@@ -9,6 +9,7 @@
#include <cublas_v2.h>
#include <cusparse.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <dgl/runtime/packed_func.h>
#include <string>
#include "../workspace_pool.h"

@@ -70,6 +71,47 @@ inline bool is_zero<dim3>(dim3 size) {
CHECK(e == CUBLAS_STATUS_SUCCESS) << "CUBLAS ERROR: " << e; \
}
#define CURAND_CALL(func) \
{ \
curandStatus_t e = (func); \
CHECK(e == CURAND_STATUS_SUCCESS) \
<< "CURAND Error: " << dgl::runtime::curandGetErrorString(e) \
<< " at " << __FILE__ << ":" << __LINE__; \
}
inline const char* curandGetErrorString(curandStatus_t error) {
switch (error) {
case CURAND_STATUS_SUCCESS:
return "CURAND_STATUS_SUCCESS";
case CURAND_STATUS_VERSION_MISMATCH:
return "CURAND_STATUS_VERSION_MISMATCH";
case CURAND_STATUS_NOT_INITIALIZED:
return "CURAND_STATUS_NOT_INITIALIZED";
case CURAND_STATUS_ALLOCATION_FAILED:
return "CURAND_STATUS_ALLOCATION_FAILED";
case CURAND_STATUS_TYPE_ERROR:
return "CURAND_STATUS_TYPE_ERROR";
case CURAND_STATUS_OUT_OF_RANGE:
return "CURAND_STATUS_OUT_OF_RANGE";
case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
case CURAND_STATUS_LAUNCH_FAILURE:
return "CURAND_STATUS_LAUNCH_FAILURE";
case CURAND_STATUS_PREEXISTING_FAILURE:
return "CURAND_STATUS_PREEXISTING_FAILURE";
case CURAND_STATUS_INITIALIZATION_FAILED:
return "CURAND_STATUS_INITIALIZATION_FAILED";
case CURAND_STATUS_ARCH_MISMATCH:
return "CURAND_STATUS_ARCH_MISMATCH";
case CURAND_STATUS_INTERNAL_ERROR:
return "CURAND_STATUS_INTERNAL_ERROR";
}
// To suppress compiler warning.
return "Unrecognized curand error string";
}
/*
 * \brief Cast data type to cudaDataType_t.
 */
@@ -122,6 +164,8 @@ class CUDAThreadEntry {
cusparseHandle_t cusparse_handle{nullptr};
/*! \brief The cublas handler */
cublasHandle_t cublas_handle{nullptr};
/*! \brief The curand generator */
curandGenerator_t curand_gen{nullptr};
/*! \brief thread local pool*/
WorkspacePool pool;
/*! \brief constructor */
...
import backend as F
import dgl.nn
import dgl
import numpy as np
import pytest
import torch as th
from dgl.geometry.pytorch import FarthestPointSampler
from dgl.geometry import neighbor_matching
from test_utils import parametrize_dtype
from test_utils.graph_cases import get_cases

def test_fps():
    N = 1000
@@ -43,6 +49,44 @@ def test_knn():
    check_knn(g, x, 0, 3)
    check_knn(g, x, 3, 8)

@parametrize_dtype
@pytest.mark.parametrize('g', get_cases(['homo'], exclude=['dglgraph']))
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('relabel', [True, False])
def test_edge_coarsening(idtype, g, weight, relabel):
    num_nodes = g.num_nodes()
    g = dgl.to_bidirected(g)
    g = g.astype(idtype).to(F.ctx())
    edge_weight = None
    if weight:
        edge_weight = F.abs(F.randn((g.num_edges(),))).to(F.ctx())
    node_labels = neighbor_matching(g, edge_weight, relabel_idx=relabel)
    unique_ids, counts = th.unique(node_labels, return_counts=True)
    num_result_ids = unique_ids.size(0)

    # shape correct
    assert node_labels.shape == (g.num_nodes(),)

    # all nodes marked
    assert F.reduce_sum(node_labels < 0).item() == 0

    # number of unique node ids correct.
    assert num_result_ids >= num_nodes // 2 and num_result_ids <= num_nodes

    # each unique id has <= 2 nodes
    assert F.reduce_sum(counts > 2).item() == 0

    # if two nodes have the same id, they must be neighbors
    idxs = F.arange(0, num_nodes, idtype)
    for l in unique_ids:
        l = l.item()
        idx = idxs[(node_labels == l)]
        if idx.size(0) == 2:
            u, v = idx[0].item(), idx[1].item()
            assert g.has_edges_between(u, v)
if __name__ == '__main__':
    test_fps()
    test_knn()