Unverified commit 8f0df39e, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] clang-format auto fix. (#4810)



* [Misc] clang-format auto fix.

* manual

* manual
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 401e1278
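
Every hunk below applies the same clang-format pattern: when a declaration, call, or template header exceeds the column limit, the argument list is broken immediately after the opening parenthesis and continued with a four-space indent, instead of being aligned under the first argument. The repository's .clang-format options are not part of this commit, so the following is only a minimal illustrative sketch under assumed settings (an 80-column limit with Google-derived defaults) and uses placeholder names rather than DGL code; an in-place run such as `clang-format -i` over the touched files, plus the manual touch-ups noted above, would produce rewrites of this shape.

#include <cstdint>
#include <string>
#include <vector>

namespace formatting_sketch {

// Old style (shown as a comment for contrast): continuation arguments aligned
// under the first argument of the opening line.
// std::vector<int64_t> BuildShape(const std::string &name, int64_t rows,
//                                 int64_t cols);

// New style applied throughout this commit: break right after '(' and
// continue on the next line with a four-space indent.
std::vector<int64_t> BuildShape(
    const std::string &name, int64_t rows, int64_t cols) {
  return {static_cast<int64_t>(name.size()), rows, cols};
}

}  // namespace formatting_sketch

int main() {
  // Call sites are unaffected; only the wrapping of long signatures changes.
  const auto shape = formatting_sketch::BuildShape("feat", 8, 16);
  return shape.size() == 3 ? 0 : 1;
}
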
@@ -13,8 +13,8 @@ namespace dgl {
using dgl::runtime::NDArray;
NDArray CreateNDArrayFromRawData(std::vector<int64_t> shape, DGLDataType dtype,
DGLContext ctx, void* raw) {
NDArray CreateNDArrayFromRawData(
std::vector<int64_t> shape, DGLDataType dtype, DGLContext ctx, void* raw) {
return NDArray::CreateFromRaw(shape, dtype, ctx, raw, true);
}
@@ -40,7 +40,8 @@ void StreamWithBuffer::PushNDArray(const NDArray& tensor) {
// If the stream is for remote communication or the data is not stored in
// shared memory, serialize the data content as a buffer.
this->Write<bool>(false);
// If this is a null ndarray, we will not push it into the underlying buffer_list
// If this is a null ndarray, we will not push it into the underlying
// buffer_list
if (data_byte_size != 0) {
buffer_list_.emplace_back(tensor, tensor->data, data_byte_size);
}
@@ -90,8 +91,8 @@ NDArray StreamWithBuffer::PopNDArray() {
// Mean this is a null ndarray
ret = CreateNDArrayFromRawData(shape, dtype, cpu_ctx, nullptr);
} else {
ret = CreateNDArrayFromRawData(shape, dtype, cpu_ctx,
buffer_list_.front().data);
ret = CreateNDArrayFromRawData(
shape, dtype, cpu_ctx, buffer_list_.front().data);
buffer_list_.pop_front();
}
return ret;
@@ -31,8 +31,8 @@ using namespace dgl::aten;
namespace dgl {
template <>
NDArray SharedMemManager::CopyToSharedMem<NDArray>(const NDArray &data,
std::string name) {
NDArray SharedMemManager::CopyToSharedMem<NDArray>(
const NDArray &data, std::string name) {
DGLContext ctx = {kDGLCPU, 0};
std::vector<int64_t> shape(data->shape, data->shape + data->ndim);
strm_->Write(data->ndim);
@@ -53,21 +53,22 @@ NDArray SharedMemManager::CopyToSharedMem<NDArray>(const NDArray &data,
}
template <>
CSRMatrix SharedMemManager::CopyToSharedMem<CSRMatrix>(const CSRMatrix &csr,
std::string name) {
CSRMatrix SharedMemManager::CopyToSharedMem<CSRMatrix>(
const CSRMatrix &csr, std::string name) {
auto indptr_shared_mem = CopyToSharedMem(csr.indptr, name + "_indptr");
auto indices_shared_mem = CopyToSharedMem(csr.indices, name + "_indices");
auto data_shared_mem = CopyToSharedMem(csr.data, name + "_data");
strm_->Write(csr.num_rows);
strm_->Write(csr.num_cols);
strm_->Write(csr.sorted);
return CSRMatrix(csr.num_rows, csr.num_cols, indptr_shared_mem,
indices_shared_mem, data_shared_mem, csr.sorted);
return CSRMatrix(
csr.num_rows, csr.num_cols, indptr_shared_mem, indices_shared_mem,
data_shared_mem, csr.sorted);
}
template <>
COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(const COOMatrix &coo,
std::string name) {
COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(
const COOMatrix &coo, std::string name) {
auto row_shared_mem = CopyToSharedMem(coo.row, name + "_row");
auto col_shared_mem = CopyToSharedMem(coo.col, name + "_col");
auto data_shared_mem = CopyToSharedMem(coo.data, name + "_data");
@@ -75,13 +76,14 @@ COOMatrix SharedMemManager::CopyToSharedMem<COOMatrix>(const COOMatrix &coo,
strm_->Write(coo.num_cols);
strm_->Write(coo.row_sorted);
strm_->Write(coo.col_sorted);
return COOMatrix(coo.num_rows, coo.num_cols, row_shared_mem, col_shared_mem,
return COOMatrix(
coo.num_rows, coo.num_cols, row_shared_mem, col_shared_mem,
data_shared_mem, coo.row_sorted, coo.col_sorted);
}
template <>
bool SharedMemManager::CreateFromSharedMem<NDArray>(NDArray *nd,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<NDArray>(
NDArray *nd, std::string name) {
int ndim;
DGLContext ctx = {kDGLCPU, 0};
DGLDataType dtype;
@@ -98,15 +100,14 @@ bool SharedMemManager::CreateFromSharedMem<NDArray>(NDArray *nd,
if (is_null) {
*nd = NDArray::Empty(shape, dtype, ctx);
} else {
*nd =
NDArray::EmptyShared(graph_name_ + name, shape, dtype, ctx, false);
*nd = NDArray::EmptyShared(graph_name_ + name, shape, dtype, ctx, false);
}
return true;
}
template <>
bool SharedMemManager::CreateFromSharedMem<COOMatrix>(COOMatrix *coo,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<COOMatrix>(
COOMatrix *coo, std::string name) {
CreateFromSharedMem(&coo->row, name + "_row");
CreateFromSharedMem(&coo->col, name + "_col");
CreateFromSharedMem(&coo->data, name + "_data");
@@ -118,8 +119,8 @@ bool SharedMemManager::CreateFromSharedMem<COOMatrix>(COOMatrix *coo,
}
template <>
bool SharedMemManager::CreateFromSharedMem<CSRMatrix>(CSRMatrix *csr,
std::string name) {
bool SharedMemManager::CreateFromSharedMem<CSRMatrix>(
CSRMatrix *csr, std::string name) {
CreateFromSharedMem(&csr->indptr, name + "_indptr");
CreateFromSharedMem(&csr->indices, name + "_indices");
CreateFromSharedMem(&csr->data, name + "_data");
@@ -29,8 +29,7 @@ const size_t SHARED_MEM_METAINFO_SIZE_MAX = 1024 * 32;
class SharedMemManager : public dmlc::Stream {
public:
explicit SharedMemManager(std::string graph_name, dmlc::Stream* strm)
: graph_name_(graph_name),
strm_(strm) {}
: graph_name_(graph_name), strm_(strm) {}
template <typename T>
T CopyToSharedMem(const T& data, std::string name);
@@ -11,7 +11,8 @@ namespace dgl {
HeteroSubgraph InEdgeGraphRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<IdArray> eids(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
@@ -29,9 +30,11 @@ HeteroSubgraph InEdgeGraphRelabelNodes(
HeteroSubgraph InEdgeGraphNoRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR graphs
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR
// graphs
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<HeteroGraphPtr> subrels(graph->NumEdgeTypes());
std::vector<IdArray> induced_edges(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
@@ -43,30 +46,28 @@ HeteroSubgraph InEdgeGraphNoRelabelNodes(
if (aten::IsNullArray(vids[dst_vtype])) {
// create a placeholder graph
subrels[etype] = UnitGraph::Empty(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
graph->DataType(), ctx);
induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context());
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), graph->DataType(), ctx);
induced_edges[etype] =
IdArray::Empty({0}, graph->DataType(), graph->Context());
} else {
const auto& earr = graph->InEdges(etype, {vids[dst_vtype]});
subrels[etype] = UnitGraph::CreateFromCOO(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
earr.src,
earr.dst);
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), earr.src, earr.dst);
induced_edges[etype] = earr.id;
}
}
HeteroSubgraph ret;
ret.graph = CreateHeteroGraph(graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.graph = CreateHeteroGraph(
graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.induced_edges = std::move(induced_edges);
return ret;
}
HeteroSubgraph InEdgeGraph(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids, bool relabel_nodes) {
const HeteroGraphPtr graph, const std::vector<IdArray>& vids,
bool relabel_nodes) {
if (relabel_nodes) {
return InEdgeGraphRelabelNodes(graph, vids);
} else {
@@ -77,7 +78,8 @@ HeteroSubgraph InEdgeGraph(
HeteroSubgraph OutEdgeGraphRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<IdArray> eids(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
@@ -95,9 +97,11 @@ HeteroSubgraph OutEdgeGraphRelabelNodes(
HeteroSubgraph OutEdgeGraphNoRelabelNodes(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids) {
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR graphs
// TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR
// graphs
CHECK_EQ(vids.size(), graph->NumVertexTypes())
<< "Invalid input: the input list size must be the same as the number of vertex types.";
<< "Invalid input: the input list size must be the same as the number of "
"vertex types.";
std::vector<HeteroGraphPtr> subrels(graph->NumEdgeTypes());
std::vector<IdArray> induced_edges(graph->NumEdgeTypes());
DGLContext ctx = aten::GetContextOf(vids);
@@ -109,30 +113,28 @@ HeteroSubgraph OutEdgeGraphNoRelabelNodes(
if (aten::IsNullArray(vids[src_vtype])) {
// create a placeholder graph
subrels[etype] = UnitGraph::Empty(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
graph->DataType(), ctx);
induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context());
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), graph->DataType(), ctx);
induced_edges[etype] =
IdArray::Empty({0}, graph->DataType(), graph->Context());
} else {
const auto& earr = graph->OutEdges(etype, {vids[src_vtype]});
subrels[etype] = UnitGraph::CreateFromCOO(
relgraph->NumVertexTypes(),
graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype),
earr.src,
earr.dst);
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype),
graph->NumVertices(dst_vtype), earr.src, earr.dst);
induced_edges[etype] = earr.id;
}
}
HeteroSubgraph ret;
ret.graph = CreateHeteroGraph(graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.graph = CreateHeteroGraph(
graph->meta_graph(), subrels, graph->NumVerticesPerType());
ret.induced_edges = std::move(induced_edges);
return ret;
}
HeteroSubgraph OutEdgeGraph(
const HeteroGraphPtr graph, const std::vector<IdArray>& vids, bool relabel_nodes) {
const HeteroGraphPtr graph, const std::vector<IdArray>& vids,
bool relabel_nodes) {
if (relabel_nodes) {
return OutEdgeGraphRelabelNodes(graph, vids);
} else {
@@ -19,18 +19,20 @@
#include "compact.h"
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/registry.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <utility>
#include <vector>
#include "../../c_api_common.h"
#include "../unit_graph.h"
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation which
// only works on CPU. Should fix later to make it device agnostic.
// TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation
// which only works on CPU. Should fix later to make it device agnostic.
#include "../../array/cpu/array_utils.h"
namespace dgl {
@@ -42,16 +44,16 @@ namespace transform {
namespace {
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphsCPU(
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsCPU(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
// TODO(BarclayII): check whether the node space and metagraph of each graph is the same.
// Step 1: Collect the nodes that has connections for each type.
// TODO(BarclayII): check whether the node space and metagraph of each graph
// is the same. Step 1: Collect the nodes that has connections for each type.
const int64_t num_ntypes = graphs[0]->NumVertexTypes();
std::vector<aten::IdHashMap<IdType>> hashmaps(num_ntypes);
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
std::vector<std::vector<EdgeArray>> all_edges(
graphs.size()); // all_edges[i][etype]
std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
for (size_t i = 0; i < graphs.size(); ++i) {
@@ -98,7 +100,8 @@ CompactGraphsCPU(
}
}
// Step 2: Relabel the nodes for each type to a smaller ID space and save the mapping.
// Step 2: Relabel the nodes for each type to a smaller ID space and save the
// mapping.
std::vector<IdArray> induced_nodes(num_ntypes);
std::vector<int64_t> num_induced_nodes(num_ntypes);
for (int64_t i = 0; i < num_ntypes; ++i) {
@@ -123,14 +126,12 @@ CompactGraphsCPU(
const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
mapped_rows,
mapped_cols));
srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0], mapped_rows, mapped_cols));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
new_graphs.push_back(
CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
}
return std::make_pair(new_graphs, induced_nodes);
@@ -138,7 +139,7 @@ CompactGraphsCPU(
}; // namespace
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCPU, int32_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -146,7 +147,7 @@ CompactGraphs<kDGLCPU, int32_t>(
return CompactGraphsCPU<int32_t>(graphs, always_preserve);
}
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCPU, int64_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -155,26 +156,26 @@ CompactGraphs<kDGLCPU, int64_t>(
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLCompactGraphs")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
List<HeteroGraphRef> graph_refs = args[0];
List<Value> always_preserve_refs = args[1];
std::vector<HeteroGraphPtr> graphs;
std::vector<IdArray> always_preserve;
for (HeteroGraphRef gref : graph_refs)
graphs.push_back(gref.sptr());
for (HeteroGraphRef gref : graph_refs) graphs.push_back(gref.sptr());
for (Value array : always_preserve_refs)
always_preserve.push_back(array->data);
// TODO(BarclayII): check for all IdArrays
CHECK(graphs[0]->DataType() == always_preserve[0]->dtype) << "data type mismatch.";
CHECK(graphs[0]->DataType() == always_preserve[0]->dtype)
<< "data type mismatch.";
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> result_pair;
ATEN_XPU_SWITCH_CUDA(graphs[0]->Context().device_type, XPU, "CompactGraphs", {
ATEN_XPU_SWITCH_CUDA(
graphs[0]->Context().device_type, XPU, "CompactGraphs", {
ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, {
result_pair = CompactGraphs<XPU, IdType>(
graphs, always_preserve);
result_pair = CompactGraphs<XPU, IdType>(graphs, always_preserve);
});
});
@@ -24,8 +24,8 @@
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <vector>
#include <utility>
#include <vector>
namespace dgl {
namespace transform {
@@ -41,9 +41,8 @@ namespace transform {
*
* @return The vector of compacted graphs and the vector of induced nodes.
*/
template<DGLDeviceType XPU, typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
template <DGLDeviceType XPU, typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve);
@@ -9,7 +9,9 @@
#include <dgl/array.h>
#include <dmlc/logging.h>
#include <nanoflann.hpp>
#include "../../../c_api_common.h"
namespace dgl {
@@ -17,29 +19,34 @@ namespace transform {
namespace knn_utils {
/*!
* \brief A simple 2D NDArray adapter for nanoflann, without duplicating the storage.
* \brief A simple 2D NDArray adapter for nanoflann, without duplicating the
* storage.
*
* \tparam FloatType: The type of the point coordinates (typically, double or float).
* \tparam IdType: The type for indices in the KD-tree index (typically, size_t of int)
* \tparam FeatureDim: If set to > 0, it specifies a compile-time fixed dimensionality
* for the points in the data set, allowing more compiler optimizations.
* \tparam Dist: The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2,
* nanoflann::metric_L2_Simple, etc.
* \note The spelling of dgl's adapter ("adapter") is different from naneflann ("adaptor")
* \tparam FloatType: The type of the point coordinates (typically, double or
* float).
* \tparam IdType: The type for indices in the KD-tree index (typically,
* size_t of int)
* \tparam FeatureDim: If set to > 0, it specifies a compile-time fixed
* dimensionality for the points in the data set, allowing more compiler
* optimizations.
* \tparam Dist: The distance metric to use: nanoflann::metric_L1,
nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc.
* \note The spelling of dgl's adapter ("adapter") is different from naneflann
* ("adaptor")
*/
template <typename FloatType,
typename IdType,
int FeatureDim = -1,
template <
typename FloatType, typename IdType, int FeatureDim = -1,
typename Dist = nanoflann::metric_L2>
class KDTreeNDArrayAdapter {
public:
using self_type = KDTreeNDArrayAdapter<FloatType, IdType, FeatureDim, Dist>;
using metric_type = typename Dist::template traits<FloatType, self_type>::distance_t;
using metric_type =
typename Dist::template traits<FloatType, self_type>::distance_t;
using index_type = nanoflann::KDTreeSingleIndexAdaptor<
metric_type, self_type, FeatureDim, IdType>;
KDTreeNDArrayAdapter(const size_t /* dims */,
const NDArray data_points,
KDTreeNDArrayAdapter(
const size_t /* dims */, const NDArray data_points,
const int leaf_max_size = 10)
: data_(data_points) {
CHECK(data_points->shape[0] != 0 && data_points->shape[1] != 0)
@@ -49,46 +56,38 @@ class KDTreeNDArrayAdapter {
<< "Data set feature dimension does not match the 'FeatureDim' "
<< "template argument.";
index_ = new index_type(
static_cast<int>(dims), *this, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
static_cast<int>(dims), *this,
nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size));
index_->buildIndex();
}
~KDTreeNDArrayAdapter() {
delete index_;
}
~KDTreeNDArrayAdapter() { delete index_; }
index_type* GetIndex() {
return index_;
}
index_type* GetIndex() { return index_; }
/*!
* \brief Query for the \a num_closest points to a given point
* Note that this is a short-cut method for GetIndex()->findNeighbors().
*/
void query(const FloatType* query_pt, const size_t num_closest,
IdType* out_idxs, FloatType* out_dists) const {
void query(
const FloatType* query_pt, const size_t num_closest, IdType* out_idxs,
FloatType* out_dists) const {
nanoflann::KNNResultSet<FloatType, IdType> resultSet(num_closest);
resultSet.init(out_idxs, out_dists);
index_->findNeighbors(resultSet, query_pt, nanoflann::SearchParams());
}
/*! \brief Interface expected by KDTreeSingleIndexAdaptor */
const self_type& derived() const {
return *this;
}
const self_type& derived() const { return *this; }
/*! \brief Interface expected by KDTreeSingleIndexAdaptor */
self_type& derived() {
return *this;
}
self_type& derived() { return *this; }
/*!
* \brief Interface expected by KDTreeSingleIndexAdaptor,
* return the number of data points
*/
size_t kdtree_get_point_count() const {
return data_->shape[0];
}
size_t kdtree_get_point_count() const { return data_->shape[0]; }
/*!
* \brief Interface expected by KDTreeSingleIndexAdaptor,
@@ -18,13 +18,13 @@
* all given graphs with the same set of nodes.
*/
#include <dgl/runtime/device_api.h>
#include <dgl/immutable_graph.h>
#include <cuda_runtime.h>
#include <utility>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
@@ -41,54 +41,45 @@ namespace transform {
namespace {
/**
* \brief This function builds node maps for each node type, preserving the
* order of the input nodes. Here it is assumed the nodes are not unique,
* and thus a unique list is generated.
*
* \param input_nodes The set of input nodes.
* \param node_maps The node maps to be constructed.
* \param count_unique_device The number of unique nodes (on the GPU).
* \param unique_nodes_device The unique nodes (on the GPU).
* \param stream The stream to operate on.
*/
template<typename IdType>
* \brief This function builds node maps for each node type, preserving the
* order of the input nodes. Here it is assumed the nodes are not unique,
* and thus a unique list is generated.
*
* \param input_nodes The set of input nodes.
* \param node_maps The node maps to be constructed.
* \param count_unique_device The number of unique nodes (on the GPU).
* \param unique_nodes_device The unique nodes (on the GPU).
* \param stream The stream to operate on.
*/
template <typename IdType>
void BuildNodeMaps(
const std::vector<IdArray>& input_nodes,
DeviceNodeMap<IdType> * const node_maps,
int64_t * const count_unique_device,
std::vector<IdArray>* const unique_nodes_device,
cudaStream_t stream) {
const std::vector<IdArray> &input_nodes,
DeviceNodeMap<IdType> *const node_maps, int64_t *const count_unique_device,
std::vector<IdArray> *const unique_nodes_device, cudaStream_t stream) {
const int64_t num_ntypes = static_cast<int64_t>(input_nodes.size());
CUDA_CALL(cudaMemsetAsync(
count_unique_device,
0,
num_ntypes*sizeof(*count_unique_device),
count_unique_device, 0, num_ntypes * sizeof(*count_unique_device),
stream));
// possibly duplicated nodes
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
const IdArray& nodes = input_nodes[ntype];
const IdArray &nodes = input_nodes[ntype];
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithDuplicates(
nodes.Ptr<IdType>(),
nodes->shape[0],
nodes.Ptr<IdType>(), nodes->shape[0],
(*unique_nodes_device)[ntype].Ptr<IdType>(),
count_unique_device+ntype,
stream);
count_unique_device + ntype, stream);
}
}
}
template<typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphsGPU(
template <typename IdType>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphsGPU(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve) {
const auto& ctx = graphs[0]->Context();
const auto &ctx = graphs[0]->Context();
auto device = runtime::DeviceAPI::Get(ctx);
cudaStream_t stream = runtime::getCurrentCUDAStream();
@@ -96,7 +87,8 @@ CompactGraphsGPU(
// Step 1: Collect the nodes that has connections for each type.
const uint64_t num_ntypes = graphs[0]->NumVertexTypes();
std::vector<std::vector<EdgeArray>> all_edges(graphs.size()); // all_edges[i][etype]
std::vector<std::vector<EdgeArray>> all_edges(
graphs.size()); // all_edges[i][etype]
// count the number of nodes per type
std::vector<int64_t> max_vertex_cnt(num_ntypes, 0);
@@ -123,19 +115,18 @@ CompactGraphsGPU(
std::vector<int64_t> node_offsets(num_ntypes, 0);
for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
all_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx,
sizeof(IdType)*8);
all_nodes[ntype] =
NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
// copy the nodes in always_preserve
if (ntype < always_preserve.size() && always_preserve[ntype]->shape[0] > 0) {
if (ntype < always_preserve.size() &&
always_preserve[ntype]->shape[0] > 0) {
device->CopyDataFromTo(
always_preserve[ntype].Ptr<IdType>(), 0,
all_nodes[ntype].Ptr<IdType>(),
node_offsets[ntype],
sizeof(IdType)*always_preserve[ntype]->shape[0],
always_preserve[ntype]->ctx,
all_nodes[ntype]->ctx,
all_nodes[ntype].Ptr<IdType>(), node_offsets[ntype],
sizeof(IdType) * always_preserve[ntype]->shape[0],
always_preserve[ntype]->ctx, all_nodes[ntype]->ctx,
always_preserve[ntype]->dtype);
node_offsets[ntype] += sizeof(IdType)*always_preserve[ntype]->shape[0];
node_offsets[ntype] += sizeof(IdType) * always_preserve[ntype]->shape[0];
}
}
@@ -152,25 +143,17 @@ CompactGraphsGPU(
if (edges.src.defined()) {
device->CopyDataFromTo(
edges.src.Ptr<IdType>(), 0,
all_nodes[srctype].Ptr<IdType>(),
node_offsets[srctype],
sizeof(IdType)*edges.src->shape[0],
edges.src->ctx,
all_nodes[srctype]->ctx,
edges.src->dtype);
node_offsets[srctype] += sizeof(IdType)*edges.src->shape[0];
edges.src.Ptr<IdType>(), 0, all_nodes[srctype].Ptr<IdType>(),
node_offsets[srctype], sizeof(IdType) * edges.src->shape[0],
edges.src->ctx, all_nodes[srctype]->ctx, edges.src->dtype);
node_offsets[srctype] += sizeof(IdType) * edges.src->shape[0];
}
if (edges.dst.defined()) {
device->CopyDataFromTo(
edges.dst.Ptr<IdType>(), 0,
all_nodes[dsttype].Ptr<IdType>(),
node_offsets[dsttype],
sizeof(IdType)*edges.dst->shape[0],
edges.dst->ctx,
all_nodes[dsttype]->ctx,
edges.dst->dtype);
node_offsets[dsttype] += sizeof(IdType)*edges.dst->shape[0];
edges.dst.Ptr<IdType>(), 0, all_nodes[dsttype].Ptr<IdType>(),
node_offsets[dsttype], sizeof(IdType) * edges.dst->shape[0],
edges.dst->ctx, all_nodes[dsttype]->ctx, edges.dst->dtype);
node_offsets[dsttype] += sizeof(IdType) * edges.dst->shape[0];
}
all_edges[i].push_back(edges);
}
@@ -185,29 +168,22 @@ CompactGraphsGPU(
// number of unique nodes per type on CPU
std::vector<int64_t> num_induced_nodes(num_ntypes);
// number of unique nodes per type on GPU
int64_t * count_unique_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t)*num_ntypes));
int64_t *count_unique_device = static_cast<int64_t *>(
device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes));
// the set of unique nodes per type
std::vector<IdArray> induced_nodes(num_ntypes);
for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) {
induced_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx,
sizeof(IdType)*8);
induced_nodes[ntype] =
NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8);
}
BuildNodeMaps(
all_nodes,
&node_maps,
count_unique_device,
&induced_nodes,
stream);
all_nodes, &node_maps, count_unique_device, &induced_nodes, stream);
device->CopyDataFromTo(
count_unique_device, 0,
num_induced_nodes.data(), 0,
sizeof(*num_induced_nodes.data())*num_ntypes,
ctx,
DGLContext{kDGLCPU, 0},
DGLDataType{kDGLInt, 64, 1});
count_unique_device, 0, num_induced_nodes.data(), 0,
sizeof(*num_induced_nodes.data()) * num_ntypes, ctx,
DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
device->StreamSync(ctx, stream);
// wait for the node counts to finish transferring
@@ -230,22 +206,20 @@ CompactGraphsGPU(
std::vector<IdArray> new_src;
std::vector<IdArray> new_dst;
std::tie(new_src, new_dst) = MapEdges(
curr_graph, all_edges[i], node_maps, stream);
std::tie(new_src, new_dst) =
MapEdges(curr_graph, all_edges[i], node_maps, stream);
for (IdType etype = 0; etype < num_etypes; ++etype) {
IdType srctype, dsttype;
std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype);
rel_graphs.push_back(UnitGraph::CreateFromCOO(
srctype == dsttype ? 1 : 2,
induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0],
new_src[etype],
new_dst[etype]));
srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0],
induced_nodes[dsttype]->shape[0], new_src[etype], new_dst[etype]));
}
new_graphs.push_back(CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
new_graphs.push_back(
CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes));
}
return std::make_pair(new_graphs, induced_nodes);
@@ -253,7 +227,7 @@ CompactGraphsGPU(
} // namespace
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCUDA, int32_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -261,7 +235,7 @@ CompactGraphs<kDGLCUDA, int32_t>(
return CompactGraphsGPU<int32_t>(graphs, always_preserve);
}
template<>
template <>
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs<kDGLCUDA, int64_t>(
const std::vector<HeteroGraphPtr> &graphs,
@@ -20,13 +20,14 @@
#ifndef DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_
#define DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_
#include <dgl/runtime/c_runtime_api.h>
#include <cuda_runtime.h>
#include <dgl/runtime/c_runtime_api.h>
#include <algorithm>
#include <memory>
#include <tuple>
#include <vector>
#include <utility>
#include <vector>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../../runtime/cuda/cuda_hashtable.cuh"
@@ -39,48 +40,46 @@ namespace transform {
namespace cuda {
template<typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
template <typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
__device__ void map_vertex_ids(
const IdType * const global,
IdType * const new_global,
const IdType num_vertices,
const DeviceOrderedHashTable<IdType>& table) {
const IdType* const global, IdType* const new_global,
const IdType num_vertices, const DeviceOrderedHashTable<IdType>& table) {
assert(BLOCK_SIZE == blockDim.x);
using Mapping = typename OrderedHashTable<IdType>::Mapping;
const IdType tile_start = TILE_SIZE*blockIdx.x;
const IdType tile_end = min(TILE_SIZE*(blockIdx.x+1), num_vertices);
const IdType tile_start = TILE_SIZE * blockIdx.x;
const IdType tile_end = min(TILE_SIZE * (blockIdx.x + 1), num_vertices);
for (IdType idx = threadIdx.x+tile_start; idx < tile_end; idx+=BLOCK_SIZE) {
for (IdType idx = threadIdx.x + tile_start; idx < tile_end;
idx += BLOCK_SIZE) {
const Mapping& mapping = *table.Search(global[idx]);
new_global[idx] = mapping.local;
}
}
/**
* \brief Generate mapped edge endpoint ids.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of each thread block.
* \tparam TILE_SIZE The number of edges to process per thread block.
* \param global_srcs_device The source ids to map.
* \param new_global_srcs_device The mapped source ids (output).
* \param global_dsts_device The destination ids to map.
* \param new_global_dsts_device The mapped destination ids (output).
* \param num_edges The number of edges to map.
* \param src_mapping The mapping of sources ids.
* \param src_hash_size The the size of source id hash table/mapping.
* \param dst_mapping The mapping of destination ids.
* \param dst_hash_size The the size of destination id hash table/mapping.
*/
template<typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
* \brief Generate mapped edge endpoint ids.
*
* \tparam IdType The type of id.
* \tparam BLOCK_SIZE The size of each thread block.
* \tparam TILE_SIZE The number of edges to process per thread block.
* \param global_srcs_device The source ids to map.
* \param new_global_srcs_device The mapped source ids (output).
* \param global_dsts_device The destination ids to map.
* \param new_global_dsts_device The mapped destination ids (output).
* \param num_edges The number of edges to map.
* \param src_mapping The mapping of sources ids.
* \param src_hash_size The the size of source id hash table/mapping.
* \param dst_mapping The mapping of destination ids.
* \param dst_hash_size The the size of destination id hash table/mapping.
*/
template <typename IdType, int BLOCK_SIZE, IdType TILE_SIZE>
__global__ void map_edge_ids(
const IdType * const global_srcs_device,
IdType * const new_global_srcs_device,
const IdType * const global_dsts_device,
IdType * const new_global_dsts_device,
const IdType num_edges,
const IdType* const global_srcs_device,
IdType* const new_global_srcs_device,
const IdType* const global_dsts_device,
IdType* const new_global_dsts_device, const IdType num_edges,
DeviceOrderedHashTable<IdType> src_mapping,
DeviceOrderedHashTable<IdType> dst_mapping) {
assert(BLOCK_SIZE == blockDim.x);
@@ -88,39 +87,32 @@ __global__ void map_edge_ids(
if (blockIdx.y == 0) {
map_vertex_ids<IdType, BLOCK_SIZE, TILE_SIZE>(
global_srcs_device,
new_global_srcs_device,
num_edges,
src_mapping);
global_srcs_device, new_global_srcs_device, num_edges, src_mapping);
} else {
map_vertex_ids<IdType, BLOCK_SIZE, TILE_SIZE>(
global_dsts_device,
new_global_dsts_device,
num_edges,
dst_mapping);
global_dsts_device, new_global_dsts_device, num_edges, dst_mapping);
}
}
/**
* \brief Device level node maps for each node type.
*
* \param num_nodes Number of nodes per type.
* \param offset When offset is set to 0, LhsHashTable is identical to RhsHashTable.
* Or set to num_nodes.size()/2 to use seperated LhsHashTable and RhsHashTable.
* \param ctx The DGL context.
* \param stream The stream to operate on.
*/
template<typename IdType>
* \brief Device level node maps for each node type.
*
* \param num_nodes Number of nodes per type.
* \param offset When offset is set to 0, LhsHashTable is identical to
* RhsHashTable. Or set to num_nodes.size()/2 to use seperated
* LhsHashTable and RhsHashTable.
* \param ctx The DGL context.
* \param stream The stream to operate on.
*/
template <typename IdType>
class DeviceNodeMap {
public:
using Mapping = typename OrderedHashTable<IdType>::Mapping;
DeviceNodeMap(
const std::vector<int64_t>& num_nodes,
const int64_t offset,
DGLContext ctx,
cudaStream_t stream) :
num_types_(num_nodes.size()),
const std::vector<int64_t>& num_nodes, const int64_t offset,
DGLContext ctx, cudaStream_t stream)
: num_types_(num_nodes.size()),
rhs_offset_(offset),
hash_tables_(),
ctx_(ctx) {
@@ -129,46 +121,33 @@ class DeviceNodeMap {
hash_tables_.reserve(num_types_);
for (int64_t i = 0; i < num_types_; ++i) {
hash_tables_.emplace_back(
new OrderedHashTable<IdType>(
num_nodes[i],
ctx_,
stream));
new OrderedHashTable<IdType>(num_nodes[i], ctx_, stream));
}
}
OrderedHashTable<IdType>& LhsHashTable(
const size_t index) {
OrderedHashTable<IdType>& LhsHashTable(const size_t index) {
return HashData(index);
}
OrderedHashTable<IdType>& RhsHashTable(
const size_t index) {
return HashData(index+rhs_offset_);
OrderedHashTable<IdType>& RhsHashTable(const size_t index) {
return HashData(index + rhs_offset_);
}
const OrderedHashTable<IdType>& LhsHashTable(
const size_t index) const {
const OrderedHashTable<IdType>& LhsHashTable(const size_t index) const {
return HashData(index);
}
const OrderedHashTable<IdType>& RhsHashTable(
const size_t index) const {
return HashData(index+rhs_offset_);
const OrderedHashTable<IdType>& RhsHashTable(const size_t index) const {
return HashData(index + rhs_offset_);
}
IdType LhsHashSize(
const size_t index) const {
return HashSize(index);
}
IdType LhsHashSize(const size_t index) const { return HashSize(index); }
IdType RhsHashSize(
const size_t index) const {
return HashSize(rhs_offset_+index);
IdType RhsHashSize(const size_t index) const {
return HashSize(rhs_offset_ + index);
}
size_t Size() const {
return hash_tables_.size();
}
size_t Size() const { return hash_tables_.size(); }
private:
int64_t num_types_;
@@ -176,45 +155,35 @@ class DeviceNodeMap {
std::vector<std::unique_ptr<OrderedHashTable<IdType>>> hash_tables_;
DGLContext ctx_;
inline OrderedHashTable<IdType>& HashData(
const size_t index) {
inline OrderedHashTable<IdType>& HashData(const size_t index) {
CHECK_LT(index, hash_tables_.size());
return *hash_tables_[index];
}
inline const OrderedHashTable<IdType>& HashData(
const size_t index) const {
inline const OrderedHashTable<IdType>& HashData(const size_t index) const {
CHECK_LT(index, hash_tables_.size());
return *hash_tables_[index];
}
inline IdType HashSize(
const size_t index) const {
inline IdType HashSize(const size_t index) const {
return HashData(index).size();
}
};
template<typename IdType>
inline size_t RoundUpDiv(
const IdType num,
const size_t divisor) {
return static_cast<IdType>(num/divisor) + (num % divisor == 0 ? 0 : 1);
template <typename IdType>
inline size_t RoundUpDiv(const IdType num, const size_t divisor) {
return static_cast<IdType>(num / divisor) + (num % divisor == 0 ? 0 : 1);
}
template<typename IdType>
inline IdType RoundUp(
const IdType num,
const size_t unit) {
return RoundUpDiv(num, unit)*unit;
template <typename IdType>
inline IdType RoundUp(const IdType num, const size_t unit) {
return RoundUpDiv(num, unit) * unit;
}
template<typename IdType>
std::tuple<std::vector<IdArray>, std::vector<IdArray>>
MapEdges(
HeteroGraphPtr graph,
const std::vector<EdgeArray>& edge_sets,
const DeviceNodeMap<IdType>& node_map,
cudaStream_t stream) {
template <typename IdType>
std::tuple<std::vector<IdArray>, std::vector<IdArray>> MapEdges(
HeteroGraphPtr graph, const std::vector<EdgeArray>& edge_sets,
const DeviceNodeMap<IdType>& node_map, cudaStream_t stream) {
constexpr const int BLOCK_SIZE = 128;
constexpr const size_t TILE_SIZE = 1024;
@@ -233,8 +202,8 @@ MapEdges(
if (edges.id.defined() && edges.src->shape[0] > 0) {
const int64_t num_edges = edges.src->shape[0];
new_lhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType)*8));
new_rhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType)*8));
new_lhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));
new_rhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));
const auto src_dst_types = graph->GetEndpointTypes(etype);
const int src_type = src_dst_types.first;
@@ -244,20 +213,17 @@ MapEdges(
const dim3 block(BLOCK_SIZE);
// map the srcs
CUDA_KERNEL_CALL((map_edge_ids<IdType, BLOCK_SIZE, TILE_SIZE>),
grid, block, 0, stream,
edges.src.Ptr<IdType>(),
new_lhs.back().Ptr<IdType>(),
edges.dst.Ptr<IdType>(),
new_rhs.back().Ptr<IdType>(),
num_edges,
CUDA_KERNEL_CALL(
(map_edge_ids<IdType, BLOCK_SIZE, TILE_SIZE>), grid, block, 0, stream,
edges.src.Ptr<IdType>(), new_lhs.back().Ptr<IdType>(),
edges.dst.Ptr<IdType>(), new_rhs.back().Ptr<IdType>(), num_edges,
node_map.LhsHashTable(src_type).DeviceHandle(),
node_map.RhsHashTable(dst_type).DeviceHandle());
} else {
new_lhs.emplace_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
new_rhs.emplace_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
}
}
@@ -265,7 +231,6 @@ MapEdges(
std::move(new_lhs), std::move(new_rhs));
}
} // namespace cuda
} // namespace transform
} // namespace dgl
@@ -18,13 +18,13 @@
* ids.
*/
#include <dgl/runtime/device_api.h>
#include <dgl/immutable_graph.h>
#include <cuda_runtime.h>
#include <utility>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/device_api.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "../../../runtime/cuda/cuda_common.h"
#include "../../heterograph.h"
@@ -40,14 +40,13 @@ namespace transform {
namespace {
template<typename IdType>
template <typename IdType>
class DeviceNodeMapMaker {
public:
explicit DeviceNodeMapMaker(
const std::vector<int64_t>& maxNodesPerType) :
max_num_nodes_(0) {
max_num_nodes_ = *std::max_element(maxNodesPerType.begin(),
maxNodesPerType.end());
explicit DeviceNodeMapMaker(const std::vector<int64_t>& maxNodesPerType)
: max_num_nodes_(0) {
max_num_nodes_ =
*std::max_element(maxNodesPerType.begin(), maxNodesPerType.end());
}
/**
@@ -65,17 +64,12 @@ class DeviceNodeMapMaker {
void Make(
const std::vector<IdArray>& lhs_nodes,
const std::vector<IdArray>& rhs_nodes,
DeviceNodeMap<IdType> * const node_maps,
int64_t * const count_lhs_device,
std::vector<IdArray>* const lhs_device,
cudaStream_t stream) {
DeviceNodeMap<IdType>* const node_maps, int64_t* const count_lhs_device,
std::vector<IdArray>* const lhs_device, cudaStream_t stream) {
const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size();
CUDA_CALL(cudaMemsetAsync(
count_lhs_device,
0,
num_ntypes*sizeof(*count_lhs_device),
stream));
count_lhs_device, 0, num_ntypes * sizeof(*count_lhs_device), stream));
// possibly dublicate lhs nodes
const int64_t lhs_num_ntypes = static_cast<int64_t>(lhs_nodes.size());
@@ -84,10 +78,8 @@ class DeviceNodeMapMaker {
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithDuplicates(
nodes.Ptr<IdType>(),
nodes->shape[0],
(*lhs_device)[ntype].Ptr<IdType>(),
count_lhs_device+ntype,
nodes.Ptr<IdType>(), nodes->shape[0],
(*lhs_device)[ntype].Ptr<IdType>(), count_lhs_device + ntype,
stream);
}
}
@@ -98,9 +90,7 @@ class DeviceNodeMapMaker {
const IdArray& nodes = rhs_nodes[ntype];
if (nodes->shape[0] > 0) {
node_maps->RhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
}
@@ -118,8 +108,7 @@ class DeviceNodeMapMaker {
void Make(
const std::vector<IdArray>& lhs_nodes,
const std::vector<IdArray>& rhs_nodes,
DeviceNodeMap<IdType> * const node_maps,
cudaStream_t stream) {
DeviceNodeMap<IdType>* const node_maps, cudaStream_t stream) {
const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size();
// unique lhs nodes
@@ -129,9 +118,7 @@ class DeviceNodeMapMaker {
if (nodes->shape[0] > 0) {
CHECK_EQ(nodes->ctx.device_type, kDGLCUDA);
node_maps->LhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
@@ -141,9 +128,7 @@ class DeviceNodeMapMaker {
const IdArray& nodes = rhs_nodes[ntype];
if (nodes->shape[0] > 0) {
node_maps->RhsHashTable(ntype).FillWithUnique(
nodes.Ptr<IdType>(),
nodes->shape[0],
stream);
nodes.Ptr<IdType>(), nodes->shape[0], stream);
}
}
}
@@ -152,20 +137,15 @@ class DeviceNodeMapMaker {
IdType max_num_nodes_;
};
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCUDA.
template<typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
const bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes_ptr) {
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU(
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
const bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes_ptr) {
std::vector<IdArray>& lhs_nodes = *lhs_nodes_ptr;
const bool generate_lhs_nodes = lhs_nodes.empty();
const auto& ctx = graph->Context();
auto device = runtime::DeviceAPI::Get(ctx);
cudaStream_t stream = runtime::getCurrentCUDAStream();
@@ -176,10 +156,11 @@ ToBlockGPU(
}
// Since DST nodes are included in SRC nodes, a common requirement is to fetch
// the DST node features from the SRC nodes features. To avoid expensive sparse lookup,
// the function assures that the DST nodes in both SRC and DST sets have the same ids.
// As a result, given the node feature tensor ``X`` of type ``utype``,
// the following code finds the corresponding DST node features of type ``vtype``:
// the DST node features from the SRC nodes features. To avoid expensive
// sparse lookup, the function assures that the DST nodes in both SRC and DST
// sets have the same ids. As a result, given the node feature tensor ``X`` of
// type ``utype``, the following code finds the corresponding DST node
// features of type ``vtype``:
const int64_t num_etypes = graph->NumEdgeTypes();
const int64_t num_ntypes = graph->NumVertexTypes();
@@ -197,9 +178,9 @@ ToBlockGPU(
}
// count lhs and rhs nodes
std::vector<int64_t> maxNodesPerType(num_ntypes*2, 0);
std::vector<int64_t> maxNodesPerType(num_ntypes * 2, 0);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
maxNodesPerType[ntype+num_ntypes] += rhs_nodes[ntype]->shape[0];
maxNodesPerType[ntype + num_ntypes] += rhs_nodes[ntype]->shape[0];
if (generate_lhs_nodes) {
if (include_rhs_in_lhs) {
@@ -226,16 +207,16 @@ ToBlockGPU(
if (generate_lhs_nodes) {
std::vector<int64_t> src_node_offsets(num_ntypes, 0);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
src_nodes[ntype] = NewIdArray(maxNodesPerType[ntype], ctx,
sizeof(IdType)*8);
src_nodes[ntype] =
NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8);
if (include_rhs_in_lhs) {
// place rhs nodes first
device->CopyDataFromTo(rhs_nodes[ntype].Ptr<IdType>(), 0,
src_nodes[ntype].Ptr<IdType>(), src_node_offsets[ntype],
sizeof(IdType)*rhs_nodes[ntype]->shape[0],
rhs_nodes[ntype]->ctx, src_nodes[ntype]->ctx,
rhs_nodes[ntype]->dtype);
src_node_offsets[ntype] += sizeof(IdType)*rhs_nodes[ntype]->shape[0];
device->CopyDataFromTo(
rhs_nodes[ntype].Ptr<IdType>(), 0, src_nodes[ntype].Ptr<IdType>(),
src_node_offsets[ntype],
sizeof(IdType) * rhs_nodes[ntype]->shape[0], rhs_nodes[ntype]->ctx,
src_nodes[ntype]->ctx, rhs_nodes[ntype]->dtype);
src_node_offsets[ntype] += sizeof(IdType) * rhs_nodes[ntype]->shape[0];
}
}
for (int64_t etype = 0; etype < num_etypes; ++etype) {
@@ -244,14 +225,13 @@ ToBlockGPU(
if (edge_arrays[etype].src.defined()) {
device->CopyDataFromTo(
edge_arrays[etype].src.Ptr<IdType>(), 0,
src_nodes[srctype].Ptr<IdType>(),
src_node_offsets[srctype],
sizeof(IdType)*edge_arrays[etype].src->shape[0],
rhs_nodes[srctype]->ctx,
src_nodes[srctype]->ctx,
src_nodes[srctype].Ptr<IdType>(), src_node_offsets[srctype],
sizeof(IdType) * edge_arrays[etype].src->shape[0],
rhs_nodes[srctype]->ctx, src_nodes[srctype]->ctx,
rhs_nodes[srctype]->dtype);
src_node_offsets[srctype] += sizeof(IdType)*edge_arrays[etype].src->shape[0];
src_node_offsets[srctype] +=
sizeof(IdType) * edge_arrays[etype].src->shape[0];
}
}
} else {
@@ -267,47 +247,35 @@ ToBlockGPU(
if (generate_lhs_nodes) {
lhs_nodes.reserve(num_ntypes);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
lhs_nodes.emplace_back(NewIdArray(
maxNodesPerType[ntype], ctx, sizeof(IdType)*8));
lhs_nodes.emplace_back(
NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8));
}
}
std::vector<int64_t> num_nodes_per_type(num_ntypes*2);
std::vector<int64_t> num_nodes_per_type(num_ntypes * 2);
// populate RHS nodes from what we already know
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
num_nodes_per_type[num_ntypes+ntype] = rhs_nodes[ntype]->shape[0];
num_nodes_per_type[num_ntypes + ntype] = rhs_nodes[ntype]->shape[0];
}
// populate the mappings
if (generate_lhs_nodes) {
int64_t * count_lhs_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t)*num_ntypes*2));
int64_t* count_lhs_device = static_cast<int64_t*>(
device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes * 2));
maker.Make(
src_nodes,
rhs_nodes,
&node_maps,
count_lhs_device,
&lhs_nodes,
stream);
src_nodes, rhs_nodes, &node_maps, count_lhs_device, &lhs_nodes, stream);
device->CopyDataFromTo(
count_lhs_device, 0,
num_nodes_per_type.data(), 0,
sizeof(*num_nodes_per_type.data())*num_ntypes,
ctx,
DGLContext{kDGLCPU, 0},
DGLDataType{kDGLInt, 64, 1});
count_lhs_device, 0, num_nodes_per_type.data(), 0,
sizeof(*num_nodes_per_type.data()) * num_ntypes, ctx,
DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1});
device->StreamSync(ctx, stream);
// wait for the node counts to finish transferring
device->FreeWorkspace(ctx, count_lhs_device);
} else {
maker.Make(
lhs_nodes,
rhs_nodes,
&node_maps,
stream);
maker.Make(lhs_nodes, rhs_nodes, &node_maps, stream);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
num_nodes_per_type[ntype] = lhs_nodes[ntype]->shape[0];
@@ -321,7 +289,7 @@ ToBlockGPU(
induced_edges.push_back(edge_arrays[etype].id);
} else {
induced_edges.push_back(
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
}
}
@@ -329,8 +297,8 @@ ToBlockGPU(
const auto meta_graph = graph->meta_graph();
const EdgeArray etypes = meta_graph->Edges("eid");
const IdArray new_dst = Add(etypes.dst, num_ntypes);
const auto new_meta_graph = ImmutableGraph::CreateFromCOO(
num_ntypes * 2, etypes.src, new_dst);
const auto new_meta_graph =
ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);
// allocate vector for graph relations while GPU is busy
std::vector<HeteroGraphPtr> rel_graphs;
@@ -358,20 +326,17 @@ ToBlockGPU(
// No rhs nodes are given for this edge type. Create an empty graph.
rel_graphs.push_back(CreateFromCOO(
2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx),
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType)*8, 1}, ctx)));
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx),
aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx)));
} else {
rel_graphs.push_back(CreateFromCOO(
2,
lhs_nodes[srctype]->shape[0],
rhs_nodes[dsttype]->shape[0],
new_lhs[etype],
new_rhs[etype]));
2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
new_lhs[etype], new_rhs[etype]));
}
}
HeteroGraphPtr new_graph = CreateHeteroGraph(
new_meta_graph, rel_graphs, num_nodes_per_type);
HeteroGraphPtr new_graph =
CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);
// return the new graph, the new src nodes, and new edges
return std::make_tuple(new_graph, induced_edges);
@@ -379,26 +344,22 @@ ToBlockGPU(
} // namespace
// Use explicit names to get around MSVC's broken mangling that thinks the following two
// functions are the same.
// Using template<> fails to export the symbols.
// Use explicit names to get around MSVC's broken mangling that thinks the
// following two functions are the same. Using template<> fails to export the
// symbols.
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
// ToBlock<kDGLCUDA, int32_t>
ToBlockGPU32(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes) {
return ToBlockGPU<int32_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
// ToBlock<kDGLCUDA, int64_t>
ToBlockGPU64(
HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes) {
return ToBlockGPU<int64_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
@@ -4,9 +4,11 @@
* \brief k-nearest-neighbor (KNN) interface
*/
#include <dgl/runtime/registry.h>
#include <dgl/runtime/packed_func.h>
#include "knn.h"
#include <dgl/runtime/packed_func.h>
#include <dgl/runtime/registry.h>
#include "../../array/check.h"
using namespace dgl::runtime;
@@ -14,7 +16,7 @@ namespace dgl {
namespace transform {
DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
const NDArray data_points = args[0];
const IdArray data_offsets = args[1];
const NDArray query_points = args[2];
@@ -25,7 +27,8 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
aten::CheckContiguous(
{data_points, data_offsets, query_points, query_offsets, result},
{"data_points", "data_offsets", "query_points", "query_offsets", "result"});
{"data_points", "data_offsets", "query_points", "query_offsets",
"result"});
aten::CheckCtx(
data_points->ctx, {data_offsets, query_points, query_offsets, result},
{"data_offsets", "query_points", "query_offsets", "result"});
@@ -34,15 +37,15 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
ATEN_FLOAT_TYPE_SWITCH(data_points->dtype, FloatType, "data_points", {
ATEN_ID_TYPE_SWITCH(result->dtype, IdType, {
KNN<XPU, FloatType, IdType>(
data_points, data_offsets, query_points,
query_offsets, k, result, algorithm);
data_points, data_offsets, query_points, query_offsets, k,
result, algorithm);
});
});
});
});
DGL_REGISTER_GLOBAL("transform._CAPI_DGLNNDescent")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
const NDArray points = args[0];
const IdArray offsets = args[1];
const IdArray result = args[2];
@@ -54,7 +57,8 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLNNDescent")
aten::CheckContiguous(
{points, offsets, result}, {"points", "offsets", "result"});
aten::CheckCtx(
points->ctx, {points, offsets, result}, {"points", "offsets", "result"});
points->ctx, {points, offsets, result},
{"points", "offsets", "result"});
ATEN_XPU_SWITCH_CUDA(points->ctx.device_type, XPU, "NNDescent", {
ATEN_FLOAT_TYPE_SWITCH(points->dtype, FloatType, "points", {
@@ -8,6 +8,7 @@
#define DGL_GRAPH_TRANSFORM_KNN_H_
#include <dgl/array.h>
#include <string>
namespace dgl {
@@ -15,22 +16,24 @@ namespace transform {
/*!
* \brief For each point in each segment in \a query_points, find \a k nearest
* points in the same segment in \a data_points. \a data_offsets and \a query_offsets
* determine the start index of each segment in \a data_points and \a query_points.
* points in the same segment in \a data_points. \a data_offsets and \a
* query_offsets determine the start index of each segment in \a
* data_points and \a query_points.
*
* \param data_points dataset points.
* \param data_offsets offsets of point index in \a data_points.
* \param query_points query points.
* \param query_offsets offsets of point index in \a query_points.
* \param k the number of nearest points.
* \param result output array. A 2D tensor indicating the index
* relation between \a query_points and \a data_points.
* \param result output array. A 2D tensor indicating the index relation
* between \a query_points and \a data_points.
* \param algorithm algorithm used to compute the k-nearest neighbors.
*/
template <DGLDeviceType XPU, typename FloatType, typename IdType>
void KNN(const NDArray& data_points, const IdArray& data_offsets,
const NDArray& query_points, const IdArray& query_offsets,
const int k, IdArray result, const std::string& algorithm);
void KNN(
const NDArray& data_points, const IdArray& data_offsets,
const NDArray& query_points, const IdArray& query_offsets, const int k,
IdArray result, const std::string& algorithm);
/*!
* \brief For each input point, find \a k approximate nearest points in the same
@@ -38,19 +41,20 @@ void KNN(const NDArray& data_points, const IdArray& data_offsets,
*
* \param points input points.
* \param offsets offsets of point index.
* \param result output array. A 2D tensor indicating the index relation between points.
* \param result output array. A 2D tensor indicating the index relation between
* points.
* \param k the number of nearest points.
* \param num_iters The maximum number of NN-descent iterations to perform.
* \param num_candidates The maximum number of candidates to be considered during one iteration.
* \param num_candidates The maximum number of candidates to be considered
* during one iteration.
* \param delta A value controls the early abort.
*/
template <DGLDeviceType XPU, typename FloatType, typename IdType>
void NNDescent(const NDArray& points, const IdArray& offsets,
IdArray result, const int k, const int num_iters,
const int num_candidates, const double delta);
void NNDescent(
const NDArray& points, const IdArray& offsets, IdArray result, const int k,
const int num_iters, const int num_candidates, const double delta);
} // namespace transform
} // namespace dgl
#endif // DGL_GRAPH_TRANSFORM_KNN_H_
@@ -4,12 +4,14 @@
* \brief Line graph implementation
*/
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <vector>
#include <dgl/transform.h>
#include <utility>
#include <vector>
#include "../../c_api_common.h"
#include "../heterograph.h"
@@ -21,26 +23,25 @@ using namespace dgl::aten;
namespace transform {
/*!
* \brief Create Line Graph
* \param hg Graph
* \param backtracking whether the pair of (v, u) (u, v) edges are treated as linked
* \return The Line Graph
* \brief Create Line Graph.
* \param hg Graph.
* \param backtracking whether the pair of (v, u) (u, v) edges are treated as
* linked.
* \return The Line Graph.
*/
HeteroGraphPtr CreateLineGraph(
HeteroGraphPtr hg,
bool backtracking) {
HeteroGraphPtr CreateLineGraph(HeteroGraphPtr hg, bool backtracking) {
const auto hgp = std::dynamic_pointer_cast<HeteroGraph>(hg);
return hgp->LineGraph(backtracking);
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLHeteroLineGraph")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
.set_body([](DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0];
bool backtracking = args[1];
auto hgptr = CreateLineGraph(hg.sptr(), backtracking);
*rv = HeteroGraphRef(hgptr);
});
});
}; // namespace transform
}; // namespace dgl
@@ -19,8 +19,9 @@ namespace transform {
#if !defined(_WIN32)
IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr,
const std::string &mode, bool obj_cut) {
IdArray MetisPartition(
UnitGraphPtr g, int k, NDArray vwgt_arr, const std::string &mode,
bool obj_cut) {
// Mode can only be "k-way" or "recursive"
CHECK(mode == "k-way" || mode == "recursive")
<< "mode can only be \"k-way\" or \"recursive\"";
......@@ -51,7 +52,8 @@ IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr,
vwgt = static_cast<idx_t *>(vwgt_arr->data);
}
auto partition_func = (mode == "k-way") ? METIS_PartGraphKway : METIS_PartGraphRecursive;
auto partition_func =
(mode == "k-way") ? METIS_PartGraphKway : METIS_PartGraphRecursive;
idx_t options[METIS_NOPTIONS];
METIS_SetDefaultOptions(options);
......
......@@ -37,20 +37,23 @@ HeteroGraphPtr ReorderUnitGraph(UnitGraphPtr ug, IdArray new_order) {
if (format & CSC_CODE) {
auto cscmat = ug->GetCSCMatrix(0);
auto new_cscmat = aten::CSRReorder(cscmat, new_order, new_order);
return UnitGraph::CreateFromCSC(ug->NumVertexTypes(), new_cscmat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCSC(
ug->NumVertexTypes(), new_cscmat, ug->GetAllowedFormats());
} else if (format & CSR_CODE) {
auto csrmat = ug->GetCSRMatrix(0);
auto new_csrmat = aten::CSRReorder(csrmat, new_order, new_order);
return UnitGraph::CreateFromCSR(ug->NumVertexTypes(), new_csrmat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCSR(
ug->NumVertexTypes(), new_csrmat, ug->GetAllowedFormats());
} else {
auto coomat = ug->GetCOOMatrix(0);
auto new_coomat = aten::COOReorder(coomat, new_order, new_order);
return UnitGraph::CreateFromCOO(ug->NumVertexTypes(), new_coomat, ug->GetAllowedFormats());
return UnitGraph::CreateFromCOO(
ug->NumVertexTypes(), new_coomat, ug->GetAllowedFormats());
}
}
HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
IdArray nodes, int num_hops) {
HaloHeteroSubgraph GetSubgraphWithHalo(
std::shared_ptr<HeteroGraph> hg, IdArray nodes, int num_hops) {
CHECK_EQ(hg->NumBits(), 64) << "halo subgraph only supports 64bits graph";
CHECK_EQ(hg->relation_graphs().size(), 1)
<< "halo subgraph only supports homogeneous graph";
......@@ -113,8 +116,8 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
const dgl_id_t *eid_data = static_cast<dgl_id_t *>(eid->data);
for (int64_t i = 0; i < num_edges; i++) {
auto it1 = orig_nodes.find(src_data[i]);
// If the source node is in the partition, we have got this edge when we iterate over
// the out-edges above.
// If the source node is in the partition, we have got this edge when we
// iterate over the out-edges above.
if (it1 == orig_nodes.end()) {
edge_src.push_back(src_data[i]);
edge_dst.push_back(dst_data[i]);
......@@ -164,10 +167,10 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
}
num_edges = edge_src.size();
IdArray new_src = IdArray::Empty({num_edges}, DGLDataType{kDGLInt, 64, 1},
DGLContext{kDGLCPU, 0});
IdArray new_dst = IdArray::Empty({num_edges}, DGLDataType{kDGLInt, 64, 1},
DGLContext{kDGLCPU, 0});
IdArray new_src = IdArray::Empty(
{num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
IdArray new_dst = IdArray::Empty(
{num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
dgl_id_t *new_src_data = static_cast<dgl_id_t *>(new_src->data);
dgl_id_t *new_dst_data = static_cast<dgl_id_t *>(new_dst->data);
for (size_t i = 0; i < edge_src.size(); i++) {
......@@ -180,8 +183,8 @@ HaloHeteroSubgraph GetSubgraphWithHalo(std::shared_ptr<HeteroGraph> hg,
dgl_id_t old_nid = old_node_ids[i];
inner_nodes[i] = all_nodes[old_nid];
}
aten::COOMatrix coo(old_node_ids.size(), old_node_ids.size(), new_src,
new_dst);
aten::COOMatrix coo(
old_node_ids.size(), old_node_ids.size(), new_src, new_dst);
HeteroGraphPtr ugptr = UnitGraph::CreateFromCOO(1, coo);
HeteroGraphPtr subg = CreateHeteroGraph(hg->meta_graph(), {ugptr});
HaloHeteroSubgraph halo_subg;
......@@ -245,10 +248,10 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLPartitionWithHalo_Hetero")
part_ids.push_back(it->first);
part_nodes.push_back(it->second);
}
// When we construct subgraphs, we need to access both in-edges and out-edges.
// We need to make sure the in-CSR and out-CSR exist. Otherwise, we'll
// try to construct in-CSR and out-CSR in openmp for loop, which will lead
// to some unexpected results.
// When we construct subgraphs, we need to access both in-edges and
// out-edges. We need to make sure the in-CSR and out-CSR exist.
// Otherwise, we'll try to construct in-CSR and out-CSR in openmp for
// loop, which will lead to some unexpected results.
ugptr->GetInCSR();
ugptr->GetOutCSR();
std::vector<std::shared_ptr<HaloHeteroSubgraph>> subgs(max_part_id + 1);
......@@ -270,7 +273,7 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLPartitionWithHalo_Hetero")
*rv = ret_list;
});
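The comment above about calling GetInCSR() and GetOutCSR() before the loop reflects a general pattern: lazily built shared state must be materialized before entering a parallel region, otherwise every worker thread may try to build it concurrently. A generic sketch of the pattern with a toy cache (hypothetical types, not the DGL classes):

// Toy illustration: build a lazily constructed index once, up front, so the
// OpenMP workers only ever read it.
#include <vector>

struct LazyIndex {
  std::vector<int> data;
  bool built = false;
  void Build() {  // not thread-safe on its own
    if (!built) {
      data.assign(1000, 7);
      built = true;
    }
  }
  int Lookup(size_t i) const { return data[i % data.size()]; }
};

void Process(LazyIndex* index, std::vector<int>* out) {
  index->Build();  // materialize here, NOT inside the parallel loop
#pragma omp parallel for
  for (int i = 0; i < static_cast<int>(out->size()); ++i) {
    (*out)[i] = index->Lookup(i);  // read-only access is race-free
  }
}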
template<class IdType>
template <class IdType>
struct EdgeProperty {
IdType eid;
int64_t idx;
......@@ -315,15 +318,18 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLReassignEdges_Hetero")
indexed_eids[j].part_id = part_id_data[i];
}
}
auto comp = [etype_data](const EdgeProperty<IdType> &a, const EdgeProperty<IdType> &b) {
auto comp = [etype_data](
const EdgeProperty<IdType> &a,
const EdgeProperty<IdType> &b) {
if (a.part_id == b.part_id) {
return etype_data[a.eid] < etype_data[b.eid];
} else {
return a.part_id < b.part_id;
}
};
// We only need to sort the edges if the input graph has multiple relations.
// If it's a homogeneous graph, we'll just assign edge Ids based on its previous order.
// We only need to sort the edges if the input graph has multiple
// relations. If it's a homogeneous graph, we'll just assign edge Ids
// based on its previous order.
if (etype->shape[0] > 0) {
std::sort(indexed_eids.begin(), indexed_eids.end(), comp);
}
......@@ -345,7 +351,6 @@ DGL_REGISTER_GLOBAL("partition._CAPI_GetHaloSubgraphInnerNodes_Hetero")
*rv = gptr->inner_nodes[0];
});
DGL_REGISTER_GLOBAL("partition._CAPI_DGLMakeSymmetric_Hetero")
.set_body([](DGLArgs args, DGLRetValue *rv) {
HeteroGraphRef g = args[0];
......@@ -363,13 +368,14 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLMakeSymmetric_Hetero")
gk_csr_Free(&gk_csr);
gk_csr_Free(&sym_gk_csr);
auto new_ugptr = UnitGraph::CreateFromCSC(ugptr->NumVertexTypes(), mat,
ugptr->GetAllowedFormats());
auto new_ugptr = UnitGraph::CreateFromCSC(
ugptr->NumVertexTypes(), mat, ugptr->GetAllowedFormats());
std::vector<HeteroGraphPtr> rel_graphs = {new_ugptr};
*rv = HeteroGraphRef(std::make_shared<HeteroGraph>(
hgptr->meta_graph(), rel_graphs, hgptr->NumVerticesPerType()));
#else
LOG(FATAL) << "The fast version of making symmetric graph is not supported in Windows.";
LOG(FATAL) << "The fast version of making symmetric graph is not "
"supported in Windows.";
#endif // !defined(_WIN32)
});
......
......@@ -4,15 +4,16 @@
* \brief Remove edges.
*/
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/registry.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <utility>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <tuple>
#include <utility>
#include <vector>
namespace dgl {
......@@ -21,8 +22,8 @@ using namespace dgl::aten;
namespace transform {
std::pair<HeteroGraphPtr, std::vector<IdArray>>
RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
std::pair<HeteroGraphPtr, std::vector<IdArray>> RemoveEdges(
const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
std::vector<IdArray> induced_eids;
std::vector<HeteroGraphPtr> rel_graphs;
const int64_t num_etypes = graph->NumEdgeTypes();
......@@ -40,23 +41,30 @@ RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
const COOMatrix &coo = graph->GetCOOMatrix(etype);
const COOMatrix &result = COORemove(coo, eids[etype]);
new_rel_graph = CreateFromCOO(
num_ntypes_rel, result.num_rows, result.num_cols, result.row, result.col);
num_ntypes_rel, result.num_rows, result.num_cols, result.row,
result.col);
induced_eids_rel = result.data;
} else if (fmt == SparseFormat::kCSR) {
const CSRMatrix &csr = graph->GetCSRMatrix(etype);
const CSRMatrix &result = CSRRemove(csr, eids[etype]);
new_rel_graph = CreateFromCSR(
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr, result.indices,
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
result.indices,
// TODO(BarclayII): make CSR support null eid array
Range(0, result.indices->shape[0], result.indices->dtype.bits, result.indices->ctx));
Range(
0, result.indices->shape[0], result.indices->dtype.bits,
result.indices->ctx));
induced_eids_rel = result.data;
} else if (fmt == SparseFormat::kCSC) {
const CSRMatrix &csc = graph->GetCSCMatrix(etype);
const CSRMatrix &result = CSRRemove(csc, eids[etype]);
new_rel_graph = CreateFromCSC(
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr, result.indices,
num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
result.indices,
// TODO(BarclayII): make CSR support null eid array
Range(0, result.indices->shape[0], result.indices->dtype.bits, result.indices->ctx));
Range(
0, result.indices->shape[0], result.indices->dtype.bits,
result.indices->ctx));
induced_eids_rel = result.data;
}
......@@ -70,7 +78,7 @@ RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids) {
}
DGL_REGISTER_GLOBAL("transform._CAPI_DGLRemoveEdges")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
const HeteroGraphRef graph_ref = args[0];
const std::vector<IdArray> &eids = ListValueToVector<IdArray>(args[1]);
......
......@@ -19,16 +19,18 @@
#include "to_bipartite.h"
#include <dgl/base_heterograph.h>
#include <dgl/transform.h>
#include <dgl/array.h>
#include <dgl/packed_func_ext.h>
#include <dgl/base_heterograph.h>
#include <dgl/immutable_graph.h>
#include <dgl/runtime/registry.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <vector>
#include <dgl/runtime/registry.h>
#include <dgl/transform.h>
#include <tuple>
#include <utility>
#include <vector>
#include "../../array/cpu/array_utils.h"
namespace dgl {
......@@ -42,11 +44,11 @@ namespace {
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCPU.
template<typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* const lhs_nodes_ptr) {
std::vector<IdArray>& lhs_nodes = *lhs_nodes_ptr;
template <typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockCPU(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes_ptr) {
std::vector<IdArray> &lhs_nodes = *lhs_nodes_ptr;
const bool generate_lhs_nodes = lhs_nodes.empty();
const int64_t num_etypes = graph->NumEdgeTypes();
......@@ -56,7 +58,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
CHECK(rhs_nodes.size() == static_cast<size_t>(num_ntypes))
<< "rhs_nodes not given for every node type";
const std::vector<IdHashMap<IdType>> rhs_node_mappings(rhs_nodes.begin(), rhs_nodes.end());
const std::vector<IdHashMap<IdType>> rhs_node_mappings(
rhs_nodes.begin(), rhs_nodes.end());
std::vector<IdHashMap<IdType>> lhs_node_mappings;
if (generate_lhs_nodes) {
......@@ -66,16 +69,16 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
else
lhs_node_mappings.resize(num_ntypes);
} else {
lhs_node_mappings = std::vector<IdHashMap<IdType>>(lhs_nodes.begin(), lhs_nodes.end());
lhs_node_mappings =
std::vector<IdHashMap<IdType>>(lhs_nodes.begin(), lhs_nodes.end());
}
for (int64_t etype = 0; etype < num_etypes; ++etype) {
const auto src_dst_types = graph->GetEndpointTypes(etype);
const dgl_type_t srctype = src_dst_types.first;
const dgl_type_t dsttype = src_dst_types.second;
if (!aten::IsNullArray(rhs_nodes[dsttype])) {
const EdgeArray& edges = graph->Edges(etype);
const EdgeArray &edges = graph->Edges(etype);
if (generate_lhs_nodes) {
lhs_node_mappings[srctype].Update(edges.src);
}
......@@ -89,8 +92,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
const auto meta_graph = graph->meta_graph();
const EdgeArray etypes = meta_graph->Edges("eid");
const IdArray new_dst = Add(etypes.dst, num_ntypes);
const auto new_meta_graph = ImmutableGraph::CreateFromCOO(
num_ntypes * 2, etypes.src, new_dst);
const auto new_meta_graph =
ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);
for (int64_t ntype = 0; ntype < num_ntypes; ++ntype)
num_nodes_per_type.push_back(lhs_node_mappings[ntype].Size());
......@@ -108,8 +111,8 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
if (rhs_map.Size() == 0) {
// No rhs nodes are given for this edge type. Create an empty graph.
rel_graphs.push_back(CreateFromCOO(
2, lhs_map.Size(), rhs_map.Size(),
aten::NullArray(), aten::NullArray()));
2, lhs_map.Size(), rhs_map.Size(), aten::NullArray(),
aten::NullArray()));
induced_edges.push_back(aten::NullArray());
} else {
IdArray new_src = lhs_map.Map(edge_arrays[etype].src, -1);
......@@ -117,18 +120,18 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
// Check whether there are unmapped IDs and raise error.
for (int64_t i = 0; i < new_dst->shape[0]; ++i)
CHECK_NE(new_dst.Ptr<IdType>()[i], -1)
<< "Node " << edge_arrays[etype].dst.Ptr<IdType>()[i] << " does not exist"
<< "Node " << edge_arrays[etype].dst.Ptr<IdType>()[i]
<< " does not exist"
<< " in `rhs_nodes`. Argument `rhs_nodes` must contain all the edge"
<< " destination nodes.";
rel_graphs.push_back(CreateFromCOO(
2, lhs_map.Size(), rhs_map.Size(),
new_src, new_dst));
rel_graphs.push_back(
CreateFromCOO(2, lhs_map.Size(), rhs_map.Size(), new_src, new_dst));
induced_edges.push_back(edge_arrays[etype].id);
}
}
const HeteroGraphPtr new_graph = CreateHeteroGraph(
new_meta_graph, rel_graphs, num_nodes_per_type);
const HeteroGraphPtr new_graph =
CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);
if (generate_lhs_nodes) {
    CHECK_EQ(lhs_nodes.size(), 0) << "InternalError: lhs_nodes should be empty "
......@@ -141,59 +144,56 @@ ToBlockCPU(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
} // namespace
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCPU, int32_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int32_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockCPU<int32_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCPU, int64_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCPU, int64_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockCPU<int64_t>(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
#ifdef DGL_USE_CUDA
// Forward declaration of GPU ToBlock implementations - actual implementation is in
// Forward declaration of GPU ToBlock implementations - actual implementation is
// in
// ./cuda/cuda_to_block.cu
// This is to get around the broken name mangling in VS2019 CL 16.5.5 + CUDA 11.3
// which complains that the two template specializations have the same signature.
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU32(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdArray>* const);
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlockGPU64(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdArray>* const);
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCUDA, int32_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
// This is to get around the broken name mangling in VS2019 CL 16.5.5 +
// CUDA 11.3 which complains that the two template specializations have the same
// signature.
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU32(
HeteroGraphPtr, const std::vector<IdArray> &, bool,
std::vector<IdArray> *const);
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlockGPU64(
HeteroGraphPtr, const std::vector<IdArray> &, bool,
std::vector<IdArray> *const);
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int32_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockGPU32(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
template<>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock<kDGLCUDA, int64_t>(HeteroGraphPtr graph,
const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs,
std::vector<IdArray>* const lhs_nodes) {
template <>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock<kDGLCUDA, int64_t>(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray> *const lhs_nodes) {
return ToBlockGPU64(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}
#endif // DGL_USE_CUDA
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBlock")
.set_body([] (DGLArgs args, DGLRetValue *rv) {
.set_body([](DGLArgs args, DGLRetValue *rv) {
const HeteroGraphRef graph_ref = args[0];
const std::vector<IdArray> &rhs_nodes = ListValueToVector<IdArray>(args[1]);
const std::vector<IdArray> &rhs_nodes =
ListValueToVector<IdArray>(args[1]);
const bool include_rhs_in_lhs = args[2];
std::vector<IdArray> lhs_nodes = ListValueToVector<IdArray>(args[3]);
......@@ -203,8 +203,7 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBlock")
ATEN_XPU_SWITCH_CUDA(graph_ref->Context().device_type, XPU, "ToBlock", {
ATEN_ID_TYPE_SWITCH(graph_ref->DataType(), IdType, {
std::tie(new_graph, induced_edges) = ToBlock<XPU, IdType>(
graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs,
&lhs_nodes);
graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs, &lhs_nodes);
});
});
......
......@@ -44,9 +44,9 @@ namespace transform {
*
* @return The block and the induced edges.
*/
template<DGLDeviceType XPU, typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>>
ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
template <DGLDeviceType XPU, typename IdType>
std::tuple<HeteroGraphPtr, std::vector<IdArray>> ToBlock(
HeteroGraphPtr graph, const std::vector<IdArray>& rhs_nodes,
bool include_rhs_in_lhs, std::vector<IdArray>* lhs_nodes);
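To make the lhs/rhs terminology concrete, below is a hypothetical standalone sketch of the core relabelling a block construction performs (the DGL version additionally handles node/edge types, devices, error checking, and induced edge IDs): destination (rhs) nodes get compact IDs from `rhs_nodes`, source (lhs) IDs are assigned on first appearance, optionally seeded with the rhs nodes, and edges are rewritten into those compact ID spaces.

// Standalone block-relabelling sketch (not the DGL internals).
#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

struct BlockSketch {
  std::vector<std::pair<int64_t, int64_t>> edges;  // (new_src, new_dst)
  std::vector<int64_t> lhs_nodes;                  // new lhs id -> original id
};

BlockSketch ToBlockSketch(
    const std::vector<std::pair<int64_t, int64_t>>& edges,
    const std::vector<int64_t>& rhs_nodes, bool include_rhs_in_lhs) {
  std::unordered_map<int64_t, int64_t> rhs_map, lhs_map;
  for (size_t i = 0; i < rhs_nodes.size(); ++i)
    rhs_map[rhs_nodes[i]] = static_cast<int64_t>(i);

  BlockSketch block;
  auto lhs_id = [&](int64_t orig) -> int64_t {
    auto it = lhs_map.find(orig);
    if (it != lhs_map.end()) return it->second;
    const int64_t id = static_cast<int64_t>(block.lhs_nodes.size());
    lhs_map[orig] = id;
    block.lhs_nodes.push_back(orig);
    return id;
  };
  // Optionally let the rhs nodes occupy the leading lhs IDs.
  if (include_rhs_in_lhs)
    for (int64_t v : rhs_nodes) lhs_id(v);

  for (const auto& e : edges) {
    auto dst = rhs_map.find(e.second);
    if (dst == rhs_map.end()) continue;  // edge does not end in rhs_nodes
    block.edges.emplace_back(lhs_id(e.first), dst->second);
  }
  return block;
}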
} // namespace transform
......