Unverified Commit b2d38ca8 authored by Hongzhi (Steve) Chen and committed by GitHub

[Misc] clang-format auto fix. (#4803)



* [Misc] clang-format auto fix.

* manual
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 07dc8fb6
......@@ -10,6 +10,7 @@
#include <dgl/graph_serializer.h>
#include <dmlc/io.h>
#include <dmlc/serializer.h>
#include <memory>
namespace dmlc {
......
/*!
* Copyright (c) 2020-2022 by Contributors
* \file array/tensordispatch.h
* \brief This file defines the dispatcher of tensor operators to framework-specific
* implementations.
* \brief This file defines the dispatcher of tensor operators to
* framework-specific implementations.
*
* The dispatcher consists of a TensorDispatcher singleton in DGL C library and
* one separately-built shared library per supported backend.
......@@ -15,14 +15,14 @@
* The TensorDispatcher singleton maintains a mapping from an array operator to
* the address of the corresponding symbol in the shared library. During
* initialization, the TensorDispatcher checks which backend DGL is using.
* It then locates and opens the corresponding shared library using dlopen(3) (or
* LoadLibrary in Windows), and populates the said mapping above with dlsym(3)
* (or GetProcAddress in Windows).
* It then locates and opens the corresponding shared library using dlopen(3)
* (or LoadLibrary in Windows), and populates the said mapping above with
* dlsym(3) (or GetProcAddress in Windows).
*
* A tensor operator in TensorDispatcher first checks whether the corresponding symbol
* address is found in the mapping. If so, it calls the function located at the
* symbol address instead, allocate/free pieces of memory on CPU/GPU.
* If not, it falls back to DeviceAPI::AllocWorkspace/FreeWorkspace.
* A tensor operator in TensorDispatcher first checks whether the corresponding
* symbol address is found in the mapping. If so, it calls the function located
* at that symbol address to allocate/free pieces of memory on CPU/GPU. If not,
* it falls back to DeviceAPI::AllocWorkspace/FreeWorkspace.
*/
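For reference, the dlopen/dlsym scheme described above can be sketched in a few self-contained lines; the library name, symbol name, and function signature below are placeholders for illustration, not DGL's actual adapter contract.

#include <dlfcn.h>

#include <cstddef>
#include <cstdio>

int main() {
  // Open a backend-specific adapter library (the name is a placeholder).
  void* handle = dlopen("libtensoradapter_backend.so", RTLD_LAZY | RTLD_LOCAL);
  if (!handle) {
    std::printf("adapter unavailable; would fall back to DeviceAPI\n");
    return 0;
  }
  // Resolve one entry point and cast it to its assumed signature.
  using AllocFn = void* (*)(std::size_t);
  auto alloc = reinterpret_cast<AllocFn>(dlsym(handle, "CPURawAlloc"));
  if (alloc) {
    void* buffer = alloc(1024);  // delegate allocation to the backend
    (void)buffer;
  }
  dlclose(handle);
  return 0;
}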
#ifndef DGL_RUNTIME_TENSORDISPATCH_H_
......@@ -38,14 +38,18 @@
#endif // DGL_USE_CUDA
#include "ndarray.h"
/*! \brief Casts a pointer \c entry to a function pointer with signature of \c func */
#define FUNCCAST(func, entry) (*reinterpret_cast<decltype(&(func))>(entry))
/*!
* \brief Casts a pointer \c entry to a function pointer with signature of \c
* func.
*/
#define FUNCCAST(func, entry) (*reinterpret_cast<decltype(&(func))>(entry))
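To make the macro concrete: assuming a hypothetical adapter function with signature void* CPURawAlloc(size_t), a call through FUNCCAST expands roughly as shown below, where entry is the raw void* obtained from dlsym.

// FUNCCAST(tensoradapter::CPURawAlloc, entry)(nbytes)
//   ==> (*reinterpret_cast<void* (*)(size_t)>(entry))(nbytes)
// i.e. the stored symbol address is reinterpreted as a pointer to a function
// with CPURawAlloc's signature and invoked directly.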
namespace dgl {
namespace runtime {
/*!
* \brief Dispatcher that delegates the function calls to framework-specific C++ APIs.
* \brief Dispatcher that delegates the function calls to framework-specific C++
* APIs.
*
* This class is not thread-safe.
*/
......@@ -57,17 +61,14 @@ class TensorDispatcher {
return &inst;
}
/*! \brief Whether an adapter library is available */
inline bool IsAvailable() {
return available_;
}
/*! \brief Whether an adapter library is available. */
inline bool IsAvailable() { return available_; }
/*! \brief Load symbols from the given tensor adapter library path */
bool Load(const char *path_cstr);
/*! \brief Load symbols from the given tensor adapter library path. */
bool Load(const char* path_cstr);
/*!
* \brief Allocate a piece of CPU memory via
* PyTorch's CPUAllocator.
* \brief Allocate a piece of CPU memory via PyTorch's CPUAllocator.
* Used in CPUDeviceAPI::AllocWorkspace().
*
* \param nbytes The size to be allocated.
......@@ -94,7 +95,7 @@ class TensorDispatcher {
* \brief Allocate a piece of GPU memory via
* PyTorch's THCCachingAllocator.
* Used in CUDADeviceAPI::AllocWorkspace().
*
*
* \note THCCachingAllocator specifies the device to allocate on
* via cudaGetDevice(). Make sure to call cudaSetDevice()
* before invoking this function.
......@@ -120,15 +121,15 @@ class TensorDispatcher {
}
/*!
* \brief Find the current PyTorch CUDA stream
* Used in runtime::getCurrentCUDAStream().
*
* \note PyTorch pre-allocates/sets the current CUDA stream
* on current device via cudaGetDevice(). Make sure to call cudaSetDevice()
* before invoking this function.
*
* \return cudaStream_t stream handle
*/
* \brief Find the current PyTorch CUDA stream
* Used in runtime::getCurrentCUDAStream().
*
* \note PyTorch pre-allocates/sets the current CUDA stream
* on current device via cudaGetDevice(). Make sure to call cudaSetDevice()
* before invoking this function.
*
* \return cudaStream_t stream handle
*/
inline cudaStream_t CUDAGetCurrentStream() {
auto entry = entrypoints_[Op::kCUDACurrentStream];
return FUNCCAST(tensoradapter::CUDACurrentStream, entry)();
......@@ -146,8 +147,8 @@ class TensorDispatcher {
inline void RecordStream(void* ptr, DGLStreamHandle stream, int device_id) {
#ifdef DGL_USE_CUDA
auto entry = entrypoints_[Op::kRecordStream];
FUNCCAST(tensoradapter::RecordStream, entry)(
ptr, static_cast<cudaStream_t>(stream), device_id);
FUNCCAST(tensoradapter::RecordStream, entry)
(ptr, static_cast<cudaStream_t>(stream), device_id);
#endif // DGL_USE_CUDA
}
......@@ -162,14 +163,10 @@ class TensorDispatcher {
*
* Must match the functions in tensoradapter/include/tensoradapter.h.
*/
static constexpr const char *names_[] = {
"CPURawAlloc",
"CPURawDelete",
static constexpr const char* names_[] = {
"CPURawAlloc", "CPURawDelete",
#ifdef DGL_USE_CUDA
"CUDARawAlloc",
"CUDARawDelete",
"CUDACurrentStream",
"RecordStream",
"CUDARawAlloc", "CUDARawDelete", "CUDACurrentStream", "RecordStream",
#endif // DGL_USE_CUDA
};
......@@ -191,13 +188,9 @@ class TensorDispatcher {
/*! \brief Entrypoints of each function */
void* entrypoints_[num_entries_] = {
nullptr,
nullptr,
nullptr, nullptr,
#ifdef DGL_USE_CUDA
nullptr,
nullptr,
nullptr,
nullptr,
nullptr, nullptr, nullptr, nullptr,
#endif // DGL_USE_CUDA
};
......
......@@ -22,26 +22,26 @@ class ThreadGroup {
public:
class Impl;
/*!
* \brief Creates a collection of threads which run a provided function.
*
* \param num_workers The total number of worker threads in this group.
Includes main thread if `exclude_worker0 = true`
* \param worker_callback A callback which is run in its own thread.
Receives the worker_id as an argument.
* \param exclude_worker0 Whether to use the main thread as a worker.
* If `true`, worker0 will not be launched in a new thread and
* `worker_callback` will only be called for values >= 1. This
* allows use of the main thread as a worker.
*/
ThreadGroup(int num_workers,
std::function<void(int)> worker_callback,
bool exclude_worker0 = false);
/*!
* \brief Creates a collection of threads which run a provided function.
*
* \param num_workers The total number of worker threads in this group.
Includes main thread if `exclude_worker0 = true`
* \param worker_callback A callback which is run in its own thread.
Receives the worker_id as an argument.
* \param exclude_worker0 Whether to use the main thread as a worker.
* If `true`, worker0 will not be launched in a new thread and
* `worker_callback` will only be called for values >= 1. This
* allows use of the main thread as a worker.
*/
ThreadGroup(
int num_workers, std::function<void(int)> worker_callback,
bool exclude_worker0 = false);
~ThreadGroup();
/*!
* \brief Blocks until all non-main threads in the pool finish.
*/
/*!
* \brief Blocks until all non-main threads in the pool finish.
*/
void Join();
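The constructor contract above (spawn num_workers workers, optionally keeping worker 0 on the main thread) can be illustrated with a minimal standard-library sketch; this is not DGL's implementation and omits affinity handling.

#include <functional>
#include <thread>
#include <vector>

class SimpleThreadGroup {
 public:
  // Spawn workers [start, num_workers); when exclude_worker0 is true, worker 0
  // is left for the caller to run on the main thread.
  SimpleThreadGroup(
      int num_workers, std::function<void(int)> worker_callback,
      bool exclude_worker0 = false)
      : callback_(std::move(worker_callback)) {
    for (int i = exclude_worker0 ? 1 : 0; i < num_workers; ++i)
      threads_.emplace_back(callback_, i);
  }
  // Blocks until all non-main threads finish.
  void Join() {
    for (auto& t : threads_) t.join();
  }

 private:
  std::function<void(int)> callback_;
  std::vector<std::thread> threads_;
};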
enum AffinityMode : int {
......@@ -70,8 +70,8 @@ class ThreadGroup {
/*!
* \brief Platform-agnostic no-op.
*/
// This used to be Yield(), renaming to YieldThread() because windows.h defined it as a
// macro in later SDKs.
// This used to be Yield(), renaming to YieldThread() because windows.h defined
// it as a macro in later SDKs.
void YieldThread();
/*!
......@@ -79,7 +79,6 @@ void YieldThread();
*/
int MaxConcurrency();
} // namespace threading
} // namespace runtime
} // namespace dgl
......
......@@ -6,10 +6,11 @@
#ifndef DGL_SAMPLER_H_
#define DGL_SAMPLER_H_
#include <vector>
#include <string>
#include <cstdlib>
#include <ctime>
#include <string>
#include <vector>
#include "graph_interface.h"
#include "nodeflow.h"
......@@ -32,13 +33,11 @@ class SamplerOp {
* \param probability the transition probability (float/double).
* \return a NodeFlow graph.
*/
template<typename ValueType>
static NodeFlow NeighborSample(const ImmutableGraph *graph,
const std::vector<dgl_id_t>& seeds,
const std::string &edge_type,
int num_hops, int expand_factor,
const bool add_self_loop,
const ValueType *probability);
template <typename ValueType>
static NodeFlow NeighborSample(
const ImmutableGraph *graph, const std::vector<dgl_id_t> &seeds,
const std::string &edge_type, int num_hops, int expand_factor,
const bool add_self_loop, const ValueType *probability);
/*!
* \brief Sample a graph from the seed vertices with layer sampling.
......@@ -50,10 +49,9 @@ class SamplerOp {
* \param layer_sizes The size of layers.
* \return a NodeFlow graph.
*/
static NodeFlow LayerUniformSample(const ImmutableGraph *graph,
const std::vector<dgl_id_t>& seeds,
const std::string &neigh_type,
IdArray layer_sizes);
static NodeFlow LayerUniformSample(
const ImmutableGraph *graph, const std::vector<dgl_id_t> &seeds,
const std::string &neigh_type, IdArray layer_sizes);
};
} // namespace dgl
......
......@@ -6,40 +6,37 @@
#ifndef DGL_SAMPLING_NEGATIVE_H_
#define DGL_SAMPLING_NEGATIVE_H_
#include <dgl/base_heterograph.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <utility>
namespace dgl {
namespace sampling {
/*!
* \brief Given an edge type, uniformly sample source-destination pairs that do not have
* an edge in between using rejection sampling.
* \brief Given an edge type, uniformly sample source-destination pairs that do
* not have an edge in between using rejection sampling.
*
* \note This function may not return the same number of elements as the given number
* of samples.
* \note This function requires sorting the CSR or CSC matrix of the graph in-place. It
* prefers CSC over CSR.
* \note This function may not return the same number of elements as the given
* number of samples.
* \note This function requires sorting the CSR or CSC matrix of the graph
* in-place. It prefers CSC over CSR.
*
* \param hg The graph.
* \param etype The edge type.
* \param num_samples The number of negative examples to sample.
* \param num_trials The number of rejection sampling trials.
* \param exclude_self_loops Do not include the examples where the source equals the
* destination.
* \param exclude_self_loops Do not include the examples where the source equals
* the destination.
* \param replace Whether to sample with replacement.
* \param redundancy How much redundant negative examples to take in case of duplicate examples.
* \param redundancy How many redundant negative examples to take in case of
* duplicate examples.
* \return The pair of source and destination tensors.
*/
std::pair<IdArray, IdArray> GlobalUniformNegativeSampling(
HeteroGraphPtr hg,
dgl_type_t etype,
int64_t num_samples,
int num_trials,
bool exclude_self_loops,
bool replace,
double redundancy);
HeteroGraphPtr hg, dgl_type_t etype, int64_t num_samples, int num_trials,
bool exclude_self_loops, bool replace, double redundancy);
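To illustrate the rejection-sampling contract above, including why fewer than num_samples pairs may be returned, here is a self-contained sketch; has_edge is a hypothetical predicate standing in for the sorted CSC/CSR lookup.

#include <cstdint>
#include <functional>
#include <random>
#include <utility>
#include <vector>

std::pair<std::vector<int64_t>, std::vector<int64_t>> NegativeSampleSketch(
    int64_t num_src, int64_t num_dst, int64_t num_samples, int num_trials,
    bool exclude_self_loops,
    const std::function<bool(int64_t, int64_t)>& has_edge) {
  std::mt19937_64 rng(42);
  std::uniform_int_distribution<int64_t> src_dist(0, num_src - 1);
  std::uniform_int_distribution<int64_t> dst_dist(0, num_dst - 1);
  std::vector<int64_t> src, dst;
  for (int64_t i = 0; i < num_samples; ++i) {
    for (int t = 0; t < num_trials; ++t) {
      int64_t u = src_dist(rng), v = dst_dist(rng);
      if (exclude_self_loops && u == v) continue;  // reject self loops
      if (has_edge(u, v)) continue;                // reject existing edges
      src.push_back(u);
      dst.push_back(v);
      break;
    }
    // If every trial was rejected, no pair is emitted for this sample, which
    // is why the result may contain fewer than num_samples pairs.
  }
  return {std::move(src), std::move(dst)};
}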
}; // namespace sampling
}; // namespace dgl
......
......@@ -6,81 +6,75 @@
#ifndef DGL_SAMPLING_NEIGHBOR_H_
#define DGL_SAMPLING_NEIGHBOR_H_
#include <dgl/base_heterograph.h>
#include <dgl/array.h>
#include <dgl/base_heterograph.h>
#include <vector>
namespace dgl {
namespace sampling {
/*!
* \brief Sample from the neighbors of the given nodes and return the sampled edges as a graph.
* \brief Sample from the neighbors of the given nodes and return the sampled
* edges as a graph.
*
* When sampling with replacement, the sampled subgraph could have parallel edges.
* When sampling with replacement, the sampled subgraph could have parallel
* edges.
*
* For sampling without replacement, if fanout > the number of neighbors, all the
* neighbors will be sampled.
*
* \param hg The input graph.
* \param nodes Node IDs of each type. The vector length must be equal to the number
* of node types. Empty array is allowed.
* \param fanouts Number of sampled neighbors for each edge type. The vector length
* should be equal to the number of edge types, or one if they all
* have the same fanout.
* \param nodes Node IDs of each type. The vector length must be equal to the
* number of node types. Empty array is allowed.
* \param fanouts Number of sampled neighbors for each edge type. The vector
* length should be equal to the number of edge types, or one if they all have
* the same fanout.
* \param dir Edge direction.
* \param probability A vector of 1D float arrays, indicating the transition probability of
* each edge by edge type. An empty float array assumes uniform transition.
* \param exclude_edges Edges IDs of each type which will be excluded during sampling.
* The vector length must be equal to the number of edges types. Empty array is allowed.
* \param probability A vector of 1D float arrays, indicating the transition
* probability of each edge by edge type. An empty float array assumes uniform
* transition.
* \param exclude_edges Edge IDs of each type which will be excluded during
* sampling. The vector length must be equal to the number of edge types. Empty
* array is allowed.
* \param replace If true, sample with replacement.
* \return Sampled neighborhoods as a graph. The return graph has the same schema as the
* original one.
* \return Sampled neighborhoods as a graph. The return graph has the same
* schema as the original one.
*/
HeteroSubgraph SampleNeighbors(
const HeteroGraphPtr hg,
const std::vector<IdArray>& nodes,
const std::vector<int64_t>& fanouts,
EdgeDir dir,
const HeteroGraphPtr hg, const std::vector<IdArray>& nodes,
const std::vector<int64_t>& fanouts, EdgeDir dir,
const std::vector<FloatArray>& probability,
const std::vector<IdArray>& exclude_edges,
bool replace = true);
const std::vector<IdArray>& exclude_edges, bool replace = true);
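The fanout rule above (sampling without replacement, taking every neighbor when fanout is at least the degree) is easy to see in a reduced sketch over a plain adjacency list; nothing below is a DGL API.

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

// Sample up to `fanout` distinct neighbors of `node`; when fanout >= degree,
// all neighbors are returned, matching the documented behaviour.
std::vector<int64_t> UniformNeighborSketch(
    const std::vector<std::vector<int64_t>>& adjacency, int64_t node,
    int64_t fanout, std::mt19937_64& rng) {
  std::vector<int64_t> neighbors = adjacency[node];
  if (fanout >= static_cast<int64_t>(neighbors.size())) return neighbors;
  std::shuffle(neighbors.begin(), neighbors.end(), rng);
  neighbors.resize(fanout);  // keep an arbitrary subset of size `fanout`
  return neighbors;
}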
/*!
* Select the neighbors with k-largest weights on the connecting edges for each given node.
* Select the neighbors with k-largest weights on the connecting edges for each
* given node.
*
* If k > the number of neighbors, all the neighbors are sampled.
*
* \param hg The input graph.
* \param nodes Node IDs of each type. The vector length must be equal to the number
* of node types. Empty array is allowed.
* \param k The k value for each edge type. The vector length
* should be equal to the number of edge types, or one if they all
* have the same fanout.
* \param nodes Node IDs of each type. The vector length must be equal to the
* number of node types. Empty array is allowed.
* \param k The k value for each edge type. The vector length should be equal to
* the number of edge types, or one if they all have the same fanout.
* \param dir Edge direction.
* \param weight A vector of 1D float arrays, indicating the weights associated with
* each edge.
* \param ascending If true, elements are sorted by ascending order, equivalent to find
* the K smallest values. Otherwise, find K largest values.
* \return Sampled neighborhoods as a graph. The return graph has the same schema as the
* original one.
* \param weight A vector of 1D float arrays, indicating the weights associated
* with each edge.
* \param ascending If true, elements are sorted in ascending order, equivalent
* to finding the K smallest values. Otherwise, find the K largest values.
* \return Sampled neighborhoods as a graph. The return graph has the same
* schema as the original one.
*/
HeteroSubgraph SampleNeighborsTopk(
const HeteroGraphPtr hg,
const std::vector<IdArray>& nodes,
const std::vector<int64_t>& k,
EdgeDir dir,
const std::vector<FloatArray>& weight,
bool ascending = false);
const HeteroGraphPtr hg, const std::vector<IdArray>& nodes,
const std::vector<int64_t>& k, EdgeDir dir,
const std::vector<FloatArray>& weight, bool ascending = false);
HeteroSubgraph SampleNeighborsBiased(
const HeteroGraphPtr hg,
const IdArray& nodes,
const int64_t fanouts,
const NDArray& bias,
const NDArray& tag_offset,
const EdgeDir dir,
const bool replace
);
const HeteroGraphPtr hg, const IdArray& nodes, const int64_t fanouts,
const NDArray& bias, const NDArray& tag_offset, const EdgeDir dir,
const bool replace);
} // namespace sampling
} // namespace dgl
......
......@@ -6,11 +6,12 @@
#ifndef DGL_SAMPLING_RANDOMWALKS_H_
#define DGL_SAMPLING_RANDOMWALKS_H_
#include <dgl/base_heterograph.h>
#include <dgl/array.h>
#include <vector>
#include <utility>
#include <dgl/base_heterograph.h>
#include <tuple>
#include <utility>
#include <vector>
namespace dgl {
......@@ -19,71 +20,67 @@ namespace sampling {
/*!
* \brief Metapath-based random walk.
* \param hg The heterograph.
* \param seeds A 1D array of seed nodes, with the type the source type of the first
* edge type in the metapath.
* \param seeds A 1D array of seed nodes, with the type the source type of the
* first edge type in the metapath.
* \param metapath A 1D array of edge types representing the metapath.
* \param prob A vector of 1D float arrays, indicating the transition probability of
* each edge by edge type. An empty float array assumes uniform transition.
* \param prob A vector of 1D float arrays, indicating the transition
* probability of each edge by edge type. An empty float array assumes uniform
* transition.
* \return A pair of
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
* paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
* paths that terminated early are padded with -1.
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
* IDs. The paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
* The paths that terminated early are padded with -1.
* 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
*/
std::tuple<IdArray, IdArray, TypeArray> RandomWalk(
const HeteroGraphPtr hg,
const IdArray seeds,
const TypeArray metapath,
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const std::vector<FloatArray> &prob);
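To make the -1 padding convention concrete, here is a sketch of a single metapath-guided walk; neighbors(step, node) is a hypothetical callback returning the successors of node under edge type metapath[step].

#include <cstddef>
#include <cstdint>
#include <functional>
#include <random>
#include <vector>

std::vector<int64_t> MetapathWalkSketch(
    int64_t seed, std::size_t metapath_len,
    const std::function<std::vector<int64_t>(std::size_t, int64_t)>& neighbors,
    std::mt19937_64& rng) {
  // One row of the (len(seeds), len(metapath) + 1) node-ID output; entries
  // after an early termination keep the -1 padding.
  std::vector<int64_t> trace(metapath_len + 1, -1);
  trace[0] = seed;
  int64_t current = seed;
  for (std::size_t step = 0; step < metapath_len; ++step) {
    std::vector<int64_t> successors = neighbors(step, current);
    if (successors.empty()) break;  // walk terminates early
    std::uniform_int_distribution<std::size_t> pick(0, successors.size() - 1);
    current = successors[pick(rng)];
    trace[step + 1] = current;
  }
  return trace;
}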
/*!
* \brief Metapath-based random walk with restart probability.
* \param hg The heterograph.
* \param seeds A 1D array of seed nodes, with the type the source type of the first
* edge type in the metapath.
* \param seeds A 1D array of seed nodes, with the type the source type of the
* first edge type in the metapath.
* \param metapath A 1D array of edge types representing the metapath.
* \param prob A vector of 1D float arrays, indicating the transition probability of
* each edge by edge type. An empty float array assumes uniform transition.
* \param restart_prob Restart probability
* \param prob A vector of 1D float arrays, indicating the transition
* probability of each edge by edge type. An empty float array assumes uniform
* transition.
* \param restart_prob Restart probability.
* \return A pair of
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
* paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
* paths that terminated early are padded with -1.
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
* IDs. The paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
* The paths that terminated early are padded with -1.
* 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
*/
std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithRestart(
const HeteroGraphPtr hg,
const IdArray seeds,
const TypeArray metapath,
const std::vector<FloatArray> &prob,
double restart_prob);
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const std::vector<FloatArray> &prob, double restart_prob);
/*!
* \brief Metapath-based random walk with stepwise restart probability. Useful
* \brief Metapath-based random walk with stepwise restart probability. Useful
* for PinSAGE-like models.
* \param hg The heterograph.
* \param seeds A 1D array of seed nodes, with the type the source type of the first
* edge type in the metapath.
* \param seeds A 1D array of seed nodes, with the type the source type of the
* first edge type in the metapath.
* \param metapath A 1D array of edge types representing the metapath.
* \param prob A vector of 1D float arrays, indicating the transition probability of
* each edge by edge type. An empty float array assumes uniform transition.
* \param restart_prob Restart probability array which has the same number of elements
* as \c metapath, indicating the probability to terminate after transition.
* \param prob A vector of 1D float arrays, indicating the transition
* probability of each edge by edge type. An empty float array assumes uniform
* transition.
* \param restart_prob Restart probability array which has the same number of
* elements as \c metapath, indicating the probability to terminate after
* transition.
* \return A pair of
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node IDs. The
* paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs. The
* paths that terminated early are padded with -1.
* 1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
* IDs. The paths that terminated early are padded with -1.
* 2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
* The paths that terminated early are padded with -1.
* 3. One 1D array of shape (len(metapath) + 1) with node type IDs.
*/
std::tuple<IdArray, IdArray, TypeArray> RandomWalkWithStepwiseRestart(
const HeteroGraphPtr hg,
const IdArray seeds,
const TypeArray metapath,
const std::vector<FloatArray> &prob,
FloatArray restart_prob);
const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
const std::vector<FloatArray> &prob, FloatArray restart_prob);
}; // namespace sampling
......
......@@ -7,6 +7,7 @@
#define DGL_SCHEDULER_H_
#include <vector>
#include "runtime/ndarray.h"
namespace dgl {
......@@ -21,8 +22,8 @@ namespace sched {
* \param msg_ids The edge id for each message
* \param vids The destination vertex for each message
* \param recv_ids The recv nodes (for checking zero degree nodes)
* \note If there are multiple messages going into the same destination vertex, then
* there will be multiple copies of the destination vertex in vids
* \note If there are multiple messages going into the same destination vertex,
* then there will be multiple copies of the destination vertex in vids.
* \return a vector of 5 IdArrays for degree bucketing. The 5 arrays are:
* degrees: degrees for each bucket
* nids: destination node ids
......@@ -31,8 +32,8 @@ namespace sched {
* mid_section: number of messages in each bucket (used to split mids)
*/
template <class IdType>
std::vector<IdArray> DegreeBucketing(const IdArray& msg_ids, const IdArray& vids,
const IdArray& recv_ids);
std::vector<IdArray> DegreeBucketing(
const IdArray& msg_ids, const IdArray& vids, const IdArray& recv_ids);
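A reduced sketch of the degree-bucketing idea: destination vertices are grouped by how many messages they receive so that each bucket can be processed with one batched apply call. The types and return layout here are simplified relative to the five IdArrays described above.

#include <cstdint>
#include <map>
#include <vector>

// Map each in-degree to the destination vertices with that degree.
std::map<int64_t, std::vector<int64_t>> DegreeBucketSketch(
    const std::vector<int64_t>& vids) {
  std::map<int64_t, int64_t> in_degree;  // destination -> message count
  for (int64_t v : vids) ++in_degree[v];
  std::map<int64_t, std::vector<int64_t>> buckets;  // degree -> destinations
  for (const auto& kv : in_degree) buckets[kv.second].push_back(kv.first);
  return buckets;
}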
/*!
* \brief Generate degree bucketing schedule for group_apply edge
......@@ -53,8 +54,8 @@ std::vector<IdArray> DegreeBucketing(const IdArray& msg_ids, const IdArray& vids
* new_uids, new_vids, and new_eids)
*/
template <class IdType>
std::vector<IdArray> GroupEdgeByNodeDegree(const IdArray& uids,
const IdArray& vids, const IdArray& eids);
std::vector<IdArray> GroupEdgeByNodeDegree(
const IdArray& uids, const IdArray& vids, const IdArray& eids);
} // namespace sched
......
......@@ -7,50 +7,51 @@
#ifndef DGL_TRANSFORM_H_
#define DGL_TRANSFORM_H_
#include <vector>
#include <tuple>
#include <utility>
#include "base_heterograph.h"
#include <vector>
#include "array.h"
#include "base_heterograph.h"
namespace dgl {
namespace transform {
/*!
* \brief Given a list of graphs, remove the common nodes that do not have inbound and
* outbound edges.
* \brief Given a list of graphs, remove the common nodes that do not have
* inbound and outbound edges.
*
* The graphs should have identical node ID space (i.e. should have the same set of nodes,
* including types and IDs).
* The graphs should have identical node ID space (i.e. should have the same set
* of nodes, including types and IDs).
*
* \param graphs The list of graphs.
* \param always_preserve The list of nodes to preserve regardless of whether the inbound
* or outbound edges exist.
* \param always_preserve The list of nodes to preserve regardless of whether
* the inbound or outbound edges exist.
*
* \return A pair. The first element is the list of compacted graphs, and the second
* element is the mapping from the compacted graphs and the original graph.
* \return A pair. The first element is the list of compacted graphs, and the
* second element is the mapping from the compacted graphs to the original
* graph.
*/
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>>
CompactGraphs(
std::pair<std::vector<HeteroGraphPtr>, std::vector<IdArray>> CompactGraphs(
const std::vector<HeteroGraphPtr> &graphs,
const std::vector<IdArray> &always_preserve);
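For intuition, a sketch of the compaction idea for one homogeneous graph: keep only nodes that appear on an edge (plus always_preserve), relabel them densely, and record the new-to-old mapping. DGL's version does this jointly across several heterogeneous graphs sharing one node space; the names below are illustrative only.

#include <cstdint>
#include <unordered_map>
#include <utility>
#include <vector>

struct CompactResultSketch {
  std::vector<std::pair<int64_t, int64_t>> edges;  // relabeled edges
  std::vector<int64_t> induced_nodes;              // new ID -> original ID
};

CompactResultSketch CompactSketch(
    const std::vector<std::pair<int64_t, int64_t>>& edges,
    const std::vector<int64_t>& always_preserve) {
  std::unordered_map<int64_t, int64_t> remap;
  std::vector<int64_t> induced;
  auto relabel = [&](int64_t old_id) -> int64_t {
    auto it = remap.find(old_id);
    if (it != remap.end()) return it->second;
    int64_t new_id = static_cast<int64_t>(induced.size());
    remap.emplace(old_id, new_id);
    induced.push_back(old_id);
    return new_id;
  };
  CompactResultSketch result;
  for (int64_t v : always_preserve) relabel(v);  // kept regardless of edges
  for (const auto& e : edges)
    result.edges.emplace_back(relabel(e.first), relabel(e.second));
  result.induced_nodes = std::move(induced);
  return result;
}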
/*!
* \brief Convert a graph into a bipartite-structured graph for message passing.
*
* Specifically, we create one node type \c ntype_l on the "left" side and another
* node type \c ntype_r on the "right" side for each node type \c ntype. The nodes of
* type \c ntype_r would contain the nodes designated by the caller, and node type
* \c ntype_l would contain the nodes that has an edge connecting to one of the
* designated nodes.
* Specifically, we create one node type \c ntype_l on the "left" side and
* another node type \c ntype_r on the "right" side for each node type \c ntype.
* The nodes of type \c ntype_r would contain the nodes designated by the
* caller, and node type \c ntype_l would contain the nodes that have an edge
* connecting to one of the designated nodes.
*
* The nodes of \c ntype_l would also contain the nodes in node type \c ntype_r.
*
* This function is often used for constructing a series of dependency graphs for
* multi-layer message passing, where we first construct a series of frontier graphs
* on the original node space, and run the following to get the bipartite graph needed
* for message passing with each GNN layer:
* This function is often used for constructing a series of dependency graphs
* for multi-layer message passing, where we first construct a series of
* frontier graphs on the original node space, and run the following to get the
* bipartite graph needed for message passing with each GNN layer:
*
* <code>
* bipartites = [None] * len(num_layers)
......@@ -66,20 +67,21 @@ CompactGraphs(
*
* \param graph The graph.
* \param rhs_nodes Designated nodes that would appear on the right side.
* \param include_rhs_in_lhs If false, do not include the nodes of node type \c ntype_r
* in \c ntype_l.
* \param include_rhs_in_lhs If false, do not include the nodes of node type \c
* ntype_r in \c ntype_l.
*
* \return A triplet containing
* * The bipartite-structured graph,
* * The induced node from the left side for each graph,
* * The induced edges.
*
* \note If include_rhs_in_lhs is true, then for each node type \c ntype, the nodes
* in rhs_nodes[ntype] would always appear first in the nodes of type \c ntype_l
* in the new graph.
* \note If include_rhs_in_lhs is true, then for each node type \c ntype, the
* nodes in rhs_nodes[ntype] would always appear first in the nodes of type \c
* ntype_l in the new graph.
*/
std::tuple<HeteroGraphPtr, std::vector<IdArray>, std::vector<IdArray>>
ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool include_rhs_in_lhs);
std::tuple<HeteroGraphPtr, std::vector<IdArray>, std::vector<IdArray>> ToBlock(
HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes,
bool include_rhs_in_lhs);
/*!
* \brief Convert a multigraph to a simple graph.
......@@ -87,7 +89,8 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* \return A triplet of
* * \c hg : The said simple graph.
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge
* type.
*
* \note Example: consider a graph with the following edges
*
......@@ -99,13 +102,14 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
*
* [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
* * The second element is an array \c count. \c count[i] stands for the number
* of edges connecting simple_g.src[i] and simple_g.dst[i] in the original
* graph.
*
* count[0] = [1, 2, 2, 1]
*
* * One can find the mapping between edges from the original graph to the new simple
* graph.
* * One can find the mapping between edges from the original graph to the new
* simple graph.
*
* edge_map[0] = [0, 1, 3, 1, 2, 2]
*/
......@@ -118,11 +122,11 @@ ToSimpleGraph(const HeteroGraphPtr graph);
* \param graph The graph.
* \param eids The edge IDs to remove per edge type.
*
* \return A pair of the graph with edges removed, as well as the edge ID mapping from
* the original graph to the new graph per edge type.
* \return A pair of the graph with edges removed, as well as the edge ID
* mapping from the original graph to the new graph per edge type.
*/
std::pair<HeteroGraphPtr, std::vector<IdArray>>
RemoveEdges(const HeteroGraphPtr graph, const std::vector<IdArray> &eids);
std::pair<HeteroGraphPtr, std::vector<IdArray>> RemoveEdges(
const HeteroGraphPtr graph, const std::vector<IdArray> &eids);
}; // namespace transform
......
......@@ -12,12 +12,12 @@
#include <dmlc/serializer.h>
#include <deque>
#include <memory>
#include <queue>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <memory>
#include "dmlc/logging.h"
......@@ -85,8 +85,9 @@ class StreamWithBuffer : public dmlc::SeekStream {
* // Read from the pointer list sent by the remote side
* StreamWithBuffer buf_strm(&blob, data_ptr_list)
*/
StreamWithBuffer(std::unique_ptr<dmlc::SeekStream> strm,
const std::vector<void*>& data_ptr_list)
StreamWithBuffer(
std::unique_ptr<dmlc::SeekStream> strm,
const std::vector<void*>& data_ptr_list)
: strm_(std::move(strm)), send_to_remote_(true) {
for (void* data : data_ptr_list) {
buffer_list_.emplace_back(data);
......@@ -136,8 +137,8 @@ class StreamWithBuffer : public dmlc::SeekStream {
* \param size buffer size
* \param data_ptr_list pointer list for NDArrays to deconstruct from
*/
StreamWithBuffer(char* p_buffer, size_t size,
const std::vector<void*>& data_ptr_list)
StreamWithBuffer(
char* p_buffer, size_t size, const std::vector<void*>& data_ptr_list)
: strm_(new dmlc::MemoryFixedSizeStream(p_buffer, size)),
send_to_remote_(true) {
for (void* data : data_ptr_list) {
......
......@@ -9,6 +9,7 @@
#include <memory>
#include <tuple>
#include <type_traits>
#include "dmlc/logging.h"
#include "meta_utils.h"
#include "xbyak/xbyak.h"
......@@ -61,10 +62,10 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
public:
typedef typename Op::type DType;
static_assert(
std::is_base_of<std::true_type,
utils::has_type<DType, supported_types>>::value,
"Use case fail dgl::ElemWiseAddUpdate< Operator<DType> > DType is not "
"supported !");
std::is_base_of<
std::true_type, utils::has_type<DType, supported_types>>::value,
"Use case fail dgl::ElemWiseAddUpdate< Operator<DType> > DType is not "
"supported !");
protected:
const Xbyak::Reg64 &r_out_;
......@@ -80,77 +81,86 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
static constexpr int BITS_IN_BYTES = 8;
static constexpr int REG_BIT_SIZE = 512;
static constexpr int UNIT_PER_REG =
REG_BIT_SIZE / (UNIT_SIZE_BYTES * BITS_IN_BYTES);
REG_BIT_SIZE / (UNIT_SIZE_BYTES * BITS_IN_BYTES);
template <class TType, class R1, class R2,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, utils::CheckCmp<TType, float> = true>
void alias_load(R1 r1, R2 r2) {
vmovups(r1, r2);
}
template <class TType, class R1, class R2,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, utils::CheckCmp<TType, double> = true>
void alias_load(R1 r1, R2 r2) {
vmovupd(r1, r2);
}
template <class TType, class R1, class R2,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, utils::CheckCmp<TType, float> = true>
void alias_save(R1 r1, R2 r2) {
alias_load<TType>(r1, r2);
}
template <class TType, class R1, class R2,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, utils::CheckCmp<TType, double> = true>
void alias_save(R1 r1, R2 r2) {
alias_load<TType>(r1, r2);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
void alias_ADD(R1 r1, R2 r2, R3 r3) {
vaddps(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
void alias_ADD(R1 r1, R2 r2, R3 r3) {
vaddpd(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
void alias_SUB(R1 r1, R2 r2, R3 r3) {
vsubps(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
void alias_SUB(R1 r1, R2 r2, R3 r3) {
vsubpd(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
void alias_DIV(R1 r1, R2 r2, R3 r3) {
vdivps(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
void alias_DIV(R1 r1, R2 r2, R3 r3) {
vdivpd(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, float> = true>
void alias_MUL(R1 r1, R2 r2, R3 r3) {
vmulps(r1, r2, r3);
}
template <class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
template <
class TType, class R1, class R2, class R3,
utils::CheckCmp<TType, double> = true>
void alias_MUL(R1 r1, R2 r2, R3 r3) {
vmulpd(r1, r2, r3);
}
template <class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyLhs,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyLhs, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
alias_load<IType>(zmm0, ptr[r_out_ + r9 * sizeof(IType)]);
......@@ -158,9 +168,10 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
alias_ADD<IType>(zmm2, zmm0, zmm1);
alias_save<IType>(ptr[r_out_ + r9 * sizeof(IType)], zmm2);
}
template <class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyRhs,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyRhs, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
alias_load<IType>(zmm0, ptr[r_out_ + r9 * sizeof(IType)]);
......@@ -179,16 +190,20 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
alias_ADD<T>(zmm2, zmm0, zmm2);
alias_save<T>(ptr[r_out_ + r9 * sizeof(T)], zmm2);
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Add,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Add, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
loop_pre<IType>();
alias_ADD<IType>(zmm2, zmm1, zmm2);
loop_post<IType>();
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Sub,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Sub, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
loop_pre<IType>();
......@@ -196,8 +211,10 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
loop_post<IType>();
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Div,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Div, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
loop_pre<IType>();
......@@ -205,8 +222,10 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
loop_post<IType>();
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Mul,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Mul, supported_types> =
true>
void full_chunk_loop_operations() {
typedef typename Operator::type IType;
loop_pre<IType>();
......@@ -214,17 +233,19 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
loop_post<IType>();
}
template <class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyLhs,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyLhs, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
alias_load<IType>(make_zmm(zmm2) | mask, ptr[r_left_ + r9 * sizeof(IType)]);
}
template <class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyRhs,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::CopyRhs, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
alias_load<IType>(make_zmm(zmm2) | mask, ptr[r_right + r9 * sizeof(IType)]);
......@@ -236,32 +257,40 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
alias_load<T>(make_zmm(zmm1) | mask, ptr[r_right + r9 * sizeof(T)]);
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Mul,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Mul, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
remainder_fetch_LR<IType>(mask);
alias_MUL<IType>(zmm2, zmm2, zmm1);
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Add,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Add, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
remainder_fetch_LR<IType>(mask);
alias_ADD<DType>(zmm2, zmm2, zmm1);
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Div,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Div, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
remainder_fetch_LR<IType>(mask);
alias_DIV<DType>(zmm2, zmm2, zmm1);
}
template <class Operator, utils::Verify<Operator, ::dgl::aten::cpu::op::Sub,
supported_types> = true>
template <
class Operator,
utils::Verify<Operator, ::dgl::aten::cpu::op::Sub, supported_types> =
true>
void remainder_operations(const Xbyak::Opmask mask) {
typedef typename Operator::type IType;
remainder_fetch_LR<IType>(mask);
......@@ -280,9 +309,10 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
if (current_cpu.has(Xbyak::util::Cpu::tAVX512F)) {
/* prepare REMAINDER */
mov(r8, r_size_);
and_(r8,
UNIT_PER_REG - 1); // r8_modulo = size/(sizeof(zmm)/sizeof(float))
xor_(r9, r9); // reset r9
and_(
r8,
UNIT_PER_REG - 1); // r8_modulo = size/(sizeof(zmm)/sizeof(float))
xor_(r9, r9); // reset r9
cmp(r_size_, UNIT_PER_REG); // if ( size < 16 ) { }
jl("remainder");
......@@ -306,12 +336,12 @@ class ElemWiseAddUpdate : public Xbyak::CodeGenerator {
sal(rax, cl);
dec(rax); // k1= (1 << r8 )-1
kmovw(k1, eax); // set bitmask
alias_load<DType>(make_zmm(zmm0) | k1,
ptr[r_out_ + r9 * UNIT_SIZE_BYTES]);
alias_load<DType>(
make_zmm(zmm0) | k1, ptr[r_out_ + r9 * UNIT_SIZE_BYTES]);
remainder_operations<Op>(k1);
alias_ADD<DType>(zmm3, zmm2, zmm0);
alias_save<DType>(ptr[r_out_ + r9 * UNIT_SIZE_BYTES],
make_zmm(zmm3) | k1);
alias_save<DType>(
ptr[r_out_ + r9 * UNIT_SIZE_BYTES], make_zmm(zmm3) | k1);
L("done");
applicable_ = true;
log_intel("AVX512F cpu kernel is ready");
......
......@@ -23,8 +23,9 @@ struct has_type<T, std::tuple<U, Ts...>> : has_type<T, std::tuple<Ts...>> {};
template <typename T, typename... Ts>
struct has_type<T, std::tuple<T, Ts...>> : std::true_type {};
template <class OCmp, template <class> class ToP, class Tup,
int ok = std::tuple_size<Tup>::value>
template <
class OCmp, template <class> class ToP, class Tup,
int ok = std::tuple_size<Tup>::value>
struct DeepType;
template <class OCmp, template <class> class ToP, class Tup>
......@@ -38,8 +39,9 @@ struct DeepType<OCmp, ToP, Tup, 2> {
typedef typename std::tuple_element<0, Tup>::type EL1;
typedef typename std::tuple_element<1, Tup>::type EL2;
enum {
value = (std::is_same<OCmp, ToP<EL1>>::value ||
std::is_same<OCmp, ToP<EL2>>::value)
value =
(std::is_same<OCmp, ToP<EL1>>::value ||
std::is_same<OCmp, ToP<EL2>>::value)
};
};
......@@ -49,9 +51,10 @@ struct DeepType<OCmp, ToP, Tup, 3> {
typedef typename std::tuple_element<1, Tup>::type EL2;
typedef typename std::tuple_element<2, Tup>::type EL3;
enum {
value = (std::is_same<OCmp, ToP<EL1>>::value ||
std::is_same<OCmp, ToP<EL2>>::value ||
std::is_same<OCmp, ToP<EL3>>::value)
value =
(std::is_same<OCmp, ToP<EL1>>::value ||
std::is_same<OCmp, ToP<EL2>>::value ||
std::is_same<OCmp, ToP<EL3>>::value)
};
};
......@@ -63,7 +66,7 @@ using CheckCmp = Required<std::is_same<L, R>::value>;
template <class L, class R1, class R2>
using CheckCmp_2 =
Required<std::is_same<L, R1>::value || std::is_same<L, R2>::value>;
Required<std::is_same<L, R1>::value || std::is_same<L, R2>::value>;
template <class OpType, template <class> class TPP, class Tup>
using Verify = Required<utils::DeepType<OpType, TPP, Tup>::value>;
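The Required/CheckCmp/Verify aliases above are what select exactly one of the alias_* or full_chunk_loop_operations overloads in the JIT class earlier in this diff. A reduced, standalone model of that SFINAE dispatch, with no xbyak involved and assuming nothing beyond the standard library:

#include <cstdio>
#include <type_traits>

template <bool B>
using Required = typename std::enable_if<B, bool>::type;

template <class L, class R>
using CheckCmp = Required<std::is_same<L, R>::value>;

// Only the overload whose CheckCmp constraint holds participates in overload
// resolution; the other is removed by SFINAE, mirroring alias_ADD above.
template <class TType, CheckCmp<TType, float> = true>
void alias_add_sketch() { std::puts("would emit vaddps (packed float add)"); }

template <class TType, CheckCmp<TType, double> = true>
void alias_add_sketch() { std::puts("would emit vaddpd (packed double add)"); }

int main() {
  alias_add_sketch<float>();   // selects the float overload
  alias_add_sketch<double>();  // selects the double overload
  return 0;
}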
......
......@@ -3,73 +3,68 @@
* \file api/api_container.cc
* \brief Runtime container APIs. (reference: tvm/src/api/api_lang.cc)
*/
#include <dgl/runtime/ndarray.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/registry.h>
#include <dgl/packed_func_ext.h>
namespace dgl {
namespace runtime {
DGL_REGISTER_GLOBAL("_List")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto ret_obj = std::make_shared<runtime::ListObject>();
for (int i = 0; i < args.size(); ++i) {
ret_obj->data.push_back(args[i].obj_sptr());
}
*rv = ret_obj;
});
DGL_REGISTER_GLOBAL("_List").set_body([](DGLArgs args, DGLRetValue* rv) {
auto ret_obj = std::make_shared<runtime::ListObject>();
for (int i = 0; i < args.size(); ++i) {
ret_obj->data.push_back(args[i].obj_sptr());
}
*rv = ret_obj;
});
DGL_REGISTER_GLOBAL("_ListGetItem")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ListObject>());
auto* o = static_cast<const ListObject*>(sptr.get());
int64_t i = args[1];
CHECK_LT(i, o->data.size()) << "list out of bound";
*rv = o->data[i];
});
DGL_REGISTER_GLOBAL("_ListGetItem").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ListObject>());
auto* o = static_cast<const ListObject*>(sptr.get());
int64_t i = args[1];
CHECK_LT(i, o->data.size()) << "list out of bound";
*rv = o->data[i];
});
DGL_REGISTER_GLOBAL("_ListSize")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ListObject>());
auto* o = static_cast<const ListObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
});
DGL_REGISTER_GLOBAL("_ListSize").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ListObject>());
auto* o = static_cast<const ListObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
});
DGL_REGISTER_GLOBAL("_Map")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
CHECK_EQ(args.size() % 2, 0);
if (args.size() != 0 && args[0].type_code() == kStr) {
// StrMap
StrMapObject::ContainerType data;
for (int i = 0; i < args.size(); i += 2) {
CHECK(args[i].type_code() == kStr)
<< "The key of the map must be string";
CHECK(args[i + 1].type_code() == kObjectHandle)
DGL_REGISTER_GLOBAL("_Map").set_body([](DGLArgs args, DGLRetValue* rv) {
CHECK_EQ(args.size() % 2, 0);
if (args.size() != 0 && args[0].type_code() == kStr) {
// StrMap
StrMapObject::ContainerType data;
for (int i = 0; i < args.size(); i += 2) {
CHECK(args[i].type_code() == kStr) << "The key of the map must be string";
CHECK(args[i + 1].type_code() == kObjectHandle)
<< "The value of the map must be an object type";
data.emplace(std::make_pair(args[i].operator std::string(),
args[i + 1].obj_sptr()));
}
auto obj = std::make_shared<StrMapObject>();
obj->data = std::move(data);
*rv = obj;
} else {
// object container
MapObject::ContainerType data;
for (int i = 0; i < args.size(); i += 2) {
CHECK(args[i].type_code() == kObjectHandle)
data.emplace(std::make_pair(
args[i].operator std::string(), args[i + 1].obj_sptr()));
}
auto obj = std::make_shared<StrMapObject>();
obj->data = std::move(data);
*rv = obj;
} else {
// object container
MapObject::ContainerType data;
for (int i = 0; i < args.size(); i += 2) {
CHECK(args[i].type_code() == kObjectHandle)
<< "The key of the map must be an object type";
CHECK(args[i + 1].type_code() == kObjectHandle)
CHECK(args[i + 1].type_code() == kObjectHandle)
<< "The value of the map must be an object type";
data.emplace(std::make_pair(args[i].obj_sptr(), args[i + 1].obj_sptr()));
}
auto obj = std::make_shared<MapObject>();
obj->data = std::move(data);
*rv = obj;
data.emplace(std::make_pair(args[i].obj_sptr(), args[i + 1].obj_sptr()));
}
});
auto obj = std::make_shared<MapObject>();
obj->data = std::move(data);
*rv = obj;
}
});
DGL_REGISTER_GLOBAL("_EmptyStrMap").set_body([](DGLArgs args, DGLRetValue* rv) {
StrMapObject::ContainerType data;
......@@ -78,84 +73,78 @@ DGL_REGISTER_GLOBAL("_EmptyStrMap").set_body([](DGLArgs args, DGLRetValue* rv) {
*rv = obj;
});
DGL_REGISTER_GLOBAL("_MapSize")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
}
});
DGL_REGISTER_GLOBAL("_MapSize").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.size());
}
});
DGL_REGISTER_GLOBAL("_MapGetItem")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
auto it = o->data.find(args[1].obj_sptr());
CHECK(it != o->data.end()) << "cannot find the key in the map";
*rv = (*it).second;
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
auto it = o->data.find(args[1].operator std::string());
CHECK(it != o->data.end()) << "cannot find the key in the map";
*rv = (*it).second;
}
});
DGL_REGISTER_GLOBAL("_MapGetItem").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
auto it = o->data.find(args[1].obj_sptr());
CHECK(it != o->data.end()) << "cannot find the key in the map";
*rv = (*it).second;
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
auto it = o->data.find(args[1].operator std::string());
CHECK(it != o->data.end()) << "cannot find the key in the map";
*rv = (*it).second;
}
});
DGL_REGISTER_GLOBAL("_MapItems")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
auto rkvs = std::make_shared<ListObject>();
for (const auto& kv : o->data) {
rkvs->data.push_back(kv.first);
rkvs->data.push_back(kv.second);
}
*rv = rkvs;
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
auto rkvs = std::make_shared<ListObject>();
for (const auto& kv : o->data) {
rkvs->data.push_back(MakeValue(kv.first));
rkvs->data.push_back(kv.second);
}
*rv = rkvs;
DGL_REGISTER_GLOBAL("_MapItems").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
auto rkvs = std::make_shared<ListObject>();
for (const auto& kv : o->data) {
rkvs->data.push_back(kv.first);
rkvs->data.push_back(kv.second);
}
});
DGL_REGISTER_GLOBAL("_MapCount")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.count(args[1].obj_sptr()));
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.count(args[1].operator std::string()));
*rv = rkvs;
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
auto rkvs = std::make_shared<ListObject>();
for (const auto& kv : o->data) {
rkvs->data.push_back(MakeValue(kv.first));
rkvs->data.push_back(kv.second);
}
});
*rv = rkvs;
}
});
DGL_REGISTER_GLOBAL("_Value")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
*rv = MakeValue(args[0]);
});
DGL_REGISTER_GLOBAL("_MapCount").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
if (sptr->is_type<MapObject>()) {
auto* o = static_cast<const MapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.count(args[1].obj_sptr()));
} else {
CHECK(sptr->is_type<StrMapObject>());
auto* o = static_cast<const StrMapObject*>(sptr.get());
*rv = static_cast<int64_t>(o->data.count(args[1].operator std::string()));
}
});
DGL_REGISTER_GLOBAL("_ValueGet")
.set_body([] (DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ValueObject>());
auto* o = static_cast<const ValueObject*>(sptr.get());
*rv = o->data;
});
DGL_REGISTER_GLOBAL("_Value").set_body([](DGLArgs args, DGLRetValue* rv) {
*rv = MakeValue(args[0]);
});
DGL_REGISTER_GLOBAL("_ValueGet").set_body([](DGLArgs args, DGLRetValue* rv) {
auto& sptr = args[0].obj_sptr();
CHECK(sptr->is_type<ValueObject>());
auto* o = static_cast<const ValueObject*>(sptr.get());
*rv = o->data;
});
} // namespace runtime
} // namespace dgl
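Every registration above follows one pattern: DGL_REGISTER_GLOBAL binds a name to a type-erased callable in a process-wide table, and the FFI later looks the callable up by name. A toy standalone model of that pattern (this is not DGL's PackedFunc/Registry machinery):

#include <cstdint>
#include <functional>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Args = std::vector<int64_t>;
using Body = std::function<int64_t(const Args&)>;

// Process-wide name -> callable table.
std::map<std::string, Body>& Registry() {
  static std::map<std::string, Body> table;
  return table;
}

struct RegisterGlobal {
  explicit RegisterGlobal(std::string name) : name_(std::move(name)) {}
  RegisterGlobal& set_body(Body body) {
    Registry()[name_] = std::move(body);
    return *this;
  }
  std::string name_;
};

// Registration happens at static-initialization time, as with the macros above.
static RegisterGlobal reg_sum = RegisterGlobal("_Sum").set_body(
    [](const Args& args) {
      int64_t total = 0;
      for (int64_t v : args) total += v;
      return total;
    });

int main() {
  std::cout << Registry()["_Sum"]({1, 2, 3}) << std::endl;  // prints 6
  return 0;
}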
......@@ -3,10 +3,11 @@
* \file api/api_test.cc
* \brief C APIs for testing FFI
*/
#include <dgl/runtime/ndarray.h>
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/container.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/registry.h>
#include <dgl/packed_func_ext.h>
#include <thread>
namespace dgl {
......@@ -18,12 +19,12 @@ namespace runtime {
// - The argument to pass to the python callback
// It returns what python callback returns
DGL_REGISTER_GLOBAL("_TestPythonCallback")
.set_body([](DGLArgs args, DGLRetValue* rv) {
LOG(INFO) << "Inside C API";
PackedFunc fn = args[0];
DGLArgs cb_args(args.values + 1, args.type_codes + 1, 1);
fn.CallPacked(cb_args, rv);
});
.set_body([](DGLArgs args, DGLRetValue* rv) {
LOG(INFO) << "Inside C API";
PackedFunc fn = args[0];
DGLArgs cb_args(args.values + 1, args.type_codes + 1, 1);
fn.CallPacked(cb_args, rv);
});
// Register an internal API for testing python callback.
// It receives two arguments:
......@@ -34,17 +35,16 @@ DGL_REGISTER_GLOBAL("_TestPythonCallback")
// The API runs the python callback in a separate thread to test
// python GIL is properly released.
DGL_REGISTER_GLOBAL("_TestPythonCallbackThread")
.set_body([](DGLArgs args, DGLRetValue* rv) {
LOG(INFO) << "Inside C API";
PackedFunc fn = args[0];
auto thr = std::make_shared<std::thread>(
[fn, args, rv]() {
.set_body([](DGLArgs args, DGLRetValue* rv) {
LOG(INFO) << "Inside C API";
PackedFunc fn = args[0];
auto thr = std::make_shared<std::thread>([fn, args, rv]() {
LOG(INFO) << "Callback thread " << std::this_thread::get_id();
DGLArgs cb_args(args.values + 1, args.type_codes + 1, 1);
fn.CallPacked(cb_args, rv);
});
thr->join();
});
thr->join();
});
} // namespace runtime
} // namespace dgl
......@@ -4,11 +4,12 @@
* \brief DGL array arithmetic operations
*/
#include <dgl/packed_func_ext.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/container.h>
#include <dgl/runtime/ndarray.h>
#include "../c_api_common.h"
#include "./array_op.h"
#include "./arith.h"
#include "./array_op.h"
using namespace dgl::runtime;
......@@ -16,56 +17,55 @@ namespace dgl {
namespace aten {
// Generate operators with both operands being NDArrays.
#define BINARY_ELEMENT_OP(name, op) \
IdArray name(IdArray lhs, IdArray rhs) { \
IdArray ret; \
CHECK_SAME_DTYPE(lhs, rhs); \
CHECK_SAME_CONTEXT(lhs, rhs); \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
#define BINARY_ELEMENT_OP(name, op) \
IdArray name(IdArray lhs, IdArray rhs) { \
IdArray ret; \
CHECK_SAME_DTYPE(lhs, rhs); \
CHECK_SAME_CONTEXT(lhs, rhs); \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
}
// Generate operators with only lhs being NDArray.
#define BINARY_ELEMENT_OP_L(name, op) \
IdArray name(IdArray lhs, int64_t rhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
#define BINARY_ELEMENT_OP_L(name, op) \
IdArray name(IdArray lhs, int64_t rhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
}
// Generate operators with only rhs being NDArray.
#define BINARY_ELEMENT_OP_R(name, op) \
IdArray name(int64_t lhs, IdArray rhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(rhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(rhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
#define BINARY_ELEMENT_OP_R(name, op) \
IdArray name(int64_t lhs, IdArray rhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(rhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(rhs->dtype, IdType, { \
ret = impl::BinaryElewise<XPU, IdType, arith::op>(lhs, rhs); \
}); \
}); \
return ret; \
}
// Generate unary operators on an NDArray.
#define UNARY_ELEMENT_OP(name, op) \
IdArray name(IdArray lhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::UnaryElewise<XPU, IdType, arith::op>(lhs); \
}); \
}); \
return ret; \
#define UNARY_ELEMENT_OP(name, op) \
IdArray name(IdArray lhs) { \
IdArray ret; \
ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, { \
ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { \
ret = impl::UnaryElewise<XPU, IdType, arith::op>(lhs); \
}); \
}); \
return ret; \
}
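For reference, the first invocation below, BINARY_ELEMENT_OP(Add, Add), expands roughly to the following function; the ATEN_* switch macros dispatch on device type and integer width, and #name stringifies the operator name for error reporting.

// Approximate expansion of BINARY_ELEMENT_OP(Add, Add):
IdArray Add(IdArray lhs, IdArray rhs) {
  IdArray ret;
  CHECK_SAME_DTYPE(lhs, rhs);
  CHECK_SAME_CONTEXT(lhs, rhs);
  ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, "Add", {
    ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, {
      ret = impl::BinaryElewise<XPU, IdType, arith::Add>(lhs, rhs);
    });
  });
  return ret;
}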
BINARY_ELEMENT_OP(Add, Add)
BINARY_ELEMENT_OP(Sub, Sub)
BINARY_ELEMENT_OP(Mul, Mul)
......@@ -108,106 +108,104 @@ UNARY_ELEMENT_OP(Neg, Neg)
} // namespace dgl
///////////////// Operator overloading for NDArray /////////////////
NDArray operator + (const NDArray& lhs, const NDArray& rhs) {
NDArray operator+(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::Add(lhs, rhs);
}
NDArray operator - (const NDArray& lhs, const NDArray& rhs) {
NDArray operator-(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::Sub(lhs, rhs);
}
NDArray operator * (const NDArray& lhs, const NDArray& rhs) {
NDArray operator*(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::Mul(lhs, rhs);
}
NDArray operator / (const NDArray& lhs, const NDArray& rhs) {
NDArray operator/(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::Div(lhs, rhs);
}
NDArray operator % (const NDArray& lhs, const NDArray& rhs) {
NDArray operator%(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::Mod(lhs, rhs);
}
NDArray operator + (const NDArray& lhs, int64_t rhs) {
NDArray operator+(const NDArray& lhs, int64_t rhs) {
return dgl::aten::Add(lhs, rhs);
}
NDArray operator - (const NDArray& lhs, int64_t rhs) {
NDArray operator-(const NDArray& lhs, int64_t rhs) {
return dgl::aten::Sub(lhs, rhs);
}
NDArray operator * (const NDArray& lhs, int64_t rhs) {
NDArray operator*(const NDArray& lhs, int64_t rhs) {
return dgl::aten::Mul(lhs, rhs);
}
NDArray operator / (const NDArray& lhs, int64_t rhs) {
NDArray operator/(const NDArray& lhs, int64_t rhs) {
return dgl::aten::Div(lhs, rhs);
}
NDArray operator % (const NDArray& lhs, int64_t rhs) {
NDArray operator%(const NDArray& lhs, int64_t rhs) {
return dgl::aten::Mod(lhs, rhs);
}
NDArray operator + (int64_t lhs, const NDArray& rhs) {
NDArray operator+(int64_t lhs, const NDArray& rhs) {
return dgl::aten::Add(lhs, rhs);
}
NDArray operator - (int64_t lhs, const NDArray& rhs) {
NDArray operator-(int64_t lhs, const NDArray& rhs) {
return dgl::aten::Sub(lhs, rhs);
}
NDArray operator * (int64_t lhs, const NDArray& rhs) {
NDArray operator*(int64_t lhs, const NDArray& rhs) {
return dgl::aten::Mul(lhs, rhs);
}
NDArray operator / (int64_t lhs, const NDArray& rhs) {
NDArray operator/(int64_t lhs, const NDArray& rhs) {
return dgl::aten::Div(lhs, rhs);
}
NDArray operator % (int64_t lhs, const NDArray& rhs) {
NDArray operator%(int64_t lhs, const NDArray& rhs) {
return dgl::aten::Mod(lhs, rhs);
}
NDArray operator - (const NDArray& array) {
return dgl::aten::Neg(array);
}
NDArray operator-(const NDArray& array) { return dgl::aten::Neg(array); }
NDArray operator > (const NDArray& lhs, const NDArray& rhs) {
NDArray operator>(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::GT(lhs, rhs);
}
NDArray operator < (const NDArray& lhs, const NDArray& rhs) {
NDArray operator<(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::LT(lhs, rhs);
}
NDArray operator >= (const NDArray& lhs, const NDArray& rhs) {
NDArray operator>=(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::GE(lhs, rhs);
}
NDArray operator <= (const NDArray& lhs, const NDArray& rhs) {
NDArray operator<=(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::LE(lhs, rhs);
}
NDArray operator == (const NDArray& lhs, const NDArray& rhs) {
NDArray operator==(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::EQ(lhs, rhs);
}
NDArray operator != (const NDArray& lhs, const NDArray& rhs) {
NDArray operator!=(const NDArray& lhs, const NDArray& rhs) {
return dgl::aten::NE(lhs, rhs);
}
NDArray operator > (const NDArray& lhs, int64_t rhs) {
NDArray operator>(const NDArray& lhs, int64_t rhs) {
return dgl::aten::GT(lhs, rhs);
}
NDArray operator < (const NDArray& lhs, int64_t rhs) {
NDArray operator<(const NDArray& lhs, int64_t rhs) {
return dgl::aten::LT(lhs, rhs);
}
NDArray operator >= (const NDArray& lhs, int64_t rhs) {
NDArray operator>=(const NDArray& lhs, int64_t rhs) {
return dgl::aten::GE(lhs, rhs);
}
NDArray operator <= (const NDArray& lhs, int64_t rhs) {
NDArray operator<=(const NDArray& lhs, int64_t rhs) {
return dgl::aten::LE(lhs, rhs);
}
NDArray operator == (const NDArray& lhs, int64_t rhs) {
NDArray operator==(const NDArray& lhs, int64_t rhs) {
return dgl::aten::EQ(lhs, rhs);
}
NDArray operator != (const NDArray& lhs, int64_t rhs) {
NDArray operator!=(const NDArray& lhs, int64_t rhs) {
return dgl::aten::NE(lhs, rhs);
}
NDArray operator > (int64_t lhs, const NDArray& rhs) {
NDArray operator>(int64_t lhs, const NDArray& rhs) {
return dgl::aten::GT(lhs, rhs);
}
NDArray operator < (int64_t lhs, const NDArray& rhs) {
NDArray operator<(int64_t lhs, const NDArray& rhs) {
return dgl::aten::LT(lhs, rhs);
}
NDArray operator >= (int64_t lhs, const NDArray& rhs) {
NDArray operator>=(int64_t lhs, const NDArray& rhs) {
return dgl::aten::GE(lhs, rhs);
}
NDArray operator <= (int64_t lhs, const NDArray& rhs) {
NDArray operator<=(int64_t lhs, const NDArray& rhs) {
return dgl::aten::LE(lhs, rhs);
}
NDArray operator == (int64_t lhs, const NDArray& rhs) {
NDArray operator==(int64_t lhs, const NDArray& rhs) {
return dgl::aten::EQ(lhs, rhs);
}
NDArray operator != (int64_t lhs, const NDArray& rhs) {
NDArray operator!=(int64_t lhs, const NDArray& rhs) {
return dgl::aten::NE(lhs, rhs);
}
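A minimal usage sketch of these overloads, assuming `a` and `b` are IdArrays with the same dtype and device context (names are illustrative, not part of the diff):
NDArray sum = a + b;     // forwards to dgl::aten::Add(a, b)
NDArray scaled = a * 2;  // forwards to dgl::aten::Mul(a, 2)
NDArray mask = a >= b;   // forwards to dgl::aten::GE(a, b), element-wise comparison
NDArray neg = -a;        // forwards to dgl::aten::Neg(a)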
......@@ -6,58 +6,52 @@
#ifndef DGL_ARRAY_CHECK_H_
#define DGL_ARRAY_CHECK_H_
#include <dgl/runtime/ndarray.h>
#include <dgl/array.h>
#include <vector>
#include <dgl/runtime/ndarray.h>
#include <string>
#include <vector>
namespace dgl {
namespace aten {
// Check whether the given arguments have the same context.
inline void CheckCtx(
const DGLContext& ctx,
const std::vector<NDArray>& arrays,
const DGLContext& ctx, const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
if (IsNullArray(arrays[i])) continue;
CHECK_EQ(ctx, arrays[i]->ctx)
<< "Expected device context " << ctx << ". But got "
<< arrays[i]->ctx << " for " << names[i] << ".";
<< "Expected device context " << ctx << ". But got " << arrays[i]->ctx
<< " for " << names[i] << ".";
}
}
// Check whether input tensors are contiguous.
inline void CheckContiguous(
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
const std::vector<NDArray>& arrays, const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
if (IsNullArray(arrays[i])) continue;
CHECK(arrays[i].IsContiguous())
<< "Expect " << names[i] << " to be a contiguous tensor";
<< "Expect " << names[i] << " to be a contiguous tensor";
}
}
// Check whether input tensors have valid shape.
inline void CheckShape(
const std::vector<uint64_t>& gdim,
const std::vector<int>& uev_idx,
const std::vector<NDArray>& arrays,
const std::vector<std::string>& names) {
const std::vector<uint64_t>& gdim, const std::vector<int>& uev_idx,
const std::vector<NDArray>& arrays, const std::vector<std::string>& names) {
for (size_t i = 0; i < arrays.size(); ++i) {
if (IsNullArray(arrays[i]))
continue;
if (IsNullArray(arrays[i])) continue;
CHECK_GE(arrays[i]->ndim, 2)
<< "Expect " << names[i] << " to have ndim >= 2, "
<< "Note that for scalar feature we expand its "
<< "dimension with an additional dimension of "
<< "length one.";
<< "Expect " << names[i] << " to have ndim >= 2, "
<< "Note that for scalar feature we expand its "
<< "dimension with an additional dimension of "
<< "length one.";
CHECK_EQ(gdim[uev_idx[i]], arrays[i]->shape[0])
<< "Expect " << names[i] << " to have size "
<< gdim[uev_idx[i]] << " on the first dimension, "
<< "but got " << arrays[i]->shape[0];
<< "Expect " << names[i] << " to have size " << gdim[uev_idx[i]]
<< " on the first dimension, "
<< "but got " << arrays[i]->shape[0];
}
}
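A minimal sketch of how these helpers are typically combined before a kernel call; the feature arrays and dimension bookkeeping below are hypothetical, not part of this change:
std::vector<NDArray> arrays = {ufeat, efeat};         // hypothetical inputs
std::vector<std::string> names = {"ufeat", "efeat"};
CheckCtx(ufeat->ctx, arrays, names);   // all arrays on the same device
CheckContiguous(arrays, names);        // no strided tensors
// gdim holds the graph dimensions (e.g. #src nodes, #edges); uev_idx maps
// each array to the dimension its first axis must match.
CheckShape({num_src_nodes, num_edges}, {0, 1}, arrays, names);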
......
......@@ -14,22 +14,21 @@ template <DGLDeviceType XPU, typename IdType>
IdArray CumSum(IdArray array, bool prepend_zero) {
const int64_t len = array.NumElements();
if (len == 0)
return !prepend_zero ? array : aten::Full(0, 1, array->dtype.bits, array->ctx);
return !prepend_zero ? array
: aten::Full(0, 1, array->dtype.bits, array->ctx);
if (prepend_zero) {
IdArray ret = aten::NewIdArray(len + 1, array->ctx, array->dtype.bits);
const IdType* in_d = array.Ptr<IdType>();
IdType* out_d = ret.Ptr<IdType>();
out_d[0] = 0;
for (int64_t i = 0; i < len; ++i)
out_d[i + 1] = out_d[i] + in_d[i];
for (int64_t i = 0; i < len; ++i) out_d[i + 1] = out_d[i] + in_d[i];
return ret;
} else {
IdArray ret = aten::NewIdArray(len, array->ctx, array->dtype.bits);
const IdType* in_d = array.Ptr<IdType>();
IdType* out_d = ret.Ptr<IdType>();
out_d[0] = in_d[0];
for (int64_t i = 1; i < len; ++i)
out_d[i] = out_d[i - 1] + in_d[i];
for (int64_t i = 1; i < len; ++i) out_d[i] = out_d[i - 1] + in_d[i];
return ret;
}
}
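A worked example of the two branches (values are illustrative):
// For input [1, 2, 3]:
//   CumSum(array, /*prepend_zero=*/false) -> [1, 3, 6]
//   CumSum(array, /*prepend_zero=*/true)  -> [0, 1, 3, 6]
// An empty input is returned unchanged, or replaced by a one-element zero
// array when prepend_zero is true.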
......
......@@ -10,10 +10,11 @@ using runtime::NDArray;
namespace aten {
namespace impl {
template<DGLDeviceType XPU, typename DType, typename IdType>
template <DGLDeviceType XPU, typename DType, typename IdType>
NDArray IndexSelect(NDArray array, IdArray index) {
CHECK_EQ(array->shape[0], array.NumElements()) << "Only support tensor"
<< " whose first dimension equals number of elements, e.g. (5,), (5, 1)";
CHECK_EQ(array->shape[0], array.NumElements())
<< "Only support tensor"
<< " whose first dimension equals number of elements, e.g. (5,), (5, 1)";
const DType* array_data = static_cast<DType*>(array->data);
const IdType* idx_data = static_cast<IdType*>(index->data);
......
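A worked example of the supported case in IndexSelect (values are illustrative):
// With array = [10, 20, 30, 40, 50] (shape (5,)) and index = [4, 0, 2],
// IndexSelect(array, index) returns [50, 10, 30]. A tensor of shape (5, 2)
// fails the CHECK_EQ above because its first dimension does not equal its
// number of elements.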
......@@ -15,8 +15,7 @@ IdArray NonZero(IdArray array) {
std::vector<int64_t> ret;
const IdType* data = array.Ptr<IdType>();
for (int64_t i = 0; i < array->shape[0]; ++i)
if (data[i] != 0)
ret.push_back(i);
if (data[i] != 0) ret.push_back(i);
return NDArray::FromVector(ret, array->ctx);
}
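A worked example (values are illustrative):
// NonZero([0, 3, 0, 7]) returns [1, 3]: the indices of the nonzero entries,
// placed on the same device as the input array.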
......
......@@ -6,7 +6,9 @@
#include <dgl/array.h>
#include <dgl/runtime/ndarray.h>
#include <dgl/runtime/parallel_for.h>
#include <numeric>
#include "../arith.h"
namespace dgl {
......@@ -51,116 +53,186 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
// etc., especially since the workload is very light. Need to replace with parallel_for.
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning,
// scheduling, etc., especially since the workload is very light. Need to
// replace with parallel_for.
for (int64_t i = 0; i < lhs->shape[0]; i++) {
ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]);
}
return ret;
}
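A hedged sketch of the parallel_for replacement the TODO refers to; the chunked (begin, end) callback signature of runtime::parallel_for is assumed here and should be checked against the header before use:
// Sketch only, not part of this diff.
runtime::parallel_for(0, lhs->shape[0], [&](size_t b, size_t e) {
  for (size_t i = b; i < e; ++i) {
    ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]);
  }
});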
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(
IdArray lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(
IdArray lhs, IdArray rhs);
template <DGLDeviceType XPU, typename IdType, typename Op>
IdArray BinaryElewise(IdArray lhs, IdType rhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
// etc., especially since the workload is very light. Need to replace with parallel_for.
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning,
// scheduling, etc., especially since the workload is very light. Need to
// replace with parallel_for.
for (int64_t i = 0; i < lhs->shape[0]; i++) {
ret_data[i] = Op::Call(lhs_data[i], rhs);
}
return ret;
}
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(
IdArray lhs, int32_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(
IdArray lhs, int64_t rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(
IdArray lhs, int64_t rhs);
template <DGLDeviceType XPU, typename IdType, typename Op>
IdArray BinaryElewise(IdType lhs, IdArray rhs) {
IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
const IdType* rhs_data = static_cast<IdType*>(rhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
// etc., especially since the workload is very light. Need to replace with parallel_for.
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning,
// scheduling, etc., especially since the workload is very light. Need to
// replace with parallel_for.
for (int64_t i = 0; i < rhs->shape[0]; i++) {
ret_data[i] = Op::Call(lhs, rhs_data[i]);
}
return ret;
}
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Add>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Sub>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mul>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Div>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::Mod>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GT>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LT>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::GE>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::LE>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::EQ>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int32_t, arith::NE>(
int32_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Add>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Sub>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mul>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Div>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::Mod>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GT>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LT>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::GE>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::LE>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::EQ>(
int64_t lhs, IdArray rhs);
template IdArray BinaryElewise<kDGLCPU, int64_t, arith::NE>(
int64_t lhs, IdArray rhs);
template <DGLDeviceType XPU, typename IdType, typename Op>
IdArray UnaryElewise(IdArray lhs) {
IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
const IdType* lhs_data = static_cast<IdType*>(lhs->data);
IdType* ret_data = static_cast<IdType*>(ret->data);
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning, scheduling,
// etc., especially since the workload is very light. Need to replace with parallel_for.
// TODO(BarclayII): this usually incurs lots of overhead in thread spawning,
// scheduling, etc., especially since the workload is very light. Need to
// replace with parallel_for.
for (int64_t i = 0; i < lhs->shape[0]; i++) {
ret_data[i] = Op::Call(lhs_data[i]);
}
......@@ -180,10 +252,14 @@ NDArray Full(DType val, int64_t length, DGLContext ctx) {
return ret;
}
template NDArray Full<kDGLCPU, int32_t>(int32_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, int64_t>(int64_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, float>(float val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, double>(double val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, int32_t>(
int32_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, int64_t>(
int64_t val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, float>(
float val, int64_t length, DGLContext ctx);
template NDArray Full<kDGLCPU, double>(
double val, int64_t length, DGLContext ctx);
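A minimal usage sketch of the templated Full; the call site and values are illustrative, not part of this change:
// Creates a length-16 float array filled with 1.0 on CPU. The
// dtype-dispatching aten::Full(val, length, nbits, ctx) wrapper used in
// CumSum above forwards to instantiations like these.
NDArray ones = Full<kDGLCPU, float>(1.0f, 16, DGLContext{kDGLCPU, 0});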
///////////////////////////// Range /////////////////////////////
......@@ -216,7 +292,8 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
}
}
// map array
IdArray maparr = NewIdArray(newid, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdArray maparr =
NewIdArray(newid, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
IdType* maparr_data = static_cast<IdType*>(maparr->data);
for (const auto& kv : oldv2newv) {
maparr_data[kv.second] = kv.first;
......
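A worked example of the relabeling semantics, assuming new IDs are assigned in order of first appearance (values are illustrative):
// Relabel_({[10, 20, 10], [20, 30]}) rewrites the arrays in place to
// [0, 1, 0] and [1, 2], and returns the mapping array [10, 20, 30], where
// position i holds the old ID that was assigned new ID i.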