Unverified Commit 619d735d authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Replace \xxx with @XXX in structured comment. (#4822)



* param

* brief

* note

* return

* tparam

* brief2

* file

* return2

* return

* blabla

* all
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 96297fb8
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/gather_mm.cu * @file array/cuda/gather_mm.cu
* \brief GatherMM C APIs and definitions. * @brief GatherMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include <algorithm> // std::swap #include <algorithm> // std::swap
...@@ -15,7 +15,7 @@ namespace aten { ...@@ -15,7 +15,7 @@ namespace aten {
namespace { namespace {
/*! \brief Call cuBLAS GEMM API for dense matmul operation for float and double. */ /*! @brief Call cuBLAS GEMM API for dense matmul operation for float and double. */
template <typename DType> template <typename DType>
cublasStatus_t cublasGemm(cublasHandle_t handle, cublasOperation_t transa, cublasStatus_t cublasGemm(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n, int k, cublasOperation_t transb, int m, int n, int k,
...@@ -77,12 +77,13 @@ cublasStatus_t cublasGemm<double>(cublasHandle_t handle, cublasOperation_t trans ...@@ -77,12 +77,13 @@ cublasStatus_t cublasGemm<double>(cublasHandle_t handle, cublasOperation_t trans
namespace cuda { namespace cuda {
/* \Note Each row of A multiplies a segment of matrix of B of dimension in_len * outlen. /**
One warp is assigned to process one row of A. Each WARP sequentially multiplies * @note Each row of A multiplies a segment of matrix of B of dimension in_len * outlen.
one element of A and a row of B to compute partial result of the output. A * One warp is assigned to process one row of A. Each WARP sequentially multiplies
is loaded in shared memory in a coalesced way. Output matrix is loaded in * one element of A and a row of B to compute partial result of the output. A
registers. B should get benefit from L2 cache. * is loaded in shared memory in a coalesced way. Output matrix is loaded in
*/ * registers. B should get benefit from L2 cache.
*/
template <typename Idx, typename DType> template <typename Idx, typename DType>
__global__ void GatherMMScatterKernel( __global__ void GatherMMScatterKernel(
const DType* __restrict__ A, const DType* __restrict__ A,
...@@ -138,12 +139,13 @@ __global__ void GatherMMScatterKernel( ...@@ -138,12 +139,13 @@ __global__ void GatherMMScatterKernel(
} }
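For reference, the warp-per-row strategy described in the note above can be sketched as a simplified, hypothetical kernel. This is not the actual GatherMMScatterKernel: the real kernel additionally stages A in shared memory and handles transposed operands, and the names, row-major layout, and the seg_of_row mapping here are all assumptions.

// Sketch: one warp computes one output row. A is m x k, each B segment is
// k x n, C is m x n, all row-major; seg_of_row maps a row of A to its segment.
template <typename Idx, typename DType>
__global__ void GatherMMRowPerWarpSketch(
    const DType* __restrict__ A, const DType* __restrict__ B,
    DType* __restrict__ C, const Idx* __restrict__ seg_of_row,
    int64_t m, int64_t k, int64_t n) {
  const int64_t warp_id =
      (blockIdx.x * static_cast<int64_t>(blockDim.x) + threadIdx.x) >> 5;
  const int lane = threadIdx.x & 31;
  if (warp_id >= m) return;
  const DType* B_seg = B + seg_of_row[warp_id] * k * n;
  for (int64_t j = lane; j < n; j += 32) {  // each lane owns columns j, j+32, ...
    DType acc = static_cast<DType>(0);
    for (int64_t l = 0; l < k; ++l)         // walk the row of A sequentially
      acc += A[warp_id * k + l] * B_seg[l * n + j];
    C[warp_id * n + j] = acc;               // partial result lives in a register
  }
}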
/* \Note Output matrix is accumulated via atomic operations. Rest of the strategies /**
are similar to GatherMMKernel. One warp is assigned to process one row of A. Each * @note Output matrix is accumulated via atomic operations. Rest of the strategies
WARP sequentially multiplies one element of A and a row of B to compute partial * are similar to GatherMMKernel. One warp is assigned to process one row of A. Each
result of the output. A is loaded in shared memory in a coalesced way. B should * WARP sequentially multiplies one element of A and a row of B to compute partial
get benefit from L2 cache. * result of the output. A is loaded in shared memory in a coalesced way. B should
*/ * get benefit from L2 cache.
*/
template <typename Idx, typename DType> template <typename Idx, typename DType>
__global__ void GatherMMScatterKernel2( __global__ void GatherMMScatterKernel2(
const DType* __restrict__ A, const DType* __restrict__ A,
...@@ -197,15 +199,15 @@ __global__ void GatherMMScatterKernel2( ...@@ -197,15 +199,15 @@ __global__ void GatherMMScatterKernel2(
} // namespace cuda } // namespace cuda
/*! /*!
* \brief Implementation of Gather_mm operator. The input matrix A is * @brief Implementation of Gather_mm operator. The input matrix A is
* expected to be sorted according to relation type. * expected to be sorted according to relation type.
* \param A The input dense matrix of dimension m x k * @param A The input dense matrix of dimension m x k
* \param B The input dense matrix of dimension k x n * @param B The input dense matrix of dimension k x n
* \param C The output dense matrix of dimension m x n * @param C The output dense matrix of dimension m x n
* \param seglen_A The input vector of size R. Each element * @param seglen_A The input vector of size R. Each element
* is the length of segments of input ``A`` * is the length of segments of input ``A``
* \param a_trans Matrix A to be transposed * @param a_trans Matrix A to be transposed
* \param b_trans Matrix B to be transposed * @param b_trans Matrix B to be transposed
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
void SegmentMM(const NDArray A, void SegmentMM(const NDArray A,
...@@ -308,13 +310,13 @@ void SegmentMMBackwardB(const NDArray A, ...@@ -308,13 +310,13 @@ void SegmentMMBackwardB(const NDArray A,
} }
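Functionally, SegmentMM is a batched GEMM over variable-length row segments: the r-th group of seglen_A[r] rows of A is multiplied by the r-th k x n block of B. A host-side reference loop in plain C++, showing the semantics only (no-transpose case, row-major float buffers assumed; not the CUDA path):

// Reference semantics of SegmentMM with R relation types.
void SegmentMMRef(const float* A, const float* B, float* C,
                  const int64_t* seglen_A, int64_t R, int64_t k, int64_t n) {
  int64_t row = 0;
  for (int64_t r = 0; r < R; ++r) {      // one GEMM per relation type
    const float* Br = B + r * k * n;     // weight block of relation r
    for (int64_t e = 0; e < seglen_A[r]; ++e, ++row)
      for (int64_t j = 0; j < n; ++j) {
        float acc = 0.f;
        for (int64_t l = 0; l < k; ++l)
          acc += A[row * k + l] * Br[l * n + j];
        C[row * n + j] = acc;
      }
  }
}

On the device, the three inner loops can be mapped to one call of the cublasGemm wrapper shown above per segment.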
/*! /*!
* \brief Implementation of Gather_mm operator. The input matrix A is * @brief Implementation of Gather_mm operator. The input matrix A is
* expected to be sorted according to relation type. * expected to be sorted according to relation type.
* \param A The input dense matrix of dimension m x k * @param A The input dense matrix of dimension m x k
* \param B The input dense matrix of dimension k x n * @param B The input dense matrix of dimension k x n
* \param C The output dense matrix of dimension m x n * @param C The output dense matrix of dimension m x n
* \param idx_a The input vector to gather left hand operand on * @param idx_a The input vector to gather left hand operand on
* \param idx_b The input vector to gather right hand operand on * @param idx_b The input vector to gather right hand operand on
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
...@@ -345,17 +347,17 @@ void GatherMM(const NDArray A, ...@@ -345,17 +347,17 @@ void GatherMM(const NDArray A,
} }
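The gather semantics: row i of the output is the product of the gathered row idx_a[i] of A and the gathered operand idx_b[i] of B. A reference sketch of those semantics (it assumes idx_b selects k x n blocks of B, matching the relation-type usage above; float and row-major layout are also assumptions):

// Reference semantics of GatherMM.
void GatherMMRef(const float* A, const float* B, float* C,
                 const int64_t* idx_a, const int64_t* idx_b,
                 int64_t num_rows, int64_t k, int64_t n) {
  for (int64_t i = 0; i < num_rows; ++i) {
    const float* a = A + idx_a[i] * k;      // gathered left operand (1 x k)
    const float* b = B + idx_b[i] * k * n;  // gathered right operand (k x n)
    for (int64_t j = 0; j < n; ++j) {
      float acc = 0.f;
      for (int64_t l = 0; l < k; ++l)
        acc += a[l] * b[l * n + j];
      C[i * n + j] = acc;
    }
  }
}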
/*! /*!
* \brief Implementation of Gather_mm operator. The input matrix A is * @brief Implementation of Gather_mm operator. The input matrix A is
* expected to be sorted according to relation type. * expected to be sorted according to relation type.
* \param A The input dense matrix of dimension m x k * @param A The input dense matrix of dimension m x k
* \param B The input dense matrix of dimension k x n * @param B The input dense matrix of dimension k x n
* \param C The output dense matrix of dimension m x n * @param C The output dense matrix of dimension m x n
* \param idx_a The input vector to gather left hand operand on * @param idx_a The input vector to gather left hand operand on
* \param idx_b The input vector to gather right hand operand on * @param idx_b The input vector to gather right hand operand on
* \param idx_c The input vector to gather output operand on * @param idx_c The input vector to gather output operand on
* \param num_rel The number of idx types in idx_b * @param num_rel The number of idx types in idx_b
* \param a_trans Matrix A to be transposed * @param a_trans Matrix A to be transposed
* \param b_trans Matrix B to be transposed * @param b_trans Matrix B to be transposed
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
void GatherMMScatter(const NDArray A, void GatherMMScatter(const NDArray A,
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/ge_spmm.cuh * @file array/cuda/ge_spmm.cuh
* \brief GE-SpMM CUDA kernel function header. * @brief GE-SpMM CUDA kernel function header.
*/ */
#ifndef DGL_ARRAY_CUDA_GE_SPMM_CUH_ #ifndef DGL_ARRAY_CUDA_GE_SPMM_CUH_
#define DGL_ARRAY_CUDA_GE_SPMM_CUH_ #define DGL_ARRAY_CUDA_GE_SPMM_CUH_
...@@ -19,8 +19,8 @@ namespace aten { ...@@ -19,8 +19,8 @@ namespace aten {
namespace cuda { namespace cuda {
/*! /*!
* \brief CUDA kernel of GE-SpMM on Csr. * @brief CUDA kernel of GE-SpMM on Csr.
* \note GE-SpMM: https://arxiv.org/pdf/2007.03179.pdf * @note GE-SpMM: https://arxiv.org/pdf/2007.03179.pdf
* The grid dimensions x and y are reordered for better performance. * The grid dimensions x and y are reordered for better performance.
*/ */
template <typename Idx, typename DType, typename BinaryOp> template <typename Idx, typename DType, typename BinaryOp>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/macro.cuh * @file array/cuda/macro.cuh
* \brief Macro to call SPMM/SDDMM cuda kernels. * @brief Macro to call SPMM/SDDMM cuda kernels.
*/ */
#ifndef DGL_ARRAY_CUDA_MACRO_CUH_ #ifndef DGL_ARRAY_CUDA_MACRO_CUH_
#define DGL_ARRAY_CUDA_MACRO_CUH_ #define DGL_ARRAY_CUDA_MACRO_CUH_
......
/*! /*!
* Copyright (c) 2021 by Contributors * Copyright (c) 2021 by Contributors
* \file array/cuda/negative_sampling.cu * @file array/cuda/negative_sampling.cu
* \brief rowwise sampling * @brief rowwise sampling
*/ */
#include <curand_kernel.h> #include <curand_kernel.h>
...@@ -81,7 +81,7 @@ struct IsNotMinusOne { ...@@ -81,7 +81,7 @@ struct IsNotMinusOne {
}; };
/*! /*!
* \brief Sort ordered pairs in ascending order, using \a tmp_major and \a * @brief Sort ordered pairs in ascending order, using \a tmp_major and \a
* tmp_minor as temporary buffers, each with \a n elements. * tmp_minor as temporary buffers, each with \a n elements.
*/ */
template <typename IdType> template <typename IdType>
......
/*! /*!
* Copyright (c) 2021 by Contributors * Copyright (c) 2021 by Contributors
* \file array/cuda/rowwise_sampling.cu * @file array/cuda/rowwise_sampling.cu
* \brief uniform rowwise sampling * @brief uniform rowwise sampling
*/ */
#include <curand_kernel.h> #include <curand_kernel.h>
......
/*! /*!
* Copyright (c) 2022 by Contributors * Copyright (c) 2022 by Contributors
* \file array/cuda/rowwise_sampling_prob.cu * @file array/cuda/rowwise_sampling_prob.cu
* \brief weighted rowwise sampling. The degree computing kernels and * @brief weighted rowwise sampling. The degree computing kernels and
* host-side functions are partially borrowed from the uniform rowwise * host-side functions are partially borrowed from the uniform rowwise
* sampling code rowwise_sampling.cu. * sampling code rowwise_sampling.cu.
* \author pengqirong (OPPO), dlasalle and Xin from Nvidia. * @author pengqirong (OPPO), dlasalle and Xin from Nvidia.
*/ */
#include <dgl/random.h> #include <dgl/random.h>
#include <dgl/runtime/device_api.h> #include <dgl/runtime/device_api.h>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/sddmm.cu * @file array/cuda/sddmm.cu
* \brief SDDMM C APIs and definitions. * @brief SDDMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include "./sddmm.cuh" #include "./sddmm.cuh"
...@@ -11,7 +11,7 @@ namespace dgl { ...@@ -11,7 +11,7 @@ namespace dgl {
namespace aten { namespace aten {
/*! /*!
* \brief CUDA implementation of g-SDDMM on Csr format. * @brief CUDA implementation of g-SDDMM on Csr format.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
void SDDMMCsr(const std::string& op, void SDDMMCsr(const std::string& op,
...@@ -31,7 +31,7 @@ void SDDMMCsr(const std::string& op, ...@@ -31,7 +31,7 @@ void SDDMMCsr(const std::string& op,
/*! /*!
* \brief CUDA implementation of g-SDDMM on Coo format. * @brief CUDA implementation of g-SDDMM on Coo format.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
void SDDMMCoo(const std::string& op, void SDDMMCoo(const std::string& op,
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/sddmm.cuh * @file array/cuda/sddmm.cuh
* \brief SDDMM CUDA kernel function header. * @brief SDDMM CUDA kernel function header.
*/ */
#ifndef DGL_ARRAY_CUDA_SDDMM_CUH_ #ifndef DGL_ARRAY_CUDA_SDDMM_CUH_
#define DGL_ARRAY_CUDA_SDDMM_CUH_ #define DGL_ARRAY_CUDA_SDDMM_CUH_
...@@ -87,8 +87,8 @@ namespace cuda { ...@@ -87,8 +87,8 @@ namespace cuda {
constexpr unsigned int full_mask = 0xffffffff; constexpr unsigned int full_mask = 0xffffffff;
/*! /*!
* \brief CUDA kernel of g-SDDMM on Coo format. * @brief CUDA kernel of g-SDDMM on Coo format.
* \note it uses edge parallel strategy, different threadblocks (on y-axis) * @note it uses edge parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks * are responsible for the computation on different edges. Threadblocks
* on the x-axis are responsible for the computation on different positions * on the x-axis are responsible for the computation on different positions
* in feature dimension. * in feature dimension.
...@@ -136,8 +136,8 @@ __global__ void SDDMMCooKernel( ...@@ -136,8 +136,8 @@ __global__ void SDDMMCooKernel(
} }
/*! /*!
* \brief CUDA kernel of SDDMM-dot on Coo format, accelerated with tree reduction. * @brief CUDA kernel of SDDMM-dot on Coo format, accelerated with tree reduction.
* \note it uses edge parallel strategy, different threadblocks (on y-axis) * @note it uses edge parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks * are responsible for the computation on different edges. Threadblocks
* on the x-axis are responsible for the computation on different positions * on the x-axis are responsible for the computation on different positions
* in feature dimension. * in feature dimension.
...@@ -204,8 +204,8 @@ __device__ __forceinline__ Idx BinarySearchSrc(const Idx *array, Idx length, Idx ...@@ -204,8 +204,8 @@ __device__ __forceinline__ Idx BinarySearchSrc(const Idx *array, Idx length, Idx
} }
/*! /*!
* \brief CUDA kernel of g-SDDMM on Csr format. * @brief CUDA kernel of g-SDDMM on Csr format.
* \note it uses edge parallel strategy, different threadblocks (on y-axis) * @note it uses edge parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks * are responsible for the computation on different edges. Threadblocks
* on the x-axis are responsible for the computation on different positions * on the x-axis are responsible for the computation on different positions
* in feature dimension. * in feature dimension.
...@@ -255,12 +255,12 @@ __global__ void SDDMMCsrKernel( ...@@ -255,12 +255,12 @@ __global__ void SDDMMCsrKernel(
} }
/*! /*!
* \brief CUDA implementation of g-SDDMM on Coo format. * @brief CUDA implementation of g-SDDMM on Coo format.
* \param bcast Broadcast information. * @param bcast Broadcast information.
* \param coo The Coo matrix. * @param coo The Coo matrix.
* \param lhs The left hand side operand feature. * @param lhs The left hand side operand feature.
* \param rhs The right hand side operand feature. * @param rhs The right hand side operand feature.
* \param out The result feature on edges. * @param out The result feature on edges.
*/ */
template <typename Idx, typename DType, typename Op, template <typename Idx, typename DType, typename Op,
int LhsTarget = 0, int RhsTarget = 2> int LhsTarget = 0, int RhsTarget = 2>
...@@ -324,12 +324,12 @@ void SDDMMCoo( ...@@ -324,12 +324,12 @@ void SDDMMCoo(
} }
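In the simplest case (op = dot, lhs taken from source nodes, rhs from destination nodes, no broadcasting), g-SDDMM on Coo computes one value per edge. A reference sketch of those semantics, with hypothetical names:

// out[e] = <lhs[src[e]], rhs[dst[e]]> for every edge e of the COO matrix.
void SDDMMCooDotRef(const float* lhs, const float* rhs, float* out,
                    const int64_t* src, const int64_t* dst,
                    int64_t num_edges, int64_t dim) {
  for (int64_t e = 0; e < num_edges; ++e) {
    float acc = 0.f;
    for (int64_t d = 0; d < dim; ++d)
      acc += lhs[src[e] * dim + d] * rhs[dst[e] * dim + d];
    out[e] = acc;
  }
}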
/*! /*!
* \brief CUDA implementation of g-SDDMM on Csr format. * @brief CUDA implementation of g-SDDMM on Csr format.
* \param bcast Broadcast information. * @param bcast Broadcast information.
* \param csr The Csr matrix. * @param csr The Csr matrix.
* \param lhs The left hand side operand feature. * @param lhs The left hand side operand feature.
* \param rhs The right hand side operand feature. * @param rhs The right hand side operand feature.
* \param out The result feature on edges. * @param out The result feature on edges.
*/ */
template <typename Idx, typename DType, typename Op, template <typename Idx, typename DType, typename Op,
int LhsTarget = 0, int RhsTarget = 2> int LhsTarget = 0, int RhsTarget = 2>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/sddmm.cu * @file array/cuda/sddmm.cu
* \brief SDDMM C APIs and definitions. * @brief SDDMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include "./sddmm.cuh" #include "./sddmm.cuh"
...@@ -10,7 +10,7 @@ namespace dgl { ...@@ -10,7 +10,7 @@ namespace dgl {
namespace aten { namespace aten {
/*! /*!
* \brief CUDA implementation of g-SDDMM on heterograph using * @brief CUDA implementation of g-SDDMM on heterograph using
Csr format. Csr format.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/sddmm.cu * @file array/cuda/sddmm.cu
* \brief SDDMM C APIs and definitions. * @brief SDDMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include "./sddmm.cuh" #include "./sddmm.cuh"
...@@ -10,7 +10,7 @@ namespace dgl { ...@@ -10,7 +10,7 @@ namespace dgl {
namespace aten { namespace aten {
/*! /*!
* \brief CUDA implementation of g-SDDMM on heterograph using * @brief CUDA implementation of g-SDDMM on heterograph using
Csr format. Csr format.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/segment_reduce.cu * @file array/cuda/segment_reduce.cu
* \brief Segment reduce C APIs and definitions. * @brief Segment reduce C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include <dgl/base_heterograph.h> #include <dgl/base_heterograph.h>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/segment_reduce.cuh * @file array/cuda/segment_reduce.cuh
* \brief Segment reduce kernel function header. * @brief Segment reduce kernel function header.
*/ */
#ifndef DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_ #ifndef DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_
#define DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_ #define DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_
...@@ -21,8 +21,8 @@ namespace aten { ...@@ -21,8 +21,8 @@ namespace aten {
namespace cuda { namespace cuda {
/*! /*!
* \brief CUDA kernel of segment reduce. * @brief CUDA kernel of segment reduce.
* \note each blockthread is responsible for aggregation on a row * @note each blockthread is responsible for aggregation on a row
* in the result tensor. * in the result tensor.
*/ */
template <typename IdType, typename DType, typename ReduceOp> template <typename IdType, typename DType, typename ReduceOp>
...@@ -45,8 +45,8 @@ __global__ void SegmentReduceKernel( ...@@ -45,8 +45,8 @@ __global__ void SegmentReduceKernel(
} }
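The segment reduce consumes an offsets array in the usual CSR-like form: segment i covers rows offsets[i] to offsets[i+1]. A reference loop for the sum reducer only (a sketch; the min/max reducers additionally record the arg position into the auxiliary tensor):

// out[i, :] = sum over feat[offsets[i] .. offsets[i+1]), per feature column.
void SegmentSumRef(const float* feat, const int64_t* offsets, float* out,
                   int64_t num_segments, int64_t dim) {
  for (int64_t i = 0; i < num_segments; ++i)
    for (int64_t d = 0; d < dim; ++d) {
      float acc = 0.f;
      for (int64_t r = offsets[i]; r < offsets[i + 1]; ++r)
        acc += feat[r * dim + d];
      out[i * dim + d] = acc;
    }
}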
/*! /*!
* \brief CUDA kernel of scatter add. * @brief CUDA kernel of scatter add.
* \note each blockthread is responsible for adding a row in feature tensor * @note each blockthread is responsible for adding a row in feature tensor
* to a target row in output tensor. * to a target row in output tensor.
*/ */
template <typename IdType, typename DType> template <typename IdType, typename DType>
...@@ -63,8 +63,8 @@ __global__ void ScatterAddKernel( ...@@ -63,8 +63,8 @@ __global__ void ScatterAddKernel(
} }
/*! /*!
* \brief CUDA kernel to update gradients for reduce op max/min * @brief CUDA kernel to update gradients for reduce op max/min
* \note each WARP (group of 32 threads) is responsible for adding a row in * @note each WARP (group of 32 threads) is responsible for adding a row in
* feature tensor to a target row in output tensor. * feature tensor to a target row in output tensor.
*/ */
...@@ -91,8 +91,8 @@ __global__ void UpdateGradMinMaxHeteroKernel( ...@@ -91,8 +91,8 @@ __global__ void UpdateGradMinMaxHeteroKernel(
} }
/*! /*!
* \brief CUDA kernel of backward phase in segment min/max. * @brief CUDA kernel of backward phase in segment min/max.
* \note each blockthread is responsible for writing a row in the * @note each blockthread is responsible for writing a row in the
* result gradient tensor by looking up the ArgMin/Max index information. * result gradient tensor by looking up the ArgMin/Max index information.
*/ */
template <typename IdType, typename DType> template <typename IdType, typename DType>
...@@ -111,11 +111,11 @@ __global__ void BackwardSegmentCmpKernel( ...@@ -111,11 +111,11 @@ __global__ void BackwardSegmentCmpKernel(
} }
/*! /*!
* \brief CUDA implementation of forward phase of Segment Reduce. * @brief CUDA implementation of forward phase of Segment Reduce.
* \param feat The input tensor. * @param feat The input tensor.
* \param offsets The offsets tensor. * @param offsets The offsets tensor.
* \param out The output tensor. * @param out The output tensor.
* \param arg An auxiliary tensor storing ArgMax/Min information. * @param arg An auxiliary tensor storing ArgMax/Min information.
*/ */
template <typename IdType, typename DType, typename ReduceOp> template <typename IdType, typename DType, typename ReduceOp>
void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) { void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) {
...@@ -142,11 +142,11 @@ void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) { ...@@ -142,11 +142,11 @@ void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) {
} }
/*! /*!
* \brief CUDA implementation of Scatter Add (on first dimension). * @brief CUDA implementation of Scatter Add (on first dimension).
* \note math equation: out[idx[i], *] += feat[i, *] * @note math equation: out[idx[i], *] += feat[i, *]
* \param feat The input tensor. * @param feat The input tensor.
* \param idx The indices tensor. * @param idx The indices tensor.
* \param out The output tensor. * @param out The output tensor.
*/ */
template <typename IdType, typename DType> template <typename IdType, typename DType>
void ScatterAdd(NDArray feat, NDArray idx, NDArray out) { void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
...@@ -171,13 +171,13 @@ void ScatterAdd(NDArray feat, NDArray idx, NDArray out) { ...@@ -171,13 +171,13 @@ void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
} }
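Because several input rows may target the same output row, a parallel scatter add must serialize conflicting updates, which the kernel above does with atomic additions. A minimal hypothetical sketch of the idea, using one thread per element (the production kernel's blockthread mapping differs):

// out[idx[row], d] += feat[row, d]; atomicAdd resolves races when idx maps
// two input rows to the same target row.
template <typename IdType>
__global__ void ScatterAddSketch(const float* __restrict__ feat,
                                 const IdType* __restrict__ idx,
                                 float* __restrict__ out,
                                 int64_t n, int64_t dim) {
  int64_t tid = blockIdx.x * static_cast<int64_t>(blockDim.x) + threadIdx.x;
  if (tid >= n * dim) return;
  int64_t row = tid / dim, d = tid % dim;
  atomicAdd(&out[idx[row] * dim + d], feat[row * dim + d]);
}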
/*! /*!
* \brief CUDA implementation to update gradients for reduce op max/min * @brief CUDA implementation to update gradients for reduce op max/min
* \param graph The input heterogeneous graph. * @param graph The input heterogeneous graph.
* \param op The binary operator, could be `copy_u`, `copy_e`. * @param op The binary operator, could be `copy_u`, `copy_e`.
* \param list_feat List of the input tensors. * @param list_feat List of the input tensors.
* \param list_idx List of the indices tensors. * @param list_idx List of the indices tensors.
* \param list_idx_etype List of the node- or edge-type tensors. * @param list_idx_etype List of the node- or edge-type tensors.
* \param list_out List of the output tensors. * @param list_out List of the output tensors.
*/ */
template <typename IdType, typename DType> template <typename IdType, typename DType>
void UpdateGradMinMax_hetero( void UpdateGradMinMax_hetero(
...@@ -224,12 +224,12 @@ void UpdateGradMinMax_hetero( ...@@ -224,12 +224,12 @@ void UpdateGradMinMax_hetero(
} }
/*! /*!
* \brief CUDA implementation of backward phase of Segment Reduce with Min/Max * @brief CUDA implementation of backward phase of Segment Reduce with Min/Max
* reducer. * reducer.
* \note math equation: out[arg[i, k], k] = feat[i, k] * @note math equation: out[arg[i, k], k] = feat[i, k]
* \param feat The input tensor. * @param feat The input tensor.
* \param arg The ArgMin/Max information, used for indexing. * @param arg The ArgMin/Max information, used for indexing.
* \param out The output tensor. * @param out The output tensor.
*/ */
template <typename IdType, typename DType> template <typename IdType, typename DType>
void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) { void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
......
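The backward pass of segment min/max only routes gradients: every row of feat goes to the position recorded in arg during the forward pass. A reference sketch of BackwardSegmentCmp's documented equation (assuming out is zero-initialized and that a negative arg entry marks an empty segment; the sentinel is an assumption):

// out[arg[i, d], d] = feat[i, d]; untouched entries keep their zero gradient.
void BackwardSegmentCmpRef(const float* feat, const int64_t* arg, float* out,
                           int64_t n, int64_t dim) {
  for (int64_t i = 0; i < n; ++i)
    for (int64_t d = 0; d < dim; ++d) {
      int64_t a = arg[i * dim + d];
      if (a >= 0)  // assumed sentinel for "no contributing row"
        out[a * dim + d] = feat[i * dim + d];
    }
}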
/*! /*!
* Copyright (c) 2021 by contributors. * Copyright (c) 2021 by contributors.
* \file array/cuda/spmat_op_impl_coo.cu * @file array/cuda/spmat_op_impl_coo.cu
* \brief COO operator GPU implementation * @brief COO operator GPU implementation
*/ */
#include <dgl/array.h> #include <dgl/array.h>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/spmat_op_impl_csr.cu * @file array/cuda/spmat_op_impl_csr.cu
* \brief CSR operator GPU implementation * @brief CSR operator GPU implementation
*/ */
#include <dgl/array.h> #include <dgl/array.h>
...@@ -82,7 +82,7 @@ template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray); ...@@ -82,7 +82,7 @@ template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray);
///////////////////////////// CSRHasDuplicate ///////////////////////////// ///////////////////////////// CSRHasDuplicate /////////////////////////////
/*! /*!
* \brief Check that each row has no duplicate entries. * @brief Check that each row has no duplicate entries.
* Assume the CSR is sorted. * Assume the CSR is sorted.
*/ */
template <typename IdType> template <typename IdType>
...@@ -228,7 +228,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t); ...@@ -228,7 +228,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t); template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
/*! /*!
* \brief Copy data segment to output buffers * @brief Copy data segment to output buffers
* *
* For the i^th row r = row[i], copy the data from indptr[r] ~ indptr[r+1] * For the i^th row r = row[i], copy the data from indptr[r] ~ indptr[r+1]
* to the out_data from out_indptr[i] ~ out_indptr[i+1] * to the out_data from out_indptr[i] ~ out_indptr[i+1]
...@@ -299,7 +299,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, NDArray); ...@@ -299,7 +299,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, NDArray);
///////////////////////////// CSRGetDataAndIndices ///////////////////////////// ///////////////////////////// CSRGetDataAndIndices /////////////////////////////
/*! /*!
* \brief Generate a 0-1 mask for each index that hits the provided (row, col) * @brief Generate a 0-1 mask for each index that hits the provided (row, col)
* index. * index.
* *
* Examples: * Examples:
...@@ -331,7 +331,7 @@ __global__ void _SegmentMaskKernel( ...@@ -331,7 +331,7 @@ __global__ void _SegmentMaskKernel(
} }
/*! /*!
* \brief Search for the insertion positions for needle in the hay. * @brief Search for the insertion positions for needle in the hay.
* *
* The hay is a list of sorted elements and the result is the insertion position * The hay is a list of sorted elements and the result is the insertion position
* of each needle so that the insertion still gives sorted order. * of each needle so that the insertion still gives sorted order.
...@@ -424,7 +424,7 @@ template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>( ...@@ -424,7 +424,7 @@ template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>(
///////////////////////////// CSRSliceMatrix ///////////////////////////// ///////////////////////////// CSRSliceMatrix /////////////////////////////
/*! /*!
* \brief Generate a 0-1 mask for each index whose column is in the provided * @brief Generate a 0-1 mask for each index whose column is in the provided
* set. It also counts the number of masked values per row. * set. It also counts the number of masked values per row.
*/ */
template <typename IdType> template <typename IdType>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/spmm.cu * @file array/cuda/spmm.cu
* \brief SPMM C APIs and definitions. * @brief SPMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include "./spmm.cuh" #include "./spmm.cuh"
...@@ -16,8 +16,8 @@ using namespace cuda; ...@@ -16,8 +16,8 @@ using namespace cuda;
namespace aten { namespace aten {
/*! /*!
* \brief CUDA implementation of g-SpMM on Csr format. * @brief CUDA implementation of g-SpMM on Csr format.
* \note use cusparse if the reduce operator is `sum` and there is * @note use cusparse if the reduce operator is `sum` and there is
* no broadcast; use dgl's kernel in other cases. * no broadcast; use dgl's kernel in other cases.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
...@@ -81,7 +81,7 @@ void SpMMCsr(const std::string& op, const std::string& reduce, ...@@ -81,7 +81,7 @@ void SpMMCsr(const std::string& op, const std::string& reduce,
/*! /*!
* \brief CUDA implementation of g-SpMM on Coo format. * @brief CUDA implementation of g-SpMM on Coo format.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
void SpMMCoo(const std::string& op, const std::string& reduce, void SpMMCoo(const std::string& op, const std::string& reduce,
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/spmm.cuh * @file array/cuda/spmm.cuh
* \brief SPMM CUDA kernel function header. * @brief SPMM CUDA kernel function header.
*/ */
#ifndef DGL_ARRAY_CUDA_SPMM_CUH_ #ifndef DGL_ARRAY_CUDA_SPMM_CUH_
#define DGL_ARRAY_CUDA_SPMM_CUH_ #define DGL_ARRAY_CUDA_SPMM_CUH_
...@@ -22,7 +22,7 @@ using namespace cuda; ...@@ -22,7 +22,7 @@ using namespace cuda;
namespace aten { namespace aten {
/*! /*!
* \brief Determine whether cusparse SpMM function is applicable. * @brief Determine whether cusparse SpMM function is applicable.
*/ */
template <typename DType, typename IdType> template <typename DType, typename IdType>
inline bool cusparse_available(bool more_nnz_than_matrix_size) { inline bool cusparse_available(bool more_nnz_than_matrix_size) {
...@@ -41,7 +41,7 @@ inline bool cusparse_available(bool more_nnz_than_matrix_size) { ...@@ -41,7 +41,7 @@ inline bool cusparse_available(bool more_nnz_than_matrix_size) {
namespace { namespace {
/*! \brief Call cuBLAS geam API for transpose operation for float and double. */ /*! @brief Call cuBLAS geam API for transpose operation for float and double. */
template <typename DType> template <typename DType>
cublasStatus_t Xgeam(cublasHandle_t handle, cublasOperation_t transa, cublasStatus_t Xgeam(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n, cublasOperation_t transb, int m, int n,
...@@ -98,8 +98,8 @@ cublasStatus_t Xgeam<double>(cublasHandle_t handle, cublasOperation_t transa, ...@@ -98,8 +98,8 @@ cublasStatus_t Xgeam<double>(cublasHandle_t handle, cublasOperation_t transa,
beta, B, ldb, C, ldc); beta, B, ldb, C, ldc);
} }
/* \brief IndexSelect operator kernel implementation. /* @brief IndexSelect operator kernel implementation.
* \note duplicate of IndexSelectKernel defined in array_index_select.cu * @note duplicate of IndexSelectKernel defined in array_index_select.cu
*/ */
template <typename DType, typename IdType> template <typename DType, typename IdType>
__global__ void _IndexSelectKernel( __global__ void _IndexSelectKernel(
...@@ -112,8 +112,8 @@ __global__ void _IndexSelectKernel( ...@@ -112,8 +112,8 @@ __global__ void _IndexSelectKernel(
out[i * m + j] = in[idx[i] * m + j]; out[i * m + j] = in[idx[i] * m + j];
} }
/* \brief Transpose operator kernel implementation. /* @brief Transpose operator kernel implementation.
* \note not efficient but it's not a bottleneck, used for float16 dtype. * @note not efficient but it's not a bottleneck, used for float16 dtype.
*/ */
template <typename DType> template <typename DType>
__global__ void _TransposeKernel( __global__ void _TransposeKernel(
...@@ -126,9 +126,9 @@ __global__ void _TransposeKernel( ...@@ -126,9 +126,9 @@ __global__ void _TransposeKernel(
} }
/* /*
* \brief Transpose the input matrix. * @brief Transpose the input matrix.
* \param row number of rows of input matrix. * @param row number of rows of input matrix.
* \param col number of columns of input matrix. * @param col number of columns of input matrix.
*/ */
template <typename DType> template <typename DType>
void _Transpose(const DType* in, DType* out, void _Transpose(const DType* in, DType* out,
...@@ -150,8 +150,8 @@ void _Transpose(const DType* in, DType* out, ...@@ -150,8 +150,8 @@ void _Transpose(const DType* in, DType* out,
} }
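When no cuBLAS geam is available for a data type (the half and bf16 cases below), a plain elementwise kernel suffices, since the transpose is documented as not being a bottleneck here. A hypothetical sketch of such a fallback:

// Naive transpose: in is row x col, out is col x row, one thread per element.
template <typename DType>
__global__ void TransposeSketch(const DType* __restrict__ in,
                                DType* __restrict__ out,
                                int row, int col) {
  int i = blockIdx.y * blockDim.y + threadIdx.y;  // source row
  int j = blockIdx.x * blockDim.x + threadIdx.x;  // source column
  if (i < row && j < col)
    out[j * row + i] = in[i * col + j];
}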
/* /*
* \brief Transpose the input matrix for data type half. * @brief Transpose the input matrix for data type half.
* \note cuBLAS has no geam API for half data type, fallback to our kernel. * @note cuBLAS has no geam API for half data type, fallback to our kernel.
*/ */
template <> template <>
void _Transpose<half>(const half* in, half* out, void _Transpose<half>(const half* in, half* out,
...@@ -164,8 +164,8 @@ void _Transpose<half>(const half* in, half* out, ...@@ -164,8 +164,8 @@ void _Transpose<half>(const half* in, half* out,
#if BF16_ENABLED #if BF16_ENABLED
/* /*
* \brief Transpose the input matrix for data type bfloat16. * @brief Transpose the input matrix for data type bfloat16.
* \note cuBLAS has no geam API for bf16 data type, fallback to our kernel. * @note cuBLAS has no geam API for bf16 data type, fallback to our kernel.
*/ */
template <> template <>
void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out, void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out,
...@@ -178,7 +178,7 @@ void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out, ...@@ -178,7 +178,7 @@ void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out,
#endif // BF16_ENABLED #endif // BF16_ENABLED
/* /*
* \brief * @brief
*/ */
template <typename DType, typename IdType> template <typename DType, typename IdType>
__global__ void _IndexSelectKernel(const DType* array, const IdType* index, __global__ void _IndexSelectKernel(const DType* array, const IdType* index,
...@@ -191,8 +191,8 @@ __global__ void _IndexSelectKernel(const DType* array, const IdType* index, ...@@ -191,8 +191,8 @@ __global__ void _IndexSelectKernel(const DType* array, const IdType* index,
} }
} }
/* \brief IndexSelect operator. /* @brief IndexSelect operator.
* \note duplicate of IndexSelect defined in array_op.h but it cannot * @note duplicate of IndexSelect defined in array_op.h but it cannot
* be applied to float16 dtype. * be applied to float16 dtype.
*/ */
template<typename DType, typename IdType> template<typename DType, typename IdType>
...@@ -477,8 +477,8 @@ namespace cuda { ...@@ -477,8 +477,8 @@ namespace cuda {
/*! /*!
* \brief CUDA kernel of g-SpMM on Coo format. * @brief CUDA kernel of g-SpMM on Coo format.
* \note it uses edge parallel strategy, different threadblocks (on y-axis) * @note it uses edge parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks * are responsible for the computation on different edges. Threadblocks
* on the x-axis are responsible for the computation on different positions * on the x-axis are responsible for the computation on different positions
* in feature dimension. * in feature dimension.
...@@ -526,8 +526,8 @@ __global__ void SpMMCooKernel( ...@@ -526,8 +526,8 @@ __global__ void SpMMCooKernel(
} }
/*! /*!
* \brief CUDA kernel to compute argu and arge in g-SpMM on Coo format. * @brief CUDA kernel to compute argu and arge in g-SpMM on Coo format.
* \note it uses edge parallel strategy, different threadblocks (on y-axis) * @note it uses edge parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks * are responsible for the computation on different edges. Threadblocks
* on the x-axis are responsible for the computation on different positions * on the x-axis are responsible for the computation on different positions
* in feature dimension. * in feature dimension.
...@@ -574,8 +574,8 @@ __global__ void ArgSpMMCooKernel( ...@@ -574,8 +574,8 @@ __global__ void ArgSpMMCooKernel(
} }
/*! /*!
* \brief CUDA kernel of g-SpMM on Csr format. * @brief CUDA kernel of g-SpMM on Csr format.
* \note it uses node parallel strategy, different threadblocks (on y-axis) * @note it uses node parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different destination nodes. * are responsible for the computation on different destination nodes.
* Threadblocks on the x-axis are responsible for the computation on * Threadblocks on the x-axis are responsible for the computation on
* different positions in feature dimension. * different positions in feature dimension.
...@@ -632,8 +632,8 @@ __global__ void SpMMCsrKernel( ...@@ -632,8 +632,8 @@ __global__ void SpMMCsrKernel(
} }
/*! /*!
* \brief CUDA kernel of SpMM-Min/Max on Csr format. * @brief CUDA kernel of SpMM-Min/Max on Csr format.
* \note it uses node parallel strategy, different threadblocks (on y-axis) * @note it uses node parallel strategy, different threadblocks (on y-axis)
* are responsible for the computation on different destination nodes. * are responsible for the computation on different destination nodes.
* Threadblocks on the x-axis are responsible for the computation on * Threadblocks on the x-axis are responsible for the computation on
* different positions in feature dimension. * different positions in feature dimension.
...@@ -693,16 +693,16 @@ __global__ void SpMMCmpCsrHeteroKernel( ...@@ -693,16 +693,16 @@ __global__ void SpMMCmpCsrHeteroKernel(
} }
/*! /*!
* \brief CUDA implementation of g-SpMM on Coo format. * @brief CUDA implementation of g-SpMM on Coo format.
* \param bcast Broadcast information. * @param bcast Broadcast information.
* \param coo The Coo matrix. * @param coo The Coo matrix.
* \param ufeat The feature on source nodes. * @param ufeat The feature on source nodes.
* \param efeat The feature on edges. * @param efeat The feature on edges.
* \param out The result feature on destination nodes. * @param out The result feature on destination nodes.
* \param argu Arg-Min/Max on source nodes, which refers to the source node indices * @param argu Arg-Min/Max on source nodes, which refers to the source node indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param arge Arg-Min/Max on edges, which refers to the edge indices * @param arge Arg-Min/Max on edges, which refers to the edge indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
*/ */
...@@ -770,16 +770,16 @@ void SpMMCoo( ...@@ -770,16 +770,16 @@ void SpMMCoo(
} }
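With op = mul and reduce = sum, g-SpMM on Coo is an edgewise multiply-and-accumulate into destination rows. A reference sketch of those semantics (out assumed zero-initialized, no broadcasting; the min/max reducers additionally fill argu/arge as documented above):

// out[dst[e], :] += ufeat[src[e], :] * efeat[e, :] over all edges e.
void SpMMCooRef(const float* ufeat, const float* efeat, float* out,
                const int64_t* src, const int64_t* dst,
                int64_t num_edges, int64_t dim) {
  for (int64_t e = 0; e < num_edges; ++e)
    for (int64_t d = 0; d < dim; ++d)
      out[dst[e] * dim + d] += ufeat[src[e] * dim + d] * efeat[e * dim + d];
}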
/*! /*!
* \brief CUDA implementation of g-SpMM on Csr format. * @brief CUDA implementation of g-SpMM on Csr format.
* \param bcast Broadcast information. * @param bcast Broadcast information.
* \param csr The Csr matrix. * @param csr The Csr matrix.
* \param ufeat The feature on source nodes. * @param ufeat The feature on source nodes.
* \param efeat The feature on edges. * @param efeat The feature on edges.
* \param out The result feature on destination nodes. * @param out The result feature on destination nodes.
* \param argu Arg-Min/Max on source nodes, which refers to the source node indices * @param argu Arg-Min/Max on source nodes, which refers to the source node indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param arge Arg-Min/Max on edges, which refers to the edge indices * @param arge Arg-Min/Max on edges, which refers to the edge indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
*/ */
...@@ -825,26 +825,26 @@ void SpMMCsr( ...@@ -825,26 +825,26 @@ void SpMMCsr(
} }
/*! /*!
* \brief CUDA kernel of SpMM-Min/Max on Csr format on heterogeneous graph. * @brief CUDA kernel of SpMM-Min/Max on Csr format on heterogeneous graph.
* \param bcast Broadcast information. * @param bcast Broadcast information.
* \param csr The Csr matrix. * @param csr The Csr matrix.
* \param ufeat The feature on source nodes. * @param ufeat The feature on source nodes.
* \param efeat The feature on edges. * @param efeat The feature on edges.
* \param out The result feature on destination nodes. * @param out The result feature on destination nodes.
* \param argu Arg-Min/Max on source nodes, which refers to the source node indices * @param argu Arg-Min/Max on source nodes, which refers to the source node indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param arge Arg-Min/Max on edges, which refers to the edge indices * @param arge Arg-Min/Max on edges, which refers to the edge indices
* corresponding to the minimum/maximum values of the reduction result on * corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param argu_ntype Node type of the arg-Min/Max on source nodes, which refers to the * @param argu_ntype Node type of the arg-Min/Max on source nodes, which refers to the
* source node types corresponding to the minimum/maximum values of the reduction result * source node types corresponding to the minimum/maximum values of the reduction result
* on destination nodes. It's useful in computing gradients of the Min/Max reducer. * on destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param arge_etype Edge type of the arg-Min/Max on edges, which refers to the edge * @param arge_etype Edge type of the arg-Min/Max on edges, which refers to the edge
* types corresponding to the minimum/maximum values of the reduction result on * types corresponding to the minimum/maximum values of the reduction result on
* destination nodes. It's useful in computing gradients of the Min/Max reducer. * destination nodes. It's useful in computing gradients of the Min/Max reducer.
* \param src_type Node type of the source nodes of an etype * @param src_type Node type of the source nodes of an etype
* \param etype Edge type * @param etype Edge type
*/ */
template <typename Idx, typename DType, template <typename Idx, typename DType,
typename BinaryOp, typename ReduceOp> typename BinaryOp, typename ReduceOp>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/spmm.cu * @file array/cuda/spmm.cu
* \brief SPMM C APIs and definitions. * @brief SPMM C APIs and definitions.
*/ */
#include <dgl/array.h> #include <dgl/array.h>
#include "./spmm.cuh" #include "./spmm.cuh"
...@@ -16,8 +16,8 @@ using namespace cuda; ...@@ -16,8 +16,8 @@ using namespace cuda;
namespace aten { namespace aten {
/*! /*!
* \brief CUDA implementation of g-SpMM on Csr format. * @brief CUDA implementation of g-SpMM on Csr format.
* \note use cusparse if the reduce operator is `sum` and there is * @note use cusparse if the reduce operator is `sum` and there is
* no broadcast; use dgl's kernel in other cases. * no broadcast; use dgl's kernel in other cases.
*/ */
template <int XPU, typename IdType, typename DType> template <int XPU, typename IdType, typename DType>
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/utils.cu * @file array/cuda/utils.cu
* \brief Utilities for CUDA kernels. * @brief Utilities for CUDA kernels.
*/ */
#include "./utils.h" #include "./utils.h"
......
/*! /*!
* Copyright (c) 2020 by Contributors * Copyright (c) 2020 by Contributors
* \file array/cuda/utils.h * @file array/cuda/utils.h
* \brief Utilities for CUDA kernels. * @brief Utilities for CUDA kernels.
*/ */
#ifndef DGL_ARRAY_CUDA_UTILS_H_ #ifndef DGL_ARRAY_CUDA_UTILS_H_
#define DGL_ARRAY_CUDA_UTILS_H_ #define DGL_ARRAY_CUDA_UTILS_H_
...@@ -23,7 +23,7 @@ namespace cuda { ...@@ -23,7 +23,7 @@ namespace cuda {
#define CUDA_MAX_NUM_THREADS 256 #define CUDA_MAX_NUM_THREADS 256
/*! \brief Calculate the number of threads needed given the dimension length. /*! @brief Calculate the number of threads needed given the dimension length.
* *
* It finds the biggest number that is smaller than min(dim, max_nthrs) * It finds the biggest number that is smaller than min(dim, max_nthrs)
* and is also a power of two. * and is also a power of two.
...@@ -78,20 +78,20 @@ __device__ __forceinline__ T _ldg(T* addr) { ...@@ -78,20 +78,20 @@ __device__ __forceinline__ T _ldg(T* addr) {
} }
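The thread-count computation described above amounts to rounding min(dim, max_nthrs) down to a power of two. A sketch of that contract (it assumes max_nthrs is itself a power of two, as CUDA_MAX_NUM_THREADS = 256 is; this is an illustration, not the exact helper):

// Largest power of two that does not exceed min(dim, max_nthrs).
inline int FindNumThreadsSketch(int dim, int max_nthrs) {
  int ret = max_nthrs;
  while (ret > dim) ret >>= 1;  // halve until it no longer exceeds dim
  return ret > 0 ? ret : 1;     // never launch zero threads
}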
/*! /*!
* \brief Return true if the given bool flag array is all true. * @brief Return true if the given bool flag array is all true.
* The input bool array is of int8_t type so each flag is byte-addressable. * The input bool array is of int8_t type so each flag is byte-addressable.
* *
* \param flags The bool array. * @param flags The bool array.
* \param length The length. * @param length The length.
* \param ctx Device context. * @param ctx Device context.
* \return True if all the flags are true. * @return True if all the flags are true.
*/ */
bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx); bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx);
/*! /*!
* \brief CUDA Kernel of filling the vector starting at ptr of size length * @brief CUDA Kernel of filling the vector starting at ptr of size length
* with val. * with val.
* \note internal use only. * @note internal use only.
*/ */
template <typename DType> template <typename DType>
__global__ void _FillKernel(DType* ptr, size_t length, DType val) { __global__ void _FillKernel(DType* ptr, size_t length, DType val) {
...@@ -103,7 +103,7 @@ __global__ void _FillKernel(DType* ptr, size_t length, DType val) { ...@@ -103,7 +103,7 @@ __global__ void _FillKernel(DType* ptr, size_t length, DType val) {
} }
} }
/*! \brief Fill the vector starting at ptr of size length with val */ /*! @brief Fill the vector starting at ptr of size length with val */
template <typename DType> template <typename DType>
void _Fill(DType* ptr, size_t length, DType val) { void _Fill(DType* ptr, size_t length, DType val) {
cudaStream_t stream = runtime::getCurrentCUDAStream(); cudaStream_t stream = runtime::getCurrentCUDAStream();
...@@ -113,7 +113,7 @@ void _Fill(DType* ptr, size_t length, DType val) { ...@@ -113,7 +113,7 @@ void _Fill(DType* ptr, size_t length, DType val) {
} }
/*! /*!
* \brief Search adjacency list linearly for each (row, col) pair and * @brief Search adjacency list linearly for each (row, col) pair and
* write the data under the matched position in the indices array to the output. * write the data under the matched position in the indices array to the output.
* *
* If there is no match, the value in \c filler is written. * If there is no match, the value in \c filler is written.
...@@ -157,7 +157,7 @@ __global__ void _LinearSearchKernel( ...@@ -157,7 +157,7 @@ __global__ void _LinearSearchKernel(
#if BF16_ENABLED #if BF16_ENABLED
/*! /*!
* \brief Specialization for bf16 because conversion from long long to bfloat16 * @brief Specialization for bf16 because conversion from long long to bfloat16
* doesn't exist before SM80. * doesn't exist before SM80.
*/ */
template <typename IdType> template <typename IdType>
...@@ -206,7 +206,7 @@ inline DType GetCUDAScalar( ...@@ -206,7 +206,7 @@ inline DType GetCUDAScalar(
} }
/*! /*!
* \brief Given a sorted array and a value, this function returns the index * @brief Given a sorted array and a value, this function returns the index
* of the first element which compares greater than value. * of the first element which compares greater than value.
* *
* This function assumes 0-based index * This function assumes 0-based index
...@@ -231,7 +231,7 @@ __device__ IdType _UpperBound(const IdType *A, int64_t n, IdType x) { ...@@ -231,7 +231,7 @@ __device__ IdType _UpperBound(const IdType *A, int64_t n, IdType x) {
} }
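The upper-bound search above is a standard half-open binary search. A sketch consistent with the documented contract (returns the first index whose element compares greater than x, or n when every element is <= x; the name is hypothetical):

template <typename IdType>
__device__ IdType UpperBoundSketch(const IdType* A, int64_t n, IdType x) {
  int64_t lo = 0, hi = n;  // half-open search window [lo, hi)
  while (lo < hi) {
    int64_t mid = lo + (hi - lo) / 2;
    if (A[mid] <= x)
      lo = mid + 1;        // answer lies strictly to the right of mid
    else
      hi = mid;
  }
  return static_cast<IdType>(lo);
}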
/*! /*!
* \brief Given a sorted array and a value, this function returns the index * @brief Given a sorted array and a value, this function returns the index
* of the element that is equal to val. If no such element exists, it returns n+1. * of the element that is equal to val. If no such element exists, it returns n+1.
* *
* This function assumes 0-based index * This function assumes 0-based index
......
/*! /*!
* Copyright (c) 2019-2022 by Contributors * Copyright (c) 2019-2022 by Contributors
* \file array/cuda/uvm/array_index_select_uvm.cu * @file array/cuda/uvm/array_index_select_uvm.cu
* \brief Array index select GPU implementation * @brief Array index select GPU implementation
*/ */
#include <dgl/array.h> #include <dgl/array.h>
......