Unverified commit bcd37684, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Replace /*! with /**. (#4823)



* replace

* blabla

* blabla

* blabla
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 619d735d
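For context, both `/*!` (Qt style) and `/**` (JavaDoc style) open a Doxygen documentation block; this commit simply standardizes the codebase on the JavaDoc form. Below is a minimal sketch of the adopted style, using a hypothetical function that does not appear in the diff:

```cpp
// Old opener (Qt style), being phased out by this commit:  /*! ... */
// New opener (JavaDoc style), adopted throughout:          /** ... */

/**
 * @brief Adds two integers (hypothetical example, for illustration only).
 * @param a First operand.
 * @param b Second operand.
 * @return The sum of a and b.
 */
int Add(int a, int b) { return a + b; }
```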
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/macro.cuh
 * @brief Macro to call SPMM/SDDMM cuda kernels.
......
-/*!
+/**
 * Copyright (c) 2021 by Contributors
 * @file array/cuda/negative_sampling.cu
 * @brief rowwise sampling
@@ -80,7 +80,7 @@ struct IsNotMinusOne {
 }
 };
-/*!
+/**
 * @brief Sort ordered pairs in ascending order, using \a tmp_major and \a
 * tmp_minor as temporary buffers, each with \a n elements.
 */
......
-/*!
+/**
 * Copyright (c) 2021 by Contributors
 * @file array/cuda/rowwise_sampling.cu
 * @brief uniform rowwise sampling
......
-/*!
+/**
 * Copyright (c) 2022 by Contributors
 * @file array/cuda/rowwise_sampling_prob.cu
 * @brief weighted rowwise sampling. The degree computing kernels and
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/sddmm.cu
 * @brief SDDMM C APIs and definitions.
@@ -10,7 +10,7 @@
 namespace dgl {
 namespace aten {
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on Csr format.
 */
 template <int XPU, typename IdType, typename DType>
@@ -30,7 +30,7 @@ void SDDMMCsr(const std::string& op,
 }
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on Coo format.
 */
 template <int XPU, typename IdType, typename DType>
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/sddmm.cuh
 * @brief SDDMM CUDA kernel function header.
@@ -86,7 +86,7 @@ namespace cuda {
 constexpr unsigned int full_mask = 0xffffffff;
-/*!
+/**
 * @brief CUDA kernel of g-SDDMM on Coo format.
 * @note it uses edge parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different edges. Threadblocks
@@ -135,7 +135,7 @@ __global__ void SDDMMCooKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel of SDDMM-dot on Coo format, accelerated with tree reduction.
 * @note it uses edge parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different edges. Threadblocks
@@ -203,7 +203,7 @@ __device__ __forceinline__ Idx BinarySearchSrc(const Idx *array, Idx length, Idx
 }
 }
-/*!
+/**
 * @brief CUDA kernel of g-SDDMM on Csr format.
 * @note it uses edge parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different edges. Threadblocks
@@ -254,7 +254,7 @@ __global__ void SDDMMCsrKernel(
 }
 }
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on Coo format.
 * @param bcast Broadcast information.
 * @param coo The Coo matrix.
@@ -323,7 +323,7 @@ void SDDMMCoo(
 }
 }
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/sddmm.cu
 * @brief SDDMM C APIs and definitions.
@@ -9,7 +9,7 @@
 namespace dgl {
 namespace aten {
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on heterograph using
 Csr format.
 */
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/sddmm.cu
 * @brief SDDMM C APIs and definitions.
@@ -9,7 +9,7 @@
 namespace dgl {
 namespace aten {
-/*!
+/**
 * @brief CUDA implementation of g-SDDMM on heterograph using
 Csr format.
 */
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/segment_reduce.cu
 * @brief Segment reduce C APIs and definitions.
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/segment_reduce.cuh
 * @brief Segment reduce kernel function header.
@@ -20,7 +20,7 @@ using namespace cuda;
 namespace aten {
 namespace cuda {
-/*!
+/**
 * @brief CUDA kernel of segment reduce.
 * @note each blockthread is responsible for aggregation on a row
 * in the result tensor.
@@ -44,7 +44,7 @@ __global__ void SegmentReduceKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel of scatter add.
 * @note each blockthread is responsible for adding a row in feature tensor
 * to a target row in output tensor.
@@ -62,7 +62,7 @@ __global__ void ScatterAddKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel to update gradients for reduce op max/min
 * @note each WARP (group of 32 threads) is responsible for adding a row in
 * feature tensor to a target row in output tensor.
@@ -90,7 +90,7 @@ __global__ void UpdateGradMinMaxHeteroKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel of backward phase in segment min/max.
 * @note each blockthread is responsible for writing a row in the
 * result gradient tensor by lookup the ArgMin/Max for index information.
@@ -110,7 +110,7 @@ __global__ void BackwardSegmentCmpKernel(
 }
 }
-/*!
+/**
 * @brief CUDA implementation of forward phase of Segment Reduce.
 * @param feat The input tensor.
 * @param offsets The offsets tensor.
@@ -141,7 +141,7 @@ void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) {
 feat_data, offsets_data, out_data, arg_data, n, dim);
 }
-/*!
+/**
 * @brief CUDA implementation of Scatter Add (on first dimension).
 * @note math equation: out[idx[i], *] += feat[i, *]
 * @param feat The input tensor.
@@ -170,7 +170,7 @@ void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
 idx_data, out_data, n, dim);
 }
-/*!
+/**
 * @brief CUDA implementation to update gradients for reduce op max/min
 * @param graph The input heterogeneous graph.
 * @param op The binary operator, could be `copy_u`, `copy_e'.
@@ -223,7 +223,7 @@ void UpdateGradMinMax_hetero(
 }
 }
-/*!
+/**
 * @brief CUDA implementation of backward phase of Segment Reduce with Min/Max
 * reducer.
 * @note math equation: out[arg[i, k], k] = feat[i, k] \param feat The input
......
-/*!
+/**
 * Copyright (c) 2021 by contributors.
 * @file array/cuda/spmat_op_impl_coo.cu
 * @brief COO operator GPU implementation
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/spmat_op_impl_csr.cu
 * @brief CSR operator CPU implementation
@@ -81,7 +81,7 @@ template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray);
 ///////////////////////////// CSRHasDuplicate /////////////////////////////
-/*!
+/**
 * @brief Check whether each row does not have any duplicate entries.
 * Assume the CSR is sorted.
 */
@@ -227,7 +227,7 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
 template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
 template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
-/*!
+/**
 * @brief Copy data segment to output buffers
 *
 * For the i^th row r = row[i], copy the data from indptr[r] ~ indptr[r+1]
@@ -298,7 +298,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, NDArray);
 ///////////////////////////// CSRGetDataAndIndices /////////////////////////////
-/*!
+/**
 * @brief Generate a 0-1 mask for each index that hits the provided (row, col)
 * index.
 *
@@ -330,7 +330,7 @@ __global__ void _SegmentMaskKernel(
 }
 }
-/*!
+/**
 * @brief Search for the insertion positions for needle in the hay.
 *
 * The hay is a list of sorted elements and the result is the insertion position
@@ -423,7 +423,7 @@ template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>(
 ///////////////////////////// CSRSliceMatrix /////////////////////////////
-/*!
+/**
 * @brief Generate a 0-1 mask for each index whose column is in the provided
 * set. It also counts the number of masked values per row.
 */
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/spmm.cu
 * @brief SPMM C APIs and definitions.
@@ -15,7 +15,7 @@ using namespace cuda;
 namespace aten {
-/*!
+/**
 * @brief CUDA implementation of g-SpMM on Csr format.
 * @note use cusparse if the reduce operator is `sum` and there is
 * no broadcast, use dgl's kernel in other cases.
@@ -80,7 +80,7 @@ void SpMMCsr(const std::string& op, const std::string& reduce,
 }
-/*!
+/**
 * @brief CUDA implementation of g-SpMM on Coo format.
 */
 template <int XPU, typename IdType, typename DType>
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/spmm.cuh
 * @brief SPMM CUDA kernel function header.
@@ -21,7 +21,7 @@ using namespace cuda;
 namespace aten {
-/*!
+/**
 * @brief Determine whether cusparse SpMM function is applicable.
 */
 template <typename DType, typename IdType>
@@ -41,7 +41,7 @@ inline bool cusparse_available(bool more_nnz_than_matrix_size) {
 namespace {
-/*! @brief Call cuBLAS geam API for transpose operation for float and double. */
+/** @brief Call cuBLAS geam API for transpose operation for float and double. */
 template <typename DType>
 cublasStatus_t Xgeam(cublasHandle_t handle, cublasOperation_t transa,
 cublasOperation_t transb, int m, int n,
@@ -98,7 +98,8 @@ cublasStatus_t Xgeam<double>(cublasHandle_t handle, cublasOperation_t transa,
 beta, B, ldb, C, ldc);
 }
-/* @brief IndexSelect operator kernel implementation.
+/**
+ * @brief IndexSelect operator kernel implementation.
 * @note duplicate of IndexSelectKernel defined in array_index_select.cu
 */
 template <typename DType, typename IdType>
@@ -112,7 +113,8 @@ __global__ void _IndexSelectKernel(
 out[i * m + j] = in[idx[i] * m + j];
 }
-/* @brief Transpose operator kernel implementation.
+/**
+ * @brief Transpose operator kernel implementation.
 * @note not efficient but it's not a bottleneck, used for float16 dtype.
 */
 template <typename DType>
@@ -125,7 +127,7 @@ __global__ void _TransposeKernel(
 out[i * m + j] = in[j * n + i];
 }
-/*
+/**
 * @brief Tranpose the input matrix.
 * @param row number of rows of input matrix.
 * @param col number of columns of input matrix.
@@ -149,7 +151,7 @@ void _Transpose(const DType* in, DType* out,
 out, row));
 }
-/*
+/**
 * @brief Tranpose the input matrix for data type half.
 * @note cuBLAS has no geam API for half data type, fallback to our kernel.
 */
@@ -163,7 +165,7 @@ void _Transpose<half>(const half* in, half* out,
 }
 #if BF16_ENABLED
-/*
+/**
 * @brief Tranpose the input matrix for data type half.
 * @note cuBLAS has no geam API for bf16 data type, fallback to our kernel.
 */
@@ -177,7 +179,7 @@ void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out,
 }
 #endif // BF16_ENABLED
-/*
+/**
 * @brief
 */
 template <typename DType, typename IdType>
@@ -247,7 +249,7 @@ cusparseStatus_t Xcsrmm2<double>(cusparseHandle_t handle, cusparseOperation_t tr
 }
 #endif
-/*! Cusparse implementation of SpMM on Csr format. */
+/** Cusparse implementation of SpMM on Csr format. */
 template <typename DType, typename IdType>
 void CusparseCsrmm2(
 const DGLContext& ctx,
@@ -347,7 +349,7 @@ void CusparseCsrmm2(
 device->FreeWorkspace(ctx, valptr);
 }
-/*! Cusparse implementation of SpMM on Csr format. */
+/** Cusparse implementation of SpMM on Csr format. */
 template <typename DType, typename IdType>
 void CusparseCsrmm2Hetero(
 const DGLContext& ctx,
@@ -476,7 +478,7 @@ void CusparseCsrmm2Hetero(
 namespace cuda {
-/*!
+/**
 * @brief CUDA kernel of g-SpMM on Coo format.
 * @note it uses edge parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different edges. Threadblocks
@@ -525,7 +527,7 @@ __global__ void SpMMCooKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel to compute argu and arge in g-SpMM on Coo format.
 * @note it uses edge parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different edges. Threadblocks
@@ -573,7 +575,7 @@ __global__ void ArgSpMMCooKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel of g-SpMM on Csr format.
 * @note it uses node parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different destination nodes.
@@ -631,7 +633,7 @@ __global__ void SpMMCsrKernel(
 }
 }
-/*!
+/**
 * @brief CUDA kernel of SpMM-Min/Max on Csr format.
 * @note it uses node parallel strategy, different threadblocks (on y-axis)
 * is responsible for the computation on different destination nodes.
@@ -692,7 +694,7 @@ __global__ void SpMMCmpCsrHeteroKernel(
 }
 }
-/*!
+/**
 * @brief CUDA implementation of g-SpMM on Coo format.
 * @param bcast Broadcast information.
 * @param coo The Coo matrix.
@@ -769,7 +771,7 @@ void SpMMCoo(
 });
 }
-/*!
+/**
 * @brief CUDA implementation of g-SpMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
@@ -824,7 +826,7 @@ void SpMMCsr(
 });
 }
-/*!
+/**
 * @brief CUDA kernel of SpMM-Min/Max on Csr format on heterogeneous graph.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/spmm.cu
 * @brief SPMM C APIs and definitions.
@@ -15,7 +15,7 @@ using namespace cuda;
 namespace aten {
-/*!
+/**
 * @brief CUDA implementation of g-SpMM on Csr format.
 * @note use cusparse if the reduce operator is `sum` and there is
 * no broadcast, use dgl's kernel in other cases.
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/utils.cu
 * @brief Utilities for CUDA kernels.
......
-/*!
+/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/utils.h
 * @brief Utilities for CUDA kernels.
@@ -23,7 +23,7 @@ namespace cuda {
 #define CUDA_MAX_NUM_THREADS 256
-/*! @brief Calculate the number of threads needed given the dimension length.
+/** @brief Calculate the number of threads needed given the dimension length.
 *
 * It finds the biggest number that is smaller than min(dim, max_nthrs)
 * and is also power of two.
@@ -39,7 +39,7 @@ inline int FindNumThreads(int dim, int max_nthrs = CUDA_MAX_NUM_THREADS) {
 return ret;
 }
-/*
+/**
 * !\brief Find number of blocks is smaller than nblks and max_nblks
 * on the given axis ('x', 'y' or 'z').
 */
@@ -77,7 +77,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
 #endif
 }
-/*!
+/**
 * @brief Return true if the given bool flag array is all true.
 * The input bool array is in int8_t type so it is aligned with byte address.
 *
@@ -88,7 +88,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
 */
 bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx);
-/*!
+/**
 * @brief CUDA Kernel of filling the vector started from ptr of size length
 * with val.
 * @note internal use only.
@@ -103,7 +103,7 @@ __global__ void _FillKernel(DType* ptr, size_t length, DType val) {
 }
 }
-/*! @brief Fill the vector started from ptr of size length with val */
+/** @brief Fill the vector started from ptr of size length with val */
 template <typename DType>
 void _Fill(DType* ptr, size_t length, DType val) {
 cudaStream_t stream = runtime::getCurrentCUDAStream();
@@ -112,7 +112,7 @@ void _Fill(DType* ptr, size_t length, DType val) {
 CUDA_KERNEL_CALL(cuda::_FillKernel, nb, nt, 0, stream, ptr, length, val);
 }
-/*!
+/**
 * @brief Search adjacency list linearly for each (row, col) pair and
 * write the data under the matched position in the indices array to the output.
 *
@@ -156,7 +156,7 @@ __global__ void _LinearSearchKernel(
 }
 #if BF16_ENABLED
-/*!
+/**
 * @brief Specialization for bf16 because conversion from long long to bfloat16
 * doesn't exist before SM80.
 */
@@ -205,7 +205,7 @@ inline DType GetCUDAScalar(
 return result;
 }
-/*!
+/**
 * @brief Given a sorted array and a value this function returns the index
 * of the first element which compares greater than value.
 *
@@ -230,7 +230,7 @@ __device__ IdType _UpperBound(const IdType *A, int64_t n, IdType x) {
 return l;
 }
-/*!
+/**
 * @brief Given a sorted array and a value this function returns the index
 * of the element who is equal to val. If not exist returns n+1
 *
......
-/*!
+/**
 * Copyright (c) 2019-2022 by Contributors
 * @file array/cuda/uvm/array_index_select_uvm.cu
 * @brief Array index select GPU implementation
......
-/*!
+/**
 * Copyright (c) 2021 by Contributors
 * @file array/cpu/array_index_select_uvm.cuh
 * @brief Array index select GPU kernel implementation
@@ -13,9 +13,10 @@ namespace dgl {
 namespace aten {
 namespace impl {
-/* This is a cross-device access version of IndexSelectMultiKernel.
- * Since the memory access over PCIe is more sensitive to the
- * data access aligment (cacheline), we need a separate version here.
+/**
+ * This is a cross-device access version of IndexSelectMultiKernel.
+ * Since the memory access over PCIe is more sensitive to the
+ * data access aligment (cacheline), we need a separate version here.
 */
 template <typename DType, typename IdType>
 __global__ void IndexSelectMultiKernelAligned(
......
-/*!
+/**
 * Copyright (c) 2021 by Contributors
 * @file array/filter.cc
 * @brief Object for selecting items in a set, or selecting items not in a set.
......