Unverified Commit bcd37684 authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Replace /*! with /**. (#4823)



* replace

* blabla

* balbla

* blabla
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 619d735d
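Both /*! and /** open a Doxygen documentation block; /** is the JavaDoc-style opener. A minimal before/after illustration of the change this commit applies (hypothetical snippet, not taken from the diff below):

/*! Old Qt-style Doxygen opener.
 * @brief Adds two integers.
 */
int AddOld(int a, int b) { return a + b; }

/** New JavaDoc-style opener, identical Doxygen semantics.
 * @brief Adds two integers.
 */
int AddNew(int a, int b) { return a + b; }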
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/macro.cuh
* @brief Macro to call SPMM/SDDMM cuda kernels.
......
/*!
/**
* Copyright (c) 2021 by Contributors
* @file array/cuda/negative_sampling.cu
* @brief negative sampling
......@@ -80,7 +80,7 @@ struct IsNotMinusOne {
}
};
/*!
/**
* @brief Sort ordered pairs in ascending order, using \a tmp_major and \a
* tmp_minor as temporary buffers, each with \a n elements.
*/
......
/*!
/**
* Copyright (c) 2021 by Contributors
* @file array/cuda/rowwise_sampling.cu
* @brief uniform rowwise sampling
......
/*!
/**
* Copyright (c) 2022 by Contributors
* @file array/cuda/rowwise_sampling_prob.cu
* @brief weighted rowwise sampling. The degree computing kernels and
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/sddmm.cu
* @brief SDDMM C APIs and definitions.
......@@ -10,7 +10,7 @@
namespace dgl {
namespace aten {
/*!
/**
* @brief CUDA implementation of g-SDDMM on Csr format.
*/
template <int XPU, typename IdType, typename DType>
......@@ -30,7 +30,7 @@ void SDDMMCsr(const std::string& op,
}
/*!
/**
* @brief CUDA implementation of g-SDDMM on Coo format.
*/
template <int XPU, typename IdType, typename DType>
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/sddmm.cuh
* @brief SDDMM CUDA kernel function header.
......@@ -86,7 +86,7 @@ namespace cuda {
constexpr unsigned int full_mask = 0xffffffff;
/*!
/**
* @brief CUDA kernel of g-SDDMM on Coo format.
* @note It uses an edge-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks
......@@ -135,7 +135,7 @@ __global__ void SDDMMCooKernel(
}
}
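The edge-parallel strategy noted above maps one edge to each thread along the y-axis; a minimal, self-contained sketch of that indexing pattern (hypothetical names, not the actual SDDMMCooKernel):

template <typename IdType>
__global__ void EdgeParallelSketch(
    const IdType* row, const IdType* col, int64_t num_edges, float* out) {
  // One edge per y-thread; threads advance by the full y-grid stride.
  int64_t ty = blockIdx.y * (int64_t)blockDim.y + threadIdx.y;
  const int64_t stride_y = (int64_t)blockDim.y * gridDim.y;
  while (ty < num_edges) {
    const IdType src = row[ty];    // source endpoint of edge ty
    const IdType dst = col[ty];    // destination endpoint of edge ty
    out[ty] = (float)(src + dst);  // stand-in for the real per-edge op
    ty += stride_y;
  }
}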
/*!
/**
* @brief CUDA kernel of SDDMM-dot on Coo format, accelerated with tree reduction.
* @note It uses an edge-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks
......@@ -203,7 +203,7 @@ __device__ __forceinline__ Idx BinarySearchSrc(const Idx *array, Idx length, Idx
}
}
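The tree reduction mentioned above for the SDDMM-dot kernel is typically a warp-level shuffle reduction, which is what the full_mask constant feeds; a self-contained sketch (illustrative name, not the kernel's actual helper):

__device__ __forceinline__ float WarpReduceSumSketch(float val) {
  // full_mask (0xffffffff) selects all 32 lanes of the warp.
  for (int offset = 16; offset > 0; offset >>= 1)
    val += __shfl_down_sync(0xffffffff, val, offset);
  return val;  // lane 0 ends up holding the warp-wide sum
}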
/*!
/**
* @brief CUDA kernel of g-SDDMM on Csr format.
* @note It uses an edge-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks
......@@ -254,7 +254,7 @@ __global__ void SDDMMCsrKernel(
}
}
/*!
/**
* @brief CUDA implementation of g-SDDMM on Coo format.
* @param bcast Broadcast information.
* @param coo The Coo matrix.
......@@ -323,7 +323,7 @@ void SDDMMCoo(
}
}
/*!
/**
* @brief CUDA implementation of g-SDDMM on Csr format.
* @param bcast Broadcast information.
* @param csr The Csr matrix.
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/sddmm.cu
* @brief SDDMM C APIs and definitions.
......@@ -9,7 +9,7 @@
namespace dgl {
namespace aten {
/*!
/**
* @brief CUDA implementation of g-SDDMM on heterograph using
* Csr format.
*/
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/sddmm.cu
* @brief SDDMM C APIs and definitions.
......@@ -9,7 +9,7 @@
namespace dgl {
namespace aten {
/*!
/**
* @brief CUDA implementation of g-SDDMM on heterograph using
* Csr format.
*/
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/segment_reduce.cu
* @brief Segment reduce C APIs and definitions.
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/segment_reduce.cuh
* @brief Segment reduce kernel function header.
......@@ -20,7 +20,7 @@ using namespace cuda;
namespace aten {
namespace cuda {
/*!
/**
* @brief CUDA kernel of segment reduce.
* @note each blockthread is responsible for aggregation on a row
* in the result tensor.
......@@ -44,7 +44,7 @@ __global__ void SegmentReduceKernel(
}
}
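A simplified sum-reduce instance of the scheme just described, assuming row-major layout (names are illustrative, not the actual SegmentReduceKernel):

template <typename DType>
__global__ void SegmentSumSketch(
    const DType* feat, const int64_t* offsets, DType* out,
    int64_t num_segments, int64_t dim) {
  const int64_t row = blockIdx.x;  // one output row (segment) per block
  if (row >= num_segments) return;
  for (int64_t j = threadIdx.x; j < dim; j += blockDim.x) {
    DType acc = 0;
    // Aggregate the feat rows in [offsets[row], offsets[row + 1]).
    for (int64_t k = offsets[row]; k < offsets[row + 1]; ++k)
      acc += feat[k * dim + j];
    out[row * dim + j] = acc;
  }
}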
/*!
/**
* @brief CUDA kernel of scatter add.
* @note each blockthread is responsible for adding a row in feature tensor
* to a target row in output tensor.
......@@ -62,7 +62,7 @@ __global__ void ScatterAddKernel(
}
}
/*!
/**
* @brief CUDA kernel to update gradients for reduce op max/min
* @note each WARP (group of 32 threads) is responsible for adding a row in
* feature tensor to a target row in output tensor.
......@@ -90,7 +90,7 @@ __global__ void UpdateGradMinMaxHeteroKernel(
}
}
/*!
/**
* @brief CUDA kernel of backward phase in segment min/max.
* @note each blockthread is responsible for writing a row in the
* result gradient tensor by lookup the ArgMin/Max for index information.
......@@ -110,7 +110,7 @@ __global__ void BackwardSegmentCmpKernel(
}
}
/*!
/**
* @brief CUDA implementation of forward phase of Segment Reduce.
* @param feat The input tensor.
* @param offsets The offsets tensor.
......@@ -141,7 +141,7 @@ void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) {
feat_data, offsets_data, out_data, arg_data, n, dim);
}
/*!
/**
* @brief CUDA implementation of Scatter Add (on first dimension).
* @note math equation: out[idx[i], *] += feat[i, *]
* @param feat The input tensor.
......@@ -170,7 +170,7 @@ void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
idx_data, out_data, n, dim);
}
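The equation out[idx[i], *] += feat[i, *] maps almost directly onto a kernel; a hedged sketch using atomicAdd so that repeated target indices accumulate correctly (illustrative name and signature):

template <typename IdType>
__global__ void ScatterAddSketch(
    const float* feat, const IdType* idx, float* out,
    int64_t n, int64_t dim) {
  const int64_t i = blockIdx.x;  // one input row per block
  if (i >= n) return;
  for (int64_t j = threadIdx.x; j < dim; j += blockDim.x)
    atomicAdd(&out[idx[i] * dim + j], feat[i * dim + j]);  // out[idx[i], *] += feat[i, *]
}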
/*!
/**
* @brief CUDA implementation to update gradients for reduce op max/min
* @param graph The input heterogeneous graph.
* @param op The binary operator, could be `copy_u`, `copy_e`.
......@@ -223,7 +223,7 @@ void UpdateGradMinMax_hetero(
}
}
/*!
/**
* @brief CUDA implementation of backward phase of Segment Reduce with Min/Max
* reducer.
* @note math equation: out[arg[i, k], k] = feat[i, k]
* @param feat The input
......
/*!
/**
* Copyright (c) 2021 by contributors.
* @file array/cuda/spmat_op_impl_coo.cu
* @brief COO operator GPU implementation
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/spmat_op_impl_csr.cu
* @brief CSR operator GPU implementation
......@@ -81,7 +81,7 @@ template NDArray CSRIsNonZero<kDGLCUDA, int64_t>(CSRMatrix, NDArray, NDArray);
///////////////////////////// CSRHasDuplicate /////////////////////////////
/*!
/**
* @brief Check whether each row is free of duplicate entries.
* Assumes the CSR is sorted.
*/
......@@ -227,7 +227,7 @@ CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) {
template CSRMatrix CSRSliceRows<kDGLCUDA, int32_t>(CSRMatrix, int64_t, int64_t);
template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, int64_t, int64_t);
/*!
/**
* @brief Copy data segment to output buffers
*
* For the i^th row r = row[i], copy the data from indptr[r] ~ indptr[r+1]
......@@ -298,7 +298,7 @@ template CSRMatrix CSRSliceRows<kDGLCUDA, int64_t>(CSRMatrix, NDArray);
///////////////////////////// CSRGetDataAndIndices /////////////////////////////
/*!
/**
* @brief Generate a 0-1 mask for each index that hits the provided (row, col)
* index.
*
......@@ -330,7 +330,7 @@ __global__ void _SegmentMaskKernel(
}
}
/*!
/**
* @brief Search for the insertion positions for needle in the hay.
*
* The hay is a list of sorted elements and the result is the insertion position
......@@ -423,7 +423,7 @@ template std::vector<NDArray> CSRGetDataAndIndices<kDGLCUDA, int64_t>(
///////////////////////////// CSRSliceMatrix /////////////////////////////
/*!
/**
* @brief Generate a 0-1 mask for each index whose column is in the provided
* set. It also counts the number of masked values per row.
*/
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/spmm.cu
* @brief SPMM C APIs and definitions.
......@@ -15,7 +15,7 @@ using namespace cuda;
namespace aten {
/*!
/**
* @brief CUDA implementation of g-SpMM on Csr format.
* @note use cusparse if the reduce operator is `sum` and there is
* no broadcast, use dgl's kernel in other cases.
......@@ -80,7 +80,7 @@ void SpMMCsr(const std::string& op, const std::string& reduce,
}
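The cusparse-vs-DGL choice in the note above boils down to a small predicate; a sketch of that rule, where the exact op whitelist is an assumption on my part rather than something this diff shows:

#include <string>

inline bool CanUseCusparse(
    const std::string& reduce, bool has_broadcast, const std::string& op) {
  // cuSPARSE only covers the plain sum-reduce, no-broadcast case; DGL's
  // own kernels handle every other (op, reduce) combination.
  return reduce == "sum" && !has_broadcast &&
         (op == "copy_lhs" || op == "mul");
}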
/*!
/**
* @brief CUDA implementation of g-SpMM on Coo format.
*/
template <int XPU, typename IdType, typename DType>
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/spmm.cuh
* @brief SPMM CUDA kernel function header.
......@@ -21,7 +21,7 @@ using namespace cuda;
namespace aten {
/*!
/**
* @brief Determine whether cusparse SpMM function is applicable.
*/
template <typename DType, typename IdType>
......@@ -41,7 +41,7 @@ inline bool cusparse_available(bool more_nnz_than_matrix_size) {
namespace {
/*! @brief Call cuBLAS geam API for transpose operation for float and double. */
/** @brief Call cuBLAS geam API for transpose operation for float and double. */
template <typename DType>
cublasStatus_t Xgeam(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n,
......@@ -98,7 +98,8 @@ cublasStatus_t Xgeam<double>(cublasHandle_t handle, cublasOperation_t transa,
beta, B, ldb, C, ldc);
}
/* @brief IndexSelect operator kernel implementation.
/**
* @brief IndexSelect operator kernel implementation.
* @note duplicate of IndexSelectKernel defined in array_index_select.cu
*/
template <typename DType, typename IdType>
......@@ -112,7 +113,8 @@ __global__ void _IndexSelectKernel(
out[i * m + j] = in[idx[i] * m + j];
}
/* @brief Transpose operator kernel implementation.
/**
* @brief Transpose operator kernel implementation.
* @note not efficient but it's not a bottleneck, used for float16 dtype.
*/
template <typename DType>
......@@ -125,7 +127,7 @@ __global__ void _TransposeKernel(
out[i * m + j] = in[j * n + i];
}
/*
/**
* @brief Transpose the input matrix.
* @param row number of rows of input matrix.
* @param col number of columns of input matrix.
......@@ -149,7 +151,7 @@ void _Transpose(const DType* in, DType* out,
out, row));
}
/*
/**
* @brief Transpose the input matrix for data type half.
* @note cuBLAS has no geam API for the half data type; fall back to our kernel.
*/
......@@ -163,7 +165,7 @@ void _Transpose<half>(const half* in, half* out,
}
#if BF16_ENABLED
/*
/**
* @brief Transpose the input matrix for data type bf16.
* @note cuBLAS has no geam API for the bf16 data type; fall back to our kernel.
*/
......@@ -177,7 +179,7 @@ void _Transpose<__nv_bfloat16>(const __nv_bfloat16* in, __nv_bfloat16* out,
}
#endif // BF16_ENABLED
/*
/**
* @brief
*/
template <typename DType, typename IdType>
......@@ -247,7 +249,7 @@ cusparseStatus_t Xcsrmm2<double>(cusparseHandle_t handle, cusparseOperation_t tr
}
#endif
/*! Cusparse implementation of SpMM on Csr format. */
/** Cusparse implementation of SpMM on Csr format. */
template <typename DType, typename IdType>
void CusparseCsrmm2(
const DGLContext& ctx,
......@@ -347,7 +349,7 @@ void CusparseCsrmm2(
device->FreeWorkspace(ctx, valptr);
}
/*! Cusparse implementation of SpMM on Csr format. */
/** Cusparse implementation of SpMM on Csr format. */
template <typename DType, typename IdType>
void CusparseCsrmm2Hetero(
const DGLContext& ctx,
......@@ -476,7 +478,7 @@ void CusparseCsrmm2Hetero(
namespace cuda {
/*!
/**
* @brief CUDA kernel of g-SpMM on Coo format.
* @note It uses an edge-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks
......@@ -525,7 +527,7 @@ __global__ void SpMMCooKernel(
}
}
/*!
/**
* @brief CUDA kernel to compute argu and arge in g-SpMM on Coo format.
* @note It uses an edge-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different edges. Threadblocks
......@@ -573,7 +575,7 @@ __global__ void ArgSpMMCooKernel(
}
}
/*!
/**
* @brief CUDA kernel of g-SpMM on Csr format.
* @note It uses a node-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different destination nodes.
......@@ -631,7 +633,7 @@ __global__ void SpMMCsrKernel(
}
}
/*!
/**
* @brief CUDA kernel of SpMM-Min/Max on Csr format.
* @note It uses a node-parallel strategy: different threadblocks (on y-axis)
* are responsible for the computation on different destination nodes.
......@@ -692,7 +694,7 @@ __global__ void SpMMCmpCsrHeteroKernel(
}
}
/*!
/**
* @brief CUDA implementation of g-SpMM on Coo format.
* @param bcast Broadcast information.
* @param coo The Coo matrix.
......@@ -769,7 +771,7 @@ void SpMMCoo(
});
}
/*!
/**
* @brief CUDA implementation of g-SpMM on Csr format.
* @param bcast Broadcast information.
* @param csr The Csr matrix.
......@@ -824,7 +826,7 @@ void SpMMCsr(
});
}
/*!
/**
* @brief CUDA kernel of SpMM-Min/Max on Csr format on heterogeneous graph.
* @param bcast Broadcast information.
* @param csr The Csr matrix.
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/spmm.cu
* @brief SPMM C APIs and definitions.
......@@ -15,7 +15,7 @@ using namespace cuda;
namespace aten {
/*!
/**
* @brief CUDA implementation of g-SpMM on Csr format.
* @note use cusparse if the reduce operator is `sum` and there is
* no broadcast, use dgl's kernel in other cases.
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/utils.cu
* @brief Utilities for CUDA kernels.
......
/*!
/**
* Copyright (c) 2020 by Contributors
* @file array/cuda/utils.h
* @brief Utilities for CUDA kernels.
......@@ -23,7 +23,7 @@ namespace cuda {
#define CUDA_MAX_NUM_THREADS 256
/*! @brief Calculate the number of threads needed given the dimension length.
/** @brief Calculate the number of threads needed given the dimension length.
*
* It finds the largest power of two that is no greater than
* min(dim, max_nthrs).
......@@ -39,7 +39,7 @@ inline int FindNumThreads(int dim, int max_nthrs = CUDA_MAX_NUM_THREADS) {
return ret;
}
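Given that contract, the elided body plausibly halves max_nthrs until it fits; a reconstruction sketch, not the verbatim source:

inline int FindNumThreads(int dim, int max_nthrs = CUDA_MAX_NUM_THREADS) {
  int ret = max_nthrs;  // starts as a power of two
  while (ret > dim && ret > 1)
    ret >>= 1;          // halving preserves the power-of-two property
  return ret;
}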
/*
/**
* @brief Find a number of blocks that is no greater than nblks and max_nblks
* on the given axis ('x', 'y' or 'z').
*/
......@@ -77,7 +77,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
#endif
}
/*!
/**
* @brief Return true if the given bool flag array is all true.
* The input bool array is in int8_t type so it is aligned with byte address.
*
......@@ -88,7 +88,7 @@ __device__ __forceinline__ T _ldg(T* addr) {
*/
bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx);
/*!
/**
* @brief CUDA kernel that fills the vector starting at ptr, of size length,
* with val.
* @note internal use only.
......@@ -103,7 +103,7 @@ __global__ void _FillKernel(DType* ptr, size_t length, DType val) {
}
}
/*! @brief Fill the vector starting at ptr, of size length, with val */
/** @brief Fill the vector starting at ptr, of size length, with val */
template <typename DType>
void _Fill(DType* ptr, size_t length, DType val) {
cudaStream_t stream = runtime::getCurrentCUDAStream();
......@@ -112,7 +112,7 @@ void _Fill(DType* ptr, size_t length, DType val) {
CUDA_KERNEL_CALL(cuda::_FillKernel, nb, nt, 0, stream, ptr, length, val);
}
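A plausible grid-stride shape for the kernel side of this pair, so that any nb/nt launch covers the whole range (a sketch; the actual _FillKernel body is elided above):

template <typename DType>
__global__ void _FillKernel(DType* ptr, size_t length, DType val) {
  size_t tx = blockIdx.x * (size_t)blockDim.x + threadIdx.x;
  const size_t stride = (size_t)gridDim.x * blockDim.x;
  for (; tx < length; tx += stride)
    ptr[tx] = val;  // every thread fills its strided slice of the vector
}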
/*!
/**
* @brief Search the adjacency list linearly for each (row, col) pair and
* write the data under the matched position in the indices array to the output.
*
......@@ -156,7 +156,7 @@ __global__ void _LinearSearchKernel(
}
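The linear probe described above, sketched for CSR inputs (hypothetical signature; the real _LinearSearchKernel takes additional parameters):

template <typename IdType, typename DType>
__global__ void LinearSearchSketch(
    const IdType* indptr, const IdType* indices, const DType* data,
    const IdType* rows, const IdType* cols, int64_t num_queries,
    DType filler, DType* out) {
  const int64_t tx = blockIdx.x * (int64_t)blockDim.x + threadIdx.x;
  if (tx >= num_queries) return;
  const IdType r = rows[tx], c = cols[tx];
  out[tx] = filler;  // default when (r, c) is not present
  for (IdType i = indptr[r]; i < indptr[r + 1]; ++i) {
    if (indices[i] == c) {
      // Write the data under the matched position; fall back to the
      // position itself when no explicit data array is given.
      out[tx] = data ? data[i] : static_cast<DType>(i);
      break;
    }
  }
}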
#if BF16_ENABLED
/*!
/**
* @brief Specialization for bf16 because conversion from long long to bfloat16
* doesn't exist before SM80.
*/
......@@ -205,7 +205,7 @@ inline DType GetCUDAScalar(
return result;
}
/*!
/**
* @brief Given a sorted array and a value, this function returns the index
* of the first element that compares greater than the value.
*
......@@ -230,7 +230,7 @@ __device__ IdType _UpperBound(const IdType *A, int64_t n, IdType x) {
return l;
}
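The elided body is a standard binary search; a reconstruction sketch consistent with the return l; visible in the hunk:

template <typename IdType>
__device__ IdType _UpperBound(const IdType *A, int64_t n, IdType x) {
  IdType l = 0, h = static_cast<IdType>(n);  // answer lies in [l, h]
  while (l < h) {
    const IdType m = l + (h - l) / 2;
    if (A[m] <= x)
      l = m + 1;  // A[m] is not greater than x: the answer is right of m
    else
      h = m;      // A[m] is greater than x: m itself could be the answer
  }
  return l;       // index of the first element greater than x
}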
/*!
/**
* @brief Given a sorted array and a value, this function returns the index
* of the element equal to val, or n+1 if no such element exists.
*
......
/*!
/**
* Copyright (c) 2019-2022 by Contributors
* @file array/cuda/uvm/array_index_select_uvm.cu
* @brief Array index select GPU implementation
......
/*!
/**
* Copyright (c) 2021 by Contributors
* @file array/cpu/array_index_select_uvm.cuh
* @brief Array index select GPU kernel implementation
......@@ -13,9 +13,10 @@ namespace dgl {
namespace aten {
namespace impl {
/* This is a cross-device access version of IndexSelectMultiKernel.
* Since the memory access over PCIe is more sensitive to the
* data access alignment (cacheline), we need a separate version here.
/**
* This is a cross-device access version of IndexSelectMultiKernel.
* Since the memory access over PCIe is more sensitive to the
* data access alignment (cacheline), we need a separate version here.
*/
template <typename DType, typename IdType>
__global__ void IndexSelectMultiKernelAligned(
......
/*!
/**
* Copyright (c) 2021 by Contributors
* @file array/filter.cc
* @brief Object for selecting items in a set, or selecting items not in a set.
......