"googlemock/Makefile.am" did not exist on "02f7106557fde1f1075dc53d65ef1f7a11851f93"
Commit ec0ff893 authored by YdrMaster
Browse files

issue/52: 格式化所有 c/c++ 文件


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent 27ba98d1
#include "../../utils.h"
#include "./matmul_cuda.cuh"
template<typename Tdata>
template <typename Tdata>
infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c, float beta, void const *a, void const *b, float alpha, void *stream) {
auto info = desc->info;
......@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t) stream,
use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t)stream,
[&](cublasHandle_t handle) { cublasGemmStridedBatchedEx(
handle,
op_a,
......
......@@ -4,35 +4,35 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRandomSampleDescriptor(handle, (RandomSampleCpuDescriptor_t *) desc_ptr, result, probs);
return cpuCreateRandomSampleDescriptor(handle, (RandomSampleCpuDescriptor_t *)desc_ptr, result, probs);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaCreateRandomSampleDescriptor((CudaHandle_t) handle, (RandomSampleCudaDescriptor_t *) desc_ptr, result, probs);
return cudaCreateRandomSampleDescriptor((CudaHandle_t)handle, (RandomSampleCudaDescriptor_t *)desc_ptr, result, probs);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRandomSampleDescriptor((BangHandle_t) handle,
(RandomSampleBangDescriptor_t *) desc_ptr, result,
return bangCreateRandomSampleDescriptor((BangHandle_t)handle,
(RandomSampleBangDescriptor_t *)desc_ptr, result,
probs);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendCreateRandomSampleDescriptor((AscendHandle_t) handle,
(RandomSampleAscendDescriptor_t *) desc_ptr, result, probs);
return ascendCreateRandomSampleDescriptor((AscendHandle_t)handle,
(RandomSampleAscendDescriptor_t *)desc_ptr, result, probs);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRandomSampleDescriptor((MacaHandle_t) handle,
(RandomSampleMacaDescriptor_t *) desc_ptr, result,
return macaCreateRandomSampleDescriptor((MacaHandle_t)handle,
(RandomSampleMacaDescriptor_t *)desc_ptr, result,
probs);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaCreateRandomSampleDescriptor((MusaHandle_t) handle, (RandomSampleMusaDescriptor_t *) desc_ptr, result, probs);
return musaCreateRandomSampleDescriptor((MusaHandle_t)handle, (RandomSampleMusaDescriptor_t *)desc_ptr, result, probs);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -42,33 +42,33 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuGetRandomSampleWorkspaceSize((RandomSampleCpuDescriptor_t) desc, size);
return cpuGetRandomSampleWorkspaceSize((RandomSampleCpuDescriptor_t)desc, size);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaGetRandomSampleWorkspaceSize((RandomSampleCudaDescriptor_t) desc, size);
return cudaGetRandomSampleWorkspaceSize((RandomSampleCudaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangGetRandomSampleWorkspaceSize((RandomSampleBangDescriptor_t) desc, size);
return bangGetRandomSampleWorkspaceSize((RandomSampleBangDescriptor_t)desc, size);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendGetRandomSampleWorkspaceSize((RandomSampleAscendDescriptor_t) desc, size);
return ascendGetRandomSampleWorkspaceSize((RandomSampleAscendDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaGetRandomSampleWorkspaceSize((RandomSampleMacaDescriptor_t) desc, size);
return macaGetRandomSampleWorkspaceSize((RandomSampleMacaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaGetRandomSampleWorkspaceSize((RandomSampleMusaDescriptor_t) desc, size);
return musaGetRandomSampleWorkspaceSize((RandomSampleMusaDescriptor_t)desc, size);
}
#endif
}
......@@ -88,30 +88,30 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRandomSample((RandomSampleCpuDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return cpuRandomSample((RandomSampleCpuDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaRandomSample((RandomSampleCudaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return cudaRandomSample((RandomSampleCudaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRandomSample((RandomSampleBangDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return bangRandomSample((RandomSampleBangDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendRandomSample((RandomSampleAscendDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return ascendRandomSample((RandomSampleAscendDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRandomSample((RandomSampleMacaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return macaRandomSample((RandomSampleMacaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaRandomSample((RandomSampleMusaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
return musaRandomSample((RandomSampleMusaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -121,30 +121,30 @@ __C infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleD
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRandomSampleDescriptor((RandomSampleCpuDescriptor_t) desc);
return cpuDestroyRandomSampleDescriptor((RandomSampleCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaDestroyRandomSampleDescriptor((RandomSampleCudaDescriptor_t) desc);
return cudaDestroyRandomSampleDescriptor((RandomSampleCudaDescriptor_t)desc);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRandomSampleDescriptor((RandomSampleBangDescriptor_t) desc);
return bangDestroyRandomSampleDescriptor((RandomSampleBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendDestroyRandomSampleDescriptor((RandomSampleAscendDescriptor_t) desc);
return ascendDestroyRandomSampleDescriptor((RandomSampleAscendDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRandomSampleDescriptor((RandomSampleMacaDescriptor_t) desc);
return macaDestroyRandomSampleDescriptor((RandomSampleMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaDestroyRandomSampleDescriptor((RandomSampleMusaDescriptor_t) desc);
return musaDestroyRandomSampleDescriptor((RandomSampleMusaDescriptor_t)desc);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
......@@ -8,35 +8,35 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRearrangeDescriptor(handle, (RearrangeCpuDescriptor_t *) desc_ptr, dst, src);
return cpuCreateRearrangeDescriptor(handle, (RearrangeCpuDescriptor_t *)desc_ptr, dst, src);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaCreateRearrangeDescriptor((CudaHandle_t) handle, (RearrangeCudaDescriptor_t *) desc_ptr, dst, src);
return cudaCreateRearrangeDescriptor((CudaHandle_t)handle, (RearrangeCudaDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRearrangeDescriptor((BangHandle_t) handle, (RearrangeBangDescriptor_t *) desc_ptr, dst, src);
return bangCreateRearrangeDescriptor((BangHandle_t)handle, (RearrangeBangDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnCreateRearrangeDescriptor((AscendHandle_t) handle,
(RearrangeAclnnDescriptor_t *) desc_ptr,
return aclnnCreateRearrangeDescriptor((AscendHandle_t)handle,
(RearrangeAclnnDescriptor_t *)desc_ptr,
dst,
src);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRearrangeDescriptor((MacaHandle_t) handle, (RearrangeMacaDescriptor_t *) desc_ptr, dst, src);
return macaCreateRearrangeDescriptor((MacaHandle_t)handle, (RearrangeMacaDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaCreateRearrangeDescriptor((MusaHandle_t)handle, (RearrangeMusaDescriptor_t *) desc_ptr, dst, src);
return musaCreateRearrangeDescriptor((MusaHandle_t)handle, (RearrangeMusaDescriptor_t *)desc_ptr, dst, src);
}
#endif
}
......@@ -47,22 +47,22 @@ __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRearrange((RearrangeCpuDescriptor_t) desc, dst, src, stream);
return cpuRearrange((RearrangeCpuDescriptor_t)desc, dst, src, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaRearrange((RearrangeCudaDescriptor_t) desc, dst, src, stream);
return cudaRearrange((RearrangeCudaDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRearrange((RearrangeBangDescriptor_t) desc, dst, src, stream);
return bangRearrange((RearrangeBangDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnRearrange((RearrangeAclnnDescriptor_t) desc,
return aclnnRearrange((RearrangeAclnnDescriptor_t)desc,
dst,
src,
stream);
......@@ -70,12 +70,12 @@ __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRearrange((RearrangeMacaDescriptor_t) desc, dst, src, stream);
return macaRearrange((RearrangeMacaDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaRearrange((RearrangeMusaDescriptor_t) desc, dst, src, stream);
return musaRearrange((RearrangeMusaDescriptor_t)desc, dst, src, stream);
}
#endif
}
......@@ -86,32 +86,32 @@ __C infiniopStatus_t infiniopDestroyRearrangeDescriptor(infiniopRearrangeDescrip
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRearrangeDescriptor((RearrangeCpuDescriptor_t) desc);
return cpuDestroyRearrangeDescriptor((RearrangeCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaDestroyRearrangeDescriptor((RearrangeCudaDescriptor_t) desc);
return cudaDestroyRearrangeDescriptor((RearrangeCudaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRearrangeDescriptor((RearrangeBangDescriptor_t) desc);
return bangDestroyRearrangeDescriptor((RearrangeBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnDestroyRearrangeDescriptor((RearrangeAclnnDescriptor_t) desc);
return aclnnDestroyRearrangeDescriptor((RearrangeAclnnDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRearrangeDescriptor((RearrangeMacaDescriptor_t) desc);
return macaDestroyRearrangeDescriptor((RearrangeMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaDestroyRearrangeDescriptor((RearrangeMusaDescriptor_t) desc);
return musaDestroyRearrangeDescriptor((RearrangeMusaDescriptor_t)desc);
}
#endif
}
......
......@@ -10,22 +10,22 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRMSNormDescriptor(handle, (RMSNormCpuDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
return cpuCreateRMSNormDescriptor(handle, (RMSNormCpuDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaCreateRMSNormDescriptor((CudaHandle_t) handle, (RMSNormCudaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
return cudaCreateRMSNormDescriptor((CudaHandle_t)handle, (RMSNormCudaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRMSNormDescriptor((BangHandle_t) handle, (RMSNormBangDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
return bangCreateRMSNormDescriptor((BangHandle_t)handle, (RMSNormBangDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnCreateRMSNormDescriptor((AscendHandle_t) handle,
(RMSNormAclnnDescriptor_t *) desc_ptr,
return aclnnCreateRMSNormDescriptor((AscendHandle_t)handle,
(RMSNormAclnnDescriptor_t *)desc_ptr,
y_desc,
x_desc,
w_desc,
......@@ -34,12 +34,12 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRMSNormDescriptor((MacaHandle_t) handle, (RMSNormMacaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
return macaCreateRMSNormDescriptor((MacaHandle_t)handle, (RMSNormMacaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaCreateRMSNormDescriptor((MusaHandle_t) handle, (RMSNormMusaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
return musaCreateRMSNormDescriptor((MusaHandle_t)handle, (RMSNormMusaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
}
......@@ -50,33 +50,33 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuGetRMSNormWorkspaceSize((RMSNormCpuDescriptor_t) desc, size);
return cpuGetRMSNormWorkspaceSize((RMSNormCpuDescriptor_t)desc, size);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaGetRMSNormWorkspaceSize((RMSNormCudaDescriptor_t) desc, size);
return cudaGetRMSNormWorkspaceSize((RMSNormCudaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangGetRMSNormWorkspaceSize((RMSNormBangDescriptor_t) desc, size);
return bangGetRMSNormWorkspaceSize((RMSNormBangDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnGetRMSNormWorkspaceSize((RMSNormAclnnDescriptor_t) desc,
return aclnnGetRMSNormWorkspaceSize((RMSNormAclnnDescriptor_t)desc,
size);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaGetRMSNormWorkspaceSize((RMSNormMacaDescriptor_t) desc, size);
return macaGetRMSNormWorkspaceSize((RMSNormMacaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaGetRMSNormWorkspaceSize((RMSNormMusaDescriptor_t) desc, size);
return musaGetRMSNormWorkspaceSize((RMSNormMusaDescriptor_t)desc, size);
}
#endif
}
......@@ -88,22 +88,22 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRMSNorm((RMSNormCpuDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
return cpuRMSNorm((RMSNormCpuDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaRMSNorm((RMSNormCudaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
return cudaRMSNorm((RMSNormCudaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRMSNorm((RMSNormBangDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
return bangRMSNorm((RMSNormBangDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnRMSNorm((RMSNormAclnnDescriptor_t) desc,
return aclnnRMSNorm((RMSNormAclnnDescriptor_t)desc,
workspace,
workspace_size,
y,
......@@ -114,12 +114,12 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRMSNorm((RMSNormMacaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
return macaRMSNorm((RMSNormMacaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaRMSNorm((RMSNormMusaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
return musaRMSNorm((RMSNormMusaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
}
......@@ -130,32 +130,32 @@ __C infiniopStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRMSNormDescriptor((RMSNormCpuDescriptor_t) desc);
return cpuDestroyRMSNormDescriptor((RMSNormCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaDestroyRMSNormDescriptor((RMSNormCudaDescriptor_t) desc);
return cudaDestroyRMSNormDescriptor((RMSNormCudaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRMSNormDescriptor((RMSNormBangDescriptor_t) desc);
return bangDestroyRMSNormDescriptor((RMSNormBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnDestroyRMSNormDescriptor((RMSNormAclnnDescriptor_t) desc);
return aclnnDestroyRMSNormDescriptor((RMSNormAclnnDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRMSNormDescriptor((RMSNormMacaDescriptor_t) desc);
return macaDestroyRMSNormDescriptor((RMSNormMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaDestroyRMSNormDescriptor((RMSNormMusaDescriptor_t) desc);
return musaDestroyRMSNormDescriptor((RMSNormMusaDescriptor_t)desc);
}
#endif
}
......
......@@ -67,8 +67,7 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
// compute broadcasted shape
for (size_t i = 0; i < max_rank; ++i) {
if (padded_shape1[i] == padded_shape2[i] || padded_shape1[i] == 1 ||
padded_shape2[i] == 1) {
if (padded_shape1[i] == padded_shape2[i] || padded_shape1[i] == 1 || padded_shape2[i] == 1) {
broadcast_shape[i] = std::max(padded_shape1[i], padded_shape2[i]);
} else {
return false;
......@@ -89,10 +88,7 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
auto broadcast_shape = broadcast_shape_.data(),
padded_shape1 = padded_shape1_.data(),
padded_shape2 = padded_shape2_.data();
if (broadcast_ndim != c->ndim ||
!getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim,
broadcast_shape, padded_shape1, padded_shape2,
broadcast_ndim)) {
if (broadcast_ndim != c->ndim || !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim, broadcast_shape, padded_shape1, padded_shape2, broadcast_ndim)) {
return false;
}
return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
......@@ -126,7 +122,6 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}
// permute the dimensions of a tensor descriptor
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
const std::vector<size_t> &order) {
......@@ -151,8 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
size_t dim_start, size_t dim_end) {
for (size_t i = dim_start + 1; i <= dim_end; i++) {
if (desc->strides[i - 1] !=
static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
if (desc->strides[i - 1] != static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
return false;
}
}
......@@ -206,8 +200,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
size_t dim,
const std::vector<size_t> &dims) {
size_t ndim = desc->ndim;
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1,
std::multiplies{})) {
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies{})) {
return nullptr;
}
size_t new_ndim = ndim + dims.size() - 1;
......@@ -221,10 +214,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
}
for (size_t i = 0; i < dims.size(); i++) {
new_shape[index] = dims[i];
new_strides[index] =
desc->strides[dim] * desc->shape[dim] /
std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1,
std::multiplies<size_t>());
new_strides[index] = desc->strides[dim] * desc->shape[dim] / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
index++;
}
for (size_t i = dim + 1; i < ndim; i++) {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment