Commit ec0ff893 authored by YdrMaster

issue/52: Format all C/C++ files


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent 27ba98d1
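The changes below are a pure reformatting pass over the C/C++ sources (each changed region appears here first in its old form, then as reformatted): the space after C-style casts is dropped ((CudaHandle_t) handle becomes (CudaHandle_t)handle), wrapped argument lists and conditions are re-aligned or joined onto one line, and continuation indentation is normalized. For orientation, a minimal sketch of a .clang-format configuration that would produce this style follows; it is an assumption for illustration only, since the actual formatter settings are not part of this commit.

# Hypothetical .clang-format sketch; the project's real configuration is not
# included in this commit and may differ.
BasedOnStyle: LLVM
ColumnLimit: 0
SpaceAfterCStyleCast: false
AlignAfterOpenBracket: Align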
#include "../../utils.h"
#include "./matmul_cuda.cuh"
template<typename Tdata>
template <typename Tdata>
infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c, float beta, void const *a, void const *b, float alpha, void *stream) {
auto info = desc->info;
......@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t) stream,
use_cublas(desc->cublas_handle_pool, desc->device_id, (cudaStream_t)stream,
[&](cublasHandle_t handle) { cublasGemmStridedBatchedEx(
handle,
op_a,
......
......@@ -3,36 +3,36 @@
__C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handle, infiniopRandomSampleDescriptor_t *desc_ptr, infiniopTensorDescriptor_t result, infiniopTensorDescriptor_t probs) {
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRandomSampleDescriptor(handle, (RandomSampleCpuDescriptor_t *) desc_ptr, result, probs);
case DevCpu:
return cpuCreateRandomSampleDescriptor(handle, (RandomSampleCpuDescriptor_t *)desc_ptr, result, probs);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaCreateRandomSampleDescriptor((CudaHandle_t) handle, (RandomSampleCudaDescriptor_t *) desc_ptr, result, probs);
case DevNvGpu:
return cudaCreateRandomSampleDescriptor((CudaHandle_t)handle, (RandomSampleCudaDescriptor_t *)desc_ptr, result, probs);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRandomSampleDescriptor((BangHandle_t) handle,
(RandomSampleBangDescriptor_t *) desc_ptr, result,
probs);
}
case DevCambriconMlu: {
return bangCreateRandomSampleDescriptor((BangHandle_t)handle,
(RandomSampleBangDescriptor_t *)desc_ptr, result,
probs);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendCreateRandomSampleDescriptor((AscendHandle_t) handle,
(RandomSampleAscendDescriptor_t *) desc_ptr, result, probs);
}
case DevAscendNpu: {
return ascendCreateRandomSampleDescriptor((AscendHandle_t)handle,
(RandomSampleAscendDescriptor_t *)desc_ptr, result, probs);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRandomSampleDescriptor((MacaHandle_t) handle,
(RandomSampleMacaDescriptor_t *) desc_ptr, result,
probs);
}
case DevMetaxGpu: {
return macaCreateRandomSampleDescriptor((MacaHandle_t)handle,
(RandomSampleMacaDescriptor_t *)desc_ptr, result,
probs);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaCreateRandomSampleDescriptor((MusaHandle_t) handle, (RandomSampleMusaDescriptor_t *) desc_ptr, result, probs);
case DevMthreadsGpu:
return musaCreateRandomSampleDescriptor((MusaHandle_t)handle, (RandomSampleMusaDescriptor_t *)desc_ptr, result, probs);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -41,35 +41,35 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
__C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDescriptor_t desc, uint64_t *size) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuGetRandomSampleWorkspaceSize((RandomSampleCpuDescriptor_t) desc, size);
case DevCpu:
return cpuGetRandomSampleWorkspaceSize((RandomSampleCpuDescriptor_t)desc, size);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaGetRandomSampleWorkspaceSize((RandomSampleCudaDescriptor_t) desc, size);
}
case DevNvGpu: {
return cudaGetRandomSampleWorkspaceSize((RandomSampleCudaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangGetRandomSampleWorkspaceSize((RandomSampleBangDescriptor_t) desc, size);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
case DevCambriconMlu: {
return bangGetRandomSampleWorkspaceSize((RandomSampleBangDescriptor_t)desc, size);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendGetRandomSampleWorkspaceSize((RandomSampleAscendDescriptor_t) desc, size);
}
case DevAscendNpu: {
return ascendGetRandomSampleWorkspaceSize((RandomSampleAscendDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaGetRandomSampleWorkspaceSize((RandomSampleMacaDescriptor_t) desc, size);
}
case DevMetaxGpu: {
return macaGetRandomSampleWorkspaceSize((RandomSampleMacaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaGetRandomSampleWorkspaceSize((RandomSampleMusaDescriptor_t) desc, size);
}
case DevMthreadsGpu: {
return musaGetRandomSampleWorkspaceSize((RandomSampleMusaDescriptor_t)desc, size);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -87,31 +87,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void *stream) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRandomSample((RandomSampleCpuDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
case DevCpu:
return cpuRandomSample((RandomSampleCpuDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaRandomSample((RandomSampleCudaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
case DevNvGpu:
return cudaRandomSample((RandomSampleCudaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRandomSample((RandomSampleBangDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
case DevCambriconMlu: {
return bangRandomSample((RandomSampleBangDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendRandomSample((RandomSampleAscendDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
case DevAscendNpu: {
return ascendRandomSample((RandomSampleAscendDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRandomSample((RandomSampleMacaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
case DevMetaxGpu: {
return macaRandomSample((RandomSampleMacaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaRandomSample((RandomSampleMusaDescriptor_t) desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
case DevMthreadsGpu:
return musaRandomSample((RandomSampleMusaDescriptor_t)desc, workspace, workspace_size, result, probs, random_val, topp, topk, temperature, stream);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -120,31 +120,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
__C infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRandomSampleDescriptor((RandomSampleCpuDescriptor_t) desc);
case DevCpu:
return cpuDestroyRandomSampleDescriptor((RandomSampleCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu:
return cudaDestroyRandomSampleDescriptor((RandomSampleCudaDescriptor_t) desc);
case DevNvGpu:
return cudaDestroyRandomSampleDescriptor((RandomSampleCudaDescriptor_t)desc);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRandomSampleDescriptor((RandomSampleBangDescriptor_t) desc);
}
case DevCambriconMlu: {
return bangDestroyRandomSampleDescriptor((RandomSampleBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return ascendDestroyRandomSampleDescriptor((RandomSampleAscendDescriptor_t) desc);
}
case DevAscendNpu: {
return ascendDestroyRandomSampleDescriptor((RandomSampleAscendDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRandomSampleDescriptor((RandomSampleMacaDescriptor_t) desc);
}
case DevMetaxGpu: {
return macaDestroyRandomSampleDescriptor((RandomSampleMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu:
return musaDestroyRandomSampleDescriptor((RandomSampleMusaDescriptor_t) desc);
case DevMthreadsGpu:
return musaDestroyRandomSampleDescriptor((RandomSampleMusaDescriptor_t)desc);
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
......@@ -7,37 +7,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
infiniopTensorDescriptor_t src) {
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRearrangeDescriptor(handle, (RearrangeCpuDescriptor_t *) desc_ptr, dst, src);
case DevCpu:
return cpuCreateRearrangeDescriptor(handle, (RearrangeCpuDescriptor_t *)desc_ptr, dst, src);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaCreateRearrangeDescriptor((CudaHandle_t) handle, (RearrangeCudaDescriptor_t *) desc_ptr, dst, src);
}
case DevNvGpu: {
return cudaCreateRearrangeDescriptor((CudaHandle_t)handle, (RearrangeCudaDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRearrangeDescriptor((BangHandle_t) handle, (RearrangeBangDescriptor_t *) desc_ptr, dst, src);
}
case DevCambriconMlu: {
return bangCreateRearrangeDescriptor((BangHandle_t)handle, (RearrangeBangDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnCreateRearrangeDescriptor((AscendHandle_t) handle,
(RearrangeAclnnDescriptor_t *) desc_ptr,
dst,
src);
}
case DevAscendNpu: {
return aclnnCreateRearrangeDescriptor((AscendHandle_t)handle,
(RearrangeAclnnDescriptor_t *)desc_ptr,
dst,
src);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRearrangeDescriptor((MacaHandle_t) handle, (RearrangeMacaDescriptor_t *) desc_ptr, dst, src);
}
case DevMetaxGpu: {
return macaCreateRearrangeDescriptor((MacaHandle_t)handle, (RearrangeMacaDescriptor_t *)desc_ptr, dst, src);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaCreateRearrangeDescriptor((MusaHandle_t)handle, (RearrangeMusaDescriptor_t *) desc_ptr, dst, src);
}
case DevMthreadsGpu: {
return musaCreateRearrangeDescriptor((MusaHandle_t)handle, (RearrangeMusaDescriptor_t *)desc_ptr, dst, src);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -46,37 +46,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
__C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, void const *src, void *stream) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRearrange((RearrangeCpuDescriptor_t) desc, dst, src, stream);
case DevCpu:
return cpuRearrange((RearrangeCpuDescriptor_t)desc, dst, src, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaRearrange((RearrangeCudaDescriptor_t) desc, dst, src, stream);
}
case DevNvGpu: {
return cudaRearrange((RearrangeCudaDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRearrange((RearrangeBangDescriptor_t) desc, dst, src, stream);
}
case DevCambriconMlu: {
return bangRearrange((RearrangeBangDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnRearrange((RearrangeAclnnDescriptor_t) desc,
dst,
src,
stream);
}
case DevAscendNpu: {
return aclnnRearrange((RearrangeAclnnDescriptor_t)desc,
dst,
src,
stream);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRearrange((RearrangeMacaDescriptor_t) desc, dst, src, stream);
}
case DevMetaxGpu: {
return macaRearrange((RearrangeMacaDescriptor_t)desc, dst, src, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaRearrange((RearrangeMusaDescriptor_t) desc, dst, src, stream);
}
case DevMthreadsGpu: {
return musaRearrange((RearrangeMusaDescriptor_t)desc, dst, src, stream);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -85,34 +85,34 @@ __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void
__C infiniopStatus_t infiniopDestroyRearrangeDescriptor(infiniopRearrangeDescriptor_t desc) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRearrangeDescriptor((RearrangeCpuDescriptor_t) desc);
case DevCpu:
return cpuDestroyRearrangeDescriptor((RearrangeCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaDestroyRearrangeDescriptor((RearrangeCudaDescriptor_t) desc);
}
case DevNvGpu: {
return cudaDestroyRearrangeDescriptor((RearrangeCudaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRearrangeDescriptor((RearrangeBangDescriptor_t) desc);
}
case DevCambriconMlu: {
return bangDestroyRearrangeDescriptor((RearrangeBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnDestroyRearrangeDescriptor((RearrangeAclnnDescriptor_t) desc);
}
case DevAscendNpu: {
return aclnnDestroyRearrangeDescriptor((RearrangeAclnnDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRearrangeDescriptor((RearrangeMacaDescriptor_t) desc);
}
case DevMetaxGpu: {
return macaDestroyRearrangeDescriptor((RearrangeMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaDestroyRearrangeDescriptor((RearrangeMusaDescriptor_t) desc);
}
case DevMthreadsGpu: {
return musaDestroyRearrangeDescriptor((RearrangeMusaDescriptor_t)desc);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
......@@ -9,38 +9,38 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
float epsilon) {
switch (handle->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuCreateRMSNormDescriptor(handle, (RMSNormCpuDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
case DevCpu:
return cpuCreateRMSNormDescriptor(handle, (RMSNormCpuDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaCreateRMSNormDescriptor((CudaHandle_t) handle, (RMSNormCudaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
case DevNvGpu: {
return cudaCreateRMSNormDescriptor((CudaHandle_t)handle, (RMSNormCudaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangCreateRMSNormDescriptor((BangHandle_t) handle, (RMSNormBangDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
case DevCambriconMlu: {
return bangCreateRMSNormDescriptor((BangHandle_t)handle, (RMSNormBangDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnCreateRMSNormDescriptor((AscendHandle_t) handle,
(RMSNormAclnnDescriptor_t *) desc_ptr,
y_desc,
x_desc,
w_desc,
epsilon);
}
case DevAscendNpu: {
return aclnnCreateRMSNormDescriptor((AscendHandle_t)handle,
(RMSNormAclnnDescriptor_t *)desc_ptr,
y_desc,
x_desc,
w_desc,
epsilon);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaCreateRMSNormDescriptor((MacaHandle_t) handle, (RMSNormMacaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
case DevMetaxGpu: {
return macaCreateRMSNormDescriptor((MacaHandle_t)handle, (RMSNormMacaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaCreateRMSNormDescriptor((MusaHandle_t) handle, (RMSNormMusaDescriptor_t *) desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
case DevMthreadsGpu: {
return musaCreateRMSNormDescriptor((MusaHandle_t)handle, (RMSNormMusaDescriptor_t *)desc_ptr, y_desc, x_desc, w_desc, epsilon);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -49,35 +49,35 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
__C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t desc, uint64_t *size) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuGetRMSNormWorkspaceSize((RMSNormCpuDescriptor_t) desc, size);
case DevCpu:
return cpuGetRMSNormWorkspaceSize((RMSNormCpuDescriptor_t)desc, size);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaGetRMSNormWorkspaceSize((RMSNormCudaDescriptor_t) desc, size);
}
case DevNvGpu: {
return cudaGetRMSNormWorkspaceSize((RMSNormCudaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangGetRMSNormWorkspaceSize((RMSNormBangDescriptor_t) desc, size);
}
case DevCambriconMlu: {
return bangGetRMSNormWorkspaceSize((RMSNormBangDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnGetRMSNormWorkspaceSize((RMSNormAclnnDescriptor_t) desc,
size);
}
case DevAscendNpu: {
return aclnnGetRMSNormWorkspaceSize((RMSNormAclnnDescriptor_t)desc,
size);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaGetRMSNormWorkspaceSize((RMSNormMacaDescriptor_t) desc, size);
}
case DevMetaxGpu: {
return macaGetRMSNormWorkspaceSize((RMSNormMacaDescriptor_t)desc, size);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaGetRMSNormWorkspaceSize((RMSNormMusaDescriptor_t) desc, size);
}
case DevMthreadsGpu: {
return musaGetRMSNormWorkspaceSize((RMSNormMusaDescriptor_t)desc, size);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -87,40 +87,40 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
void *y, void const *x, void const *w, void *stream) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuRMSNorm((RMSNormCpuDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
case DevCpu:
return cpuRMSNorm((RMSNormCpuDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaRMSNorm((RMSNormCudaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
}
case DevNvGpu: {
return cudaRMSNorm((RMSNormCudaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangRMSNorm((RMSNormBangDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
}
case DevCambriconMlu: {
return bangRMSNorm((RMSNormBangDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnRMSNorm((RMSNormAclnnDescriptor_t) desc,
workspace,
workspace_size,
y,
x,
w,
stream);
}
case DevAscendNpu: {
return aclnnRMSNorm((RMSNormAclnnDescriptor_t)desc,
workspace,
workspace_size,
y,
x,
w,
stream);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaRMSNorm((RMSNormMacaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
}
case DevMetaxGpu: {
return macaRMSNorm((RMSNormMacaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaRMSNorm((RMSNormMusaDescriptor_t) desc, workspace, workspace_size, y, x, w, stream);
}
case DevMthreadsGpu: {
return musaRMSNorm((RMSNormMusaDescriptor_t)desc, workspace, workspace_size, y, x, w, stream);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......@@ -129,34 +129,34 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
__C infiniopStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t desc) {
switch (desc->device) {
#ifdef ENABLE_CPU
case DevCpu:
return cpuDestroyRMSNormDescriptor((RMSNormCpuDescriptor_t) desc);
case DevCpu:
return cpuDestroyRMSNormDescriptor((RMSNormCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
case DevNvGpu: {
return cudaDestroyRMSNormDescriptor((RMSNormCudaDescriptor_t) desc);
}
case DevNvGpu: {
return cudaDestroyRMSNormDescriptor((RMSNormCudaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: {
return bangDestroyRMSNormDescriptor((RMSNormBangDescriptor_t) desc);
}
case DevCambriconMlu: {
return bangDestroyRMSNormDescriptor((RMSNormBangDescriptor_t)desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: {
return aclnnDestroyRMSNormDescriptor((RMSNormAclnnDescriptor_t) desc);
}
case DevAscendNpu: {
return aclnnDestroyRMSNormDescriptor((RMSNormAclnnDescriptor_t)desc);
}
#endif
#ifdef ENABLE_METAX_GPU
case DevMetaxGpu: {
return macaDestroyRMSNormDescriptor((RMSNormMacaDescriptor_t) desc);
}
case DevMetaxGpu: {
return macaDestroyRMSNormDescriptor((RMSNormMacaDescriptor_t)desc);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: {
return musaDestroyRMSNormDescriptor((RMSNormMusaDescriptor_t) desc);
}
case DevMthreadsGpu: {
return musaDestroyRMSNormDescriptor((RMSNormMusaDescriptor_t)desc);
}
#endif
}
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
......@@ -13,28 +13,28 @@
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)
#define CHECK_ERROR(call, target, errCode) \
do { \
if (auto value = (call); value == (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return (errCode); \
} \
#define CHECK_ERROR(call, target, errCode) \
do { \
if (auto value = (call); value == (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return (errCode); \
} \
} while (0)
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
expr; \
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
expr; \
CHECK_ERROR(value, target, errCode)
#define CHECK_STATUS(call, target) \
do { \
if (auto value = (call); value != (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return value; \
} \
#define CHECK_STATUS(call, target) \
do { \
if (auto value = (call); value != (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return value; \
} \
} while (0)
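As an aside, a minimal, self-contained sketch of the early-return pattern these macros implement; the status enum, helper function, and the restated macro below are invented for the illustration and are not part of this commit.

// Hypothetical, self-contained illustration of the CHECK_STATUS pattern above;
// the macro is restated here only so the sketch compiles on its own (C++17).
#include <iostream>

enum DemoStatus { DEMO_SUCCESS = 0, DEMO_BAD_PARAM = 1 };

#define DEMO_CHECK_STATUS(call, target)                                \
    do {                                                               \
        if (auto value = (call); value != (target)) {                  \
            std::cerr << "Error: expected " << (target) << " but got " \
                      << value << " at line " << __LINE__ << std::endl; \
            return value;                                              \
        }                                                              \
    } while (0)

DemoStatus validate(int n) { return n > 0 ? DEMO_SUCCESS : DEMO_BAD_PARAM; }

DemoStatus run(int n) {
    // Logs and returns the offending status as soon as a check fails.
    DEMO_CHECK_STATUS(validate(n), DEMO_SUCCESS);
    return DEMO_SUCCESS;
}

int main() { return run(0) == DEMO_BAD_PARAM ? 0 : 1; }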
inline std::vector<int64_t> getByteStrides(infiniopTensorDescriptor_t desc) {
......@@ -67,8 +67,7 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
// compute broadcasted shape
for (size_t i = 0; i < max_rank; ++i) {
if (padded_shape1[i] == padded_shape2[i] || padded_shape1[i] == 1 ||
padded_shape2[i] == 1) {
if (padded_shape1[i] == padded_shape2[i] || padded_shape1[i] == 1 || padded_shape2[i] == 1) {
broadcast_shape[i] = std::max(padded_shape1[i], padded_shape2[i]);
} else {
return false;
......@@ -89,10 +88,7 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
auto broadcast_shape = broadcast_shape_.data(),
padded_shape1 = padded_shape1_.data(),
padded_shape2 = padded_shape2_.data();
if (broadcast_ndim != c->ndim ||
!getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim,
broadcast_shape, padded_shape1, padded_shape2,
broadcast_ndim)) {
if (broadcast_ndim != c->ndim || !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim, broadcast_shape, padded_shape1, padded_shape2, broadcast_ndim)) {
return false;
}
return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
......@@ -126,7 +122,6 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}
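For orientation, a small standalone example of the broadcasting rule that getBroadcastShape implements (shapes are aligned from the right, and two dimensions are compatible when they are equal or one of them is 1); the helper below is hypothetical and not part of this header.

// Hypothetical standalone illustration of the right-aligned broadcasting rule
// used by getBroadcastShape; not part of this commit.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

static bool broadcastShapes(std::vector<size_t> a, std::vector<size_t> b,
                            std::vector<size_t> &out) {
    size_t rank = std::max(a.size(), b.size());
    a.insert(a.begin(), rank - a.size(), 1); // left-pad the shorter shape with 1s
    b.insert(b.begin(), rank - b.size(), 1);
    out.resize(rank);
    for (size_t i = 0; i < rank; ++i) {
        if (a[i] != b[i] && a[i] != 1 && b[i] != 1) return false; // incompatible pair
        out[i] = std::max(a[i], b[i]);
    }
    return true;
}

int main() {
    std::vector<size_t> out;
    assert(broadcastShapes({4, 1, 5}, {3, 1}, out)); // {4,1,5} x {3,1} -> {4,3,5}
    assert((out == std::vector<size_t>{4, 3, 5}));
    assert(!broadcastShapes({2, 3}, {4, 3}, out)); // 2 vs 4 is incompatible
    return 0;
}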
// permute the dimensions of a tensor descriptor
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
const std::vector<size_t> &order) {
......@@ -149,10 +144,9 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// contiguous
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
size_t dim_start, size_t dim_end) {
size_t dim_start, size_t dim_end) {
for (size_t i = dim_start + 1; i <= dim_end; i++) {
if (desc->strides[i - 1] !=
static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
if (desc->strides[i - 1] != static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) {
return false;
}
}
......@@ -168,7 +162,7 @@ inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
size_t dim_start, size_t dim_end) {
size_t dim_start, size_t dim_end) {
size_t ndim = desc->ndim;
if (dim_start > dim_end || dim_end >= ndim) {
return nullptr;
......@@ -203,11 +197,10 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
// split the dimension dim of a tensor descriptor into multiple dimensions
inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
size_t dim,
const std::vector<size_t> &dims) {
size_t dim,
const std::vector<size_t> &dims) {
size_t ndim = desc->ndim;
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1,
std::multiplies{})) {
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies{})) {
return nullptr;
}
size_t new_ndim = ndim + dims.size() - 1;
......@@ -221,10 +214,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
}
for (size_t i = 0; i < dims.size(); i++) {
new_shape[index] = dims[i];
new_strides[index] =
desc->strides[dim] * desc->shape[dim] /
std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1,
std::multiplies<size_t>());
new_strides[index] = desc->strides[dim] * desc->shape[dim] / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
index++;
}
for (size_t i = dim + 1; i < ndim; i++) {
......
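The stride computation in dimSplit above divides the original stride-times-extent by the running product of the new sub-dimensions; a tiny self-contained check of that arithmetic follows (the shape, strides, and split below are invented for the example).

// Hypothetical check of the dimSplit stride formula; not part of this commit.
// Splitting dim 0 of a contiguous {6, 4} tensor (strides {4, 1}) into {2, 3}
// should give shape {2, 3, 4} with strides {12, 4, 1}.
#include <cassert>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>

int main() {
    std::vector<size_t> dims{2, 3};        // the requested split of dim 0
    int64_t stride_dim = 4, shape_dim = 6; // original stride and extent of dim 0
    std::vector<int64_t> new_strides;
    for (size_t i = 0; i < dims.size(); i++) {
        auto prod = std::accumulate(dims.begin(), dims.begin() + i + 1,
                                    (size_t)1, std::multiplies<size_t>());
        new_strides.push_back(stride_dim * shape_dim / (int64_t)prod);
    }
    assert(new_strides[0] == 12 && new_strides[1] == 4); // inner stride 1 is kept as-is
    return 0;
}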