Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
ec0ff893
Commit
ec0ff893
authored
Feb 17, 2025
by
YdrMaster
Browse files
issue/52: 格式化所有 c/c++ 文件
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
27ba98d1
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
234 additions
and
244 deletions
+234
-244
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
+2
-2
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+68
-68
src/infiniop/ops/rearrange/operator.cc
src/infiniop/ops/rearrange/operator.cc
+57
-57
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+80
-80
src/infiniop/ops/utils.h
src/infiniop/ops/utils.h
+27
-37
No files found.
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
View file @
ec0ff893
#include "../../utils.h"
#include "../../utils.h"
#include "./matmul_cuda.cuh"
#include "./matmul_cuda.cuh"
template
<
typename
Tdata
>
template
<
typename
Tdata
>
infiniopStatus_t
cudaMatmulCublas
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
c
,
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
void
*
stream
)
{
infiniopStatus_t
cudaMatmulCublas
(
infiniopMatmulCudaDescriptor_t
desc
,
void
*
c
,
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
void
*
stream
)
{
auto
info
=
desc
->
info
;
auto
info
=
desc
->
info
;
...
@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
...
@@ -26,7 +26,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
auto
op_a
=
info
.
a_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_a
=
info
.
a_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_b
=
info
.
b_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
auto
op_b
=
info
.
b_matrix
.
row_stride
==
1
?
CUBLAS_OP_N
:
CUBLAS_OP_T
;
use_cublas
(
desc
->
cublas_handle_pool
,
desc
->
device_id
,
(
cudaStream_t
)
stream
,
use_cublas
(
desc
->
cublas_handle_pool
,
desc
->
device_id
,
(
cudaStream_t
)
stream
,
[
&
](
cublasHandle_t
handle
)
{
cublasGemmStridedBatchedEx
(
[
&
](
cublasHandle_t
handle
)
{
cublasGemmStridedBatchedEx
(
handle
,
handle
,
op_a
,
op_a
,
...
...
src/infiniop/ops/random_sample/operator.cc
View file @
ec0ff893
...
@@ -3,36 +3,36 @@
...
@@ -3,36 +3,36 @@
__C
infiniopStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
infiniopTensorDescriptor_t
probs
)
{
__C
infiniopStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
infiniopTensorDescriptor_t
probs
)
{
switch
(
handle
->
device
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuCreateRandomSampleDescriptor
(
handle
,
(
RandomSampleCpuDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
return
cpuCreateRandomSampleDescriptor
(
handle
,
(
RandomSampleCpuDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
case
DevNvGpu
:
return
cudaCreateRandomSampleDescriptor
((
CudaHandle_t
)
handle
,
(
RandomSampleCudaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
return
cudaCreateRandomSampleDescriptor
((
CudaHandle_t
)
handle
,
(
RandomSampleCudaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangCreateRandomSampleDescriptor
((
BangHandle_t
)
handle
,
return
bangCreateRandomSampleDescriptor
((
BangHandle_t
)
handle
,
(
RandomSampleBangDescriptor_t
*
)
desc_ptr
,
result
,
(
RandomSampleBangDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
probs
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
ascendCreateRandomSampleDescriptor
((
AscendHandle_t
)
handle
,
return
ascendCreateRandomSampleDescriptor
((
AscendHandle_t
)
handle
,
(
RandomSampleAscendDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
(
RandomSampleAscendDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaCreateRandomSampleDescriptor
((
MacaHandle_t
)
handle
,
return
macaCreateRandomSampleDescriptor
((
MacaHandle_t
)
handle
,
(
RandomSampleMacaDescriptor_t
*
)
desc_ptr
,
result
,
(
RandomSampleMacaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
probs
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
case
DevMthreadsGpu
:
return
musaCreateRandomSampleDescriptor
((
MusaHandle_t
)
handle
,
(
RandomSampleMusaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
return
musaCreateRandomSampleDescriptor
((
MusaHandle_t
)
handle
,
(
RandomSampleMusaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -41,35 +41,35 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
...
@@ -41,35 +41,35 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
uint64_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuGetRandomSampleWorkspaceSize
((
RandomSampleCpuDescriptor_t
)
desc
,
size
);
return
cpuGetRandomSampleWorkspaceSize
((
RandomSampleCpuDescriptor_t
)
desc
,
size
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaGetRandomSampleWorkspaceSize
((
RandomSampleCudaDescriptor_t
)
desc
,
size
);
return
cudaGetRandomSampleWorkspaceSize
((
RandomSampleCudaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangGetRandomSampleWorkspaceSize
((
RandomSampleBangDescriptor_t
)
desc
,
size
);
return
bangGetRandomSampleWorkspaceSize
((
RandomSampleBangDescriptor_t
)
desc
,
size
);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
ascendGetRandomSampleWorkspaceSize
((
RandomSampleAscendDescriptor_t
)
desc
,
size
);
return
ascendGetRandomSampleWorkspaceSize
((
RandomSampleAscendDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaGetRandomSampleWorkspaceSize
((
RandomSampleMacaDescriptor_t
)
desc
,
size
);
return
macaGetRandomSampleWorkspaceSize
((
RandomSampleMacaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaGetRandomSampleWorkspaceSize
((
RandomSampleMusaDescriptor_t
)
desc
,
size
);
return
musaGetRandomSampleWorkspaceSize
((
RandomSampleMusaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -87,31 +87,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
...
@@ -87,31 +87,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void
*
stream
)
{
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuRandomSample
((
RandomSampleCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
cpuRandomSample
((
RandomSampleCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
case
DevNvGpu
:
return
cudaRandomSample
((
RandomSampleCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
cudaRandomSample
((
RandomSampleCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangRandomSample
((
RandomSampleBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
bangRandomSample
((
RandomSampleBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
ascendRandomSample
((
RandomSampleAscendDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
ascendRandomSample
((
RandomSampleAscendDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaRandomSample
((
RandomSampleMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
macaRandomSample
((
RandomSampleMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
case
DevMthreadsGpu
:
return
musaRandomSample
((
RandomSampleMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
return
musaRandomSample
((
RandomSampleMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -120,31 +120,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
...
@@ -120,31 +120,31 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
__C
infiniopStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
__C
infiniopStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuDestroyRandomSampleDescriptor
((
RandomSampleCpuDescriptor_t
)
desc
);
return
cpuDestroyRandomSampleDescriptor
((
RandomSampleCpuDescriptor_t
)
desc
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
case
DevNvGpu
:
return
cudaDestroyRandomSampleDescriptor
((
RandomSampleCudaDescriptor_t
)
desc
);
return
cudaDestroyRandomSampleDescriptor
((
RandomSampleCudaDescriptor_t
)
desc
);
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangDestroyRandomSampleDescriptor
((
RandomSampleBangDescriptor_t
)
desc
);
return
bangDestroyRandomSampleDescriptor
((
RandomSampleBangDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
ascendDestroyRandomSampleDescriptor
((
RandomSampleAscendDescriptor_t
)
desc
);
return
ascendDestroyRandomSampleDescriptor
((
RandomSampleAscendDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaDestroyRandomSampleDescriptor
((
RandomSampleMacaDescriptor_t
)
desc
);
return
macaDestroyRandomSampleDescriptor
((
RandomSampleMacaDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
case
DevMthreadsGpu
:
return
musaDestroyRandomSampleDescriptor
((
RandomSampleMusaDescriptor_t
)
desc
);
return
musaDestroyRandomSampleDescriptor
((
RandomSampleMusaDescriptor_t
)
desc
);
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/rearrange/operator.cc
View file @
ec0ff893
...
@@ -7,37 +7,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
...
@@ -7,37 +7,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
infiniopTensorDescriptor_t
src
)
{
infiniopTensorDescriptor_t
src
)
{
switch
(
handle
->
device
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuCreateRearrangeDescriptor
(
handle
,
(
RearrangeCpuDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
return
cpuCreateRearrangeDescriptor
(
handle
,
(
RearrangeCpuDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaCreateRearrangeDescriptor
((
CudaHandle_t
)
handle
,
(
RearrangeCudaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
return
cudaCreateRearrangeDescriptor
((
CudaHandle_t
)
handle
,
(
RearrangeCudaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangCreateRearrangeDescriptor
((
BangHandle_t
)
handle
,
(
RearrangeBangDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
return
bangCreateRearrangeDescriptor
((
BangHandle_t
)
handle
,
(
RearrangeBangDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnCreateRearrangeDescriptor
((
AscendHandle_t
)
handle
,
return
aclnnCreateRearrangeDescriptor
((
AscendHandle_t
)
handle
,
(
RearrangeAclnnDescriptor_t
*
)
desc_ptr
,
(
RearrangeAclnnDescriptor_t
*
)
desc_ptr
,
dst
,
dst
,
src
);
src
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaCreateRearrangeDescriptor
((
MacaHandle_t
)
handle
,
(
RearrangeMacaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
return
macaCreateRearrangeDescriptor
((
MacaHandle_t
)
handle
,
(
RearrangeMacaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaCreateRearrangeDescriptor
((
MusaHandle_t
)
handle
,
(
RearrangeMusaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
return
musaCreateRearrangeDescriptor
((
MusaHandle_t
)
handle
,
(
RearrangeMusaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -46,37 +46,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
...
@@ -46,37 +46,37 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
__C
infiniopStatus_t
infiniopRearrange
(
infiniopRearrangeDescriptor_t
desc
,
void
*
dst
,
void
const
*
src
,
void
*
stream
)
{
__C
infiniopStatus_t
infiniopRearrange
(
infiniopRearrangeDescriptor_t
desc
,
void
*
dst
,
void
const
*
src
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuRearrange
((
RearrangeCpuDescriptor_t
)
desc
,
dst
,
src
,
stream
);
return
cpuRearrange
((
RearrangeCpuDescriptor_t
)
desc
,
dst
,
src
,
stream
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaRearrange
((
RearrangeCudaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
return
cudaRearrange
((
RearrangeCudaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangRearrange
((
RearrangeBangDescriptor_t
)
desc
,
dst
,
src
,
stream
);
return
bangRearrange
((
RearrangeBangDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnRearrange
((
RearrangeAclnnDescriptor_t
)
desc
,
return
aclnnRearrange
((
RearrangeAclnnDescriptor_t
)
desc
,
dst
,
dst
,
src
,
src
,
stream
);
stream
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaRearrange
((
RearrangeMacaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
return
macaRearrange
((
RearrangeMacaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaRearrange
((
RearrangeMusaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
return
musaRearrange
((
RearrangeMusaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -85,34 +85,34 @@ __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void
...
@@ -85,34 +85,34 @@ __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void
__C
infiniopStatus_t
infiniopDestroyRearrangeDescriptor
(
infiniopRearrangeDescriptor_t
desc
)
{
__C
infiniopStatus_t
infiniopDestroyRearrangeDescriptor
(
infiniopRearrangeDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuDestroyRearrangeDescriptor
((
RearrangeCpuDescriptor_t
)
desc
);
return
cpuDestroyRearrangeDescriptor
((
RearrangeCpuDescriptor_t
)
desc
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaDestroyRearrangeDescriptor
((
RearrangeCudaDescriptor_t
)
desc
);
return
cudaDestroyRearrangeDescriptor
((
RearrangeCudaDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangDestroyRearrangeDescriptor
((
RearrangeBangDescriptor_t
)
desc
);
return
bangDestroyRearrangeDescriptor
((
RearrangeBangDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnDestroyRearrangeDescriptor
((
RearrangeAclnnDescriptor_t
)
desc
);
return
aclnnDestroyRearrangeDescriptor
((
RearrangeAclnnDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaDestroyRearrangeDescriptor
((
RearrangeMacaDescriptor_t
)
desc
);
return
macaDestroyRearrangeDescriptor
((
RearrangeMacaDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaDestroyRearrangeDescriptor
((
RearrangeMusaDescriptor_t
)
desc
);
return
musaDestroyRearrangeDescriptor
((
RearrangeMusaDescriptor_t
)
desc
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/rms_norm/operator.cc
View file @
ec0ff893
...
@@ -9,38 +9,38 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
...
@@ -9,38 +9,38 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
float
epsilon
)
{
float
epsilon
)
{
switch
(
handle
->
device
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuCreateRMSNormDescriptor
(
handle
,
(
RMSNormCpuDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
return
cpuCreateRMSNormDescriptor
(
handle
,
(
RMSNormCpuDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaCreateRMSNormDescriptor
((
CudaHandle_t
)
handle
,
(
RMSNormCudaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
return
cudaCreateRMSNormDescriptor
((
CudaHandle_t
)
handle
,
(
RMSNormCudaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangCreateRMSNormDescriptor
((
BangHandle_t
)
handle
,
(
RMSNormBangDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
return
bangCreateRMSNormDescriptor
((
BangHandle_t
)
handle
,
(
RMSNormBangDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnCreateRMSNormDescriptor
((
AscendHandle_t
)
handle
,
return
aclnnCreateRMSNormDescriptor
((
AscendHandle_t
)
handle
,
(
RMSNormAclnnDescriptor_t
*
)
desc_ptr
,
(
RMSNormAclnnDescriptor_t
*
)
desc_ptr
,
y_desc
,
y_desc
,
x_desc
,
x_desc
,
w_desc
,
w_desc
,
epsilon
);
epsilon
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaCreateRMSNormDescriptor
((
MacaHandle_t
)
handle
,
(
RMSNormMacaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
return
macaCreateRMSNormDescriptor
((
MacaHandle_t
)
handle
,
(
RMSNormMacaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaCreateRMSNormDescriptor
((
MusaHandle_t
)
handle
,
(
RMSNormMusaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
return
musaCreateRMSNormDescriptor
((
MusaHandle_t
)
handle
,
(
RMSNormMusaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -49,35 +49,35 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
...
@@ -49,35 +49,35 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
uint64_t
*
size
)
{
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuGetRMSNormWorkspaceSize
((
RMSNormCpuDescriptor_t
)
desc
,
size
);
return
cpuGetRMSNormWorkspaceSize
((
RMSNormCpuDescriptor_t
)
desc
,
size
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaGetRMSNormWorkspaceSize
((
RMSNormCudaDescriptor_t
)
desc
,
size
);
return
cudaGetRMSNormWorkspaceSize
((
RMSNormCudaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangGetRMSNormWorkspaceSize
((
RMSNormBangDescriptor_t
)
desc
,
size
);
return
bangGetRMSNormWorkspaceSize
((
RMSNormBangDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnGetRMSNormWorkspaceSize
((
RMSNormAclnnDescriptor_t
)
desc
,
return
aclnnGetRMSNormWorkspaceSize
((
RMSNormAclnnDescriptor_t
)
desc
,
size
);
size
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaGetRMSNormWorkspaceSize
((
RMSNormMacaDescriptor_t
)
desc
,
size
);
return
macaGetRMSNormWorkspaceSize
((
RMSNormMacaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaGetRMSNormWorkspaceSize
((
RMSNormMusaDescriptor_t
)
desc
,
size
);
return
musaGetRMSNormWorkspaceSize
((
RMSNormMusaDescriptor_t
)
desc
,
size
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -87,40 +87,40 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
...
@@ -87,40 +87,40 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuRMSNorm
((
RMSNormCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
return
cpuRMSNorm
((
RMSNormCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaRMSNorm
((
RMSNormCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
return
cudaRMSNorm
((
RMSNormCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangRMSNorm
((
RMSNormBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
return
bangRMSNorm
((
RMSNormBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnRMSNorm
((
RMSNormAclnnDescriptor_t
)
desc
,
return
aclnnRMSNorm
((
RMSNormAclnnDescriptor_t
)
desc
,
workspace
,
workspace
,
workspace_size
,
workspace_size
,
y
,
y
,
x
,
x
,
w
,
w
,
stream
);
stream
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaRMSNorm
((
RMSNormMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
return
macaRMSNorm
((
RMSNormMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaRMSNorm
((
RMSNormMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
return
musaRMSNorm
((
RMSNormMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
@@ -129,34 +129,34 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
...
@@ -129,34 +129,34 @@ __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *wor
__C
infiniopStatus_t
infiniopDestroyRMSNormDescriptor
(
infiniopRMSNormDescriptor_t
desc
)
{
__C
infiniopStatus_t
infiniopDestroyRMSNormDescriptor
(
infiniopRMSNormDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
#ifdef ENABLE_CPU
case
DevCpu
:
case
DevCpu
:
return
cpuDestroyRMSNormDescriptor
((
RMSNormCpuDescriptor_t
)
desc
);
return
cpuDestroyRMSNormDescriptor
((
RMSNormCpuDescriptor_t
)
desc
);
#endif
#endif
#ifdef ENABLE_NV_GPU
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
case
DevNvGpu
:
{
return
cudaDestroyRMSNormDescriptor
((
RMSNormCudaDescriptor_t
)
desc
);
return
cudaDestroyRMSNormDescriptor
((
RMSNormCudaDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_CAMBRICON_MLU
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
case
DevCambriconMlu
:
{
return
bangDestroyRMSNormDescriptor
((
RMSNormBangDescriptor_t
)
desc
);
return
bangDestroyRMSNormDescriptor
((
RMSNormBangDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_ASCEND_NPU
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
case
DevAscendNpu
:
{
return
aclnnDestroyRMSNormDescriptor
((
RMSNormAclnnDescriptor_t
)
desc
);
return
aclnnDestroyRMSNormDescriptor
((
RMSNormAclnnDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_METAX_GPU
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
case
DevMetaxGpu
:
{
return
macaDestroyRMSNormDescriptor
((
RMSNormMacaDescriptor_t
)
desc
);
return
macaDestroyRMSNormDescriptor
((
RMSNormMacaDescriptor_t
)
desc
);
}
}
#endif
#endif
#ifdef ENABLE_MTHREADS_GPU
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
case
DevMthreadsGpu
:
{
return
musaDestroyRMSNormDescriptor
((
RMSNormMusaDescriptor_t
)
desc
);
return
musaDestroyRMSNormDescriptor
((
RMSNormMusaDescriptor_t
)
desc
);
}
}
#endif
#endif
}
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/utils.h
View file @
ec0ff893
...
@@ -13,28 +13,28 @@
...
@@ -13,28 +13,28 @@
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)
#define CHECK_ERROR(call, target, errCode)
\
#define CHECK_ERROR(call, target, errCode) \
do {
\
do { \
if (auto value = (call); value == (target)) {
\
if (auto value = (call); value == (target)) { \
std::cerr << "Error: expected " << (target) << " but got "
\
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function "
\
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl;
\
<< __func__ << ", line " << __LINE__ << std::endl; \
return (errCode);
\
return (errCode); \
}
\
} \
} while (0)
} while (0)
#define CREATE_CHECK_ERROR(expr, value, target, errCode)
\
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
expr;
\
expr; \
CHECK_ERROR(value, target, errCode)
CHECK_ERROR(value, target, errCode)
#define CHECK_STATUS(call, target)
\
#define CHECK_STATUS(call, target) \
do {
\
do { \
if (auto value = (call); value != (target)) {
\
if (auto value = (call); value != (target)) { \
std::cerr << "Error: expected " << (target) << " but got "
\
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function "
\
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl;
\
<< __func__ << ", line " << __LINE__ << std::endl; \
return value;
\
return value; \
}
\
} \
} while (0)
} while (0)
inline
std
::
vector
<
int64_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
inline
std
::
vector
<
int64_t
>
getByteStrides
(
infiniopTensorDescriptor_t
desc
)
{
...
@@ -67,8 +67,7 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
...
@@ -67,8 +67,7 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
// compute broadcasted shape
// compute broadcasted shape
for
(
size_t
i
=
0
;
i
<
max_rank
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
max_rank
;
++
i
)
{
if
(
padded_shape1
[
i
]
==
padded_shape2
[
i
]
||
padded_shape1
[
i
]
==
1
||
if
(
padded_shape1
[
i
]
==
padded_shape2
[
i
]
||
padded_shape1
[
i
]
==
1
||
padded_shape2
[
i
]
==
1
)
{
padded_shape2
[
i
]
==
1
)
{
broadcast_shape
[
i
]
=
std
::
max
(
padded_shape1
[
i
],
padded_shape2
[
i
]);
broadcast_shape
[
i
]
=
std
::
max
(
padded_shape1
[
i
],
padded_shape2
[
i
]);
}
else
{
}
else
{
return
false
;
return
false
;
...
@@ -89,10 +88,7 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
...
@@ -89,10 +88,7 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
padded_shape2
=
padded_shape2_
.
data
();
padded_shape2
=
padded_shape2_
.
data
();
if
(
broadcast_ndim
!=
c
->
ndim
||
if
(
broadcast_ndim
!=
c
->
ndim
||
!
getBroadcastShape
(
a
->
shape
,
a
->
ndim
,
b
->
shape
,
b
->
ndim
,
broadcast_shape
,
padded_shape1
,
padded_shape2
,
broadcast_ndim
))
{
!
getBroadcastShape
(
a
->
shape
,
a
->
ndim
,
b
->
shape
,
b
->
ndim
,
broadcast_shape
,
padded_shape1
,
padded_shape2
,
broadcast_ndim
))
{
return
false
;
return
false
;
}
}
return
std
::
equal
(
broadcast_shape
,
broadcast_shape
+
broadcast_ndim
,
return
std
::
equal
(
broadcast_shape
,
broadcast_shape
+
broadcast_ndim
,
...
@@ -126,7 +122,6 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
...
@@ -126,7 +122,6 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
return
isValidBroadcastShape
(
a
,
b
,
c
,
std
::
max
(
a
->
ndim
,
b
->
ndim
));
return
isValidBroadcastShape
(
a
,
b
,
c
,
std
::
max
(
a
->
ndim
,
b
->
ndim
));
}
}
// permute the dimensions of a tensor descriptor
// permute the dimensions of a tensor descriptor
inline
infiniopTensorDescriptor_t
permute
(
infiniopTensorDescriptor_t
desc
,
inline
infiniopTensorDescriptor_t
permute
(
infiniopTensorDescriptor_t
desc
,
const
std
::
vector
<
size_t
>
&
order
)
{
const
std
::
vector
<
size_t
>
&
order
)
{
...
@@ -149,10 +144,9 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
...
@@ -149,10 +144,9 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// contiguous
// contiguous
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
inline
bool
isContiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
size_t
dim_start
,
size_t
dim_end
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
if
(
desc
->
strides
[
i
-
1
]
!=
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
int64_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
static_cast
<
int64_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
return
false
;
return
false
;
}
}
}
}
...
@@ -168,7 +162,7 @@ inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
...
@@ -168,7 +162,7 @@ inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
inline
infiniopTensorDescriptor_t
dimMerge
(
infiniopTensorDescriptor_t
desc
,
inline
infiniopTensorDescriptor_t
dimMerge
(
infiniopTensorDescriptor_t
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
size_t
dim_start
,
size_t
dim_end
)
{
size_t
ndim
=
desc
->
ndim
;
size_t
ndim
=
desc
->
ndim
;
if
(
dim_start
>
dim_end
||
dim_end
>=
ndim
)
{
if
(
dim_start
>
dim_end
||
dim_end
>=
ndim
)
{
return
nullptr
;
return
nullptr
;
...
@@ -203,11 +197,10 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
...
@@ -203,11 +197,10 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
// split the dimension dim of a tensor descriptor into multiple dimensions
// split the dimension dim of a tensor descriptor into multiple dimensions
inline
infiniopTensorDescriptor_t
dimSplit
(
infiniopTensorDescriptor_t
desc
,
inline
infiniopTensorDescriptor_t
dimSplit
(
infiniopTensorDescriptor_t
desc
,
size_t
dim
,
size_t
dim
,
const
std
::
vector
<
size_t
>
&
dims
)
{
const
std
::
vector
<
size_t
>
&
dims
)
{
size_t
ndim
=
desc
->
ndim
;
size_t
ndim
=
desc
->
ndim
;
if
(
desc
->
shape
[
dim
]
!=
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
(
size_t
)
1
,
if
(
desc
->
shape
[
dim
]
!=
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
(
size_t
)
1
,
std
::
multiplies
{}))
{
std
::
multiplies
{}))
{
return
nullptr
;
return
nullptr
;
}
}
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
...
@@ -221,10 +214,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
...
@@ -221,10 +214,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
}
}
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
i
++
)
{
new_shape
[
index
]
=
dims
[
i
];
new_shape
[
index
]
=
dims
[
i
];
new_strides
[
index
]
=
new_strides
[
index
]
=
desc
->
strides
[
dim
]
*
desc
->
shape
[
dim
]
/
std
::
accumulate
(
dims
.
begin
(),
dims
.
begin
()
+
i
+
1
,
(
size_t
)
1
,
std
::
multiplies
<
size_t
>
());
desc
->
strides
[
dim
]
*
desc
->
shape
[
dim
]
/
std
::
accumulate
(
dims
.
begin
(),
dims
.
begin
()
+
i
+
1
,
(
size_t
)
1
,
std
::
multiplies
<
size_t
>
());
index
++
;
index
++
;
}
}
for
(
size_t
i
=
dim
+
1
;
i
<
ndim
;
i
++
)
{
for
(
size_t
i
=
dim
+
1
;
i
<
ndim
;
i
++
)
{
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment