Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
7a833987
Commit
7a833987
authored
Feb 13, 2025
by
PanZezhongQY
Browse files
feat: 添加大模型算子operator.cc,cpu和cuda视warning为错误
parent
e3ea5bae
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
957 additions
and
66 deletions
+957
-66
src/infiniop/devices/cpu/common_cpu.cc
src/infiniop/devices/cpu/common_cpu.cc
+3
-3
src/infiniop/ops/causal_softmax/operator.cc
src/infiniop/ops/causal_softmax/operator.cc
+152
-0
src/infiniop/ops/matmul/blas.h
src/infiniop/ops/matmul/blas.h
+3
-3
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
+7
-7
src/infiniop/ops/matmul/operator.cc
src/infiniop/ops/matmul/operator.cc
+0
-1
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+151
-0
src/infiniop/ops/rearrange/operator.cc
src/infiniop/ops/rearrange/operator.cc
+119
-0
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+163
-0
src/infiniop/ops/rotary_embedding/operator.cc
src/infiniop/ops/rotary_embedding/operator.cc
+174
-0
src/infiniop/ops/swiglu/operator.cc
src/infiniop/ops/swiglu/operator.cc
+111
-0
src/infiniop/ops/utils.h
src/infiniop/ops/utils.h
+70
-52
xmake/cpu.lua
xmake/cpu.lua
+2
-0
xmake/cuda.lua
xmake/cuda.lua
+2
-0
No files found.
src/infiniop/devices/cpu/common_cpu.cc
View file @
7a833987
...
...
@@ -52,14 +52,14 @@ uint16_t f32_to_f16(float val) {
// Infinity
return
sign
|
0x7C00
;
}
else
if
(
exponent
>=
-
14
)
{
// Normalized case
return
sign
|
((
exponent
+
15
)
<<
10
)
|
(
mantissa
>>
13
);
return
(
uint16_t
)(
sign
|
((
exponent
+
15
)
<<
10
)
|
(
mantissa
>>
13
)
)
;
}
else
if
(
exponent
>=
-
24
)
{
mantissa
|=
0x800000
;
// Add implicit leading 1
mantissa
>>=
(
-
14
-
exponent
);
return
sign
|
(
mantissa
>>
13
);
return
(
uint16_t
)(
sign
|
(
mantissa
>>
13
)
)
;
}
else
{
// Too small for subnormal: return signed zero
return
sign
;
return
(
uint16_t
)
sign
;
}
}
...
...
src/infiniop/ops/causal_softmax/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/causal_softmax.h"
__C
infiniopStatus_t
infiniopCreateCausalSoftmaxDescriptor
(
infiniopHandle_t
handle
,
infiniopCausalSoftmaxDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateCausalSoftmaxDescriptor
(
handle
,
(
CausalSoftmaxCpuDescriptor_t
*
)
desc_ptr
,
y_desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCreateCausalSoftmaxDescriptor
((
CudaHandle_t
)
handle
,
(
CausalSoftmaxCudaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateCausalSoftmaxDescriptor
((
BangHandle_t
)
handle
,
(
CausalSoftmaxBangDescriptor_t
*
)
desc_ptr
,
y_desc
);
// return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCreateCausalSoftmaxDescriptor
((
AscendHandle_t
)
handle
,
(
CausalSoftmaxAclnnDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateCausalSoftmaxDescriptor
((
MacaHandle_t
)
handle
,
(
CausalSoftmaxMacaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateCausalSoftmaxDescriptor
((
MusaHandle_t
)
handle
,
(
CausalSoftmaxMusaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxBangDescriptor_t
)
desc
,
size
);
// return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
size
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCausalSoftmax
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCausalSoftmax
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCausalSoftmax
((
CausalSoftmaxBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
// return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCausalSoftmax
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCausalSoftmax
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCausalSoftmax
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroyCausalSoftmaxDescriptor
(
infiniopCausalSoftmaxDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCudaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxBangDescriptor_t
)
desc
);
// return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxAclnnDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMusaDescriptor_t
)
desc
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/matmul/blas.h
View file @
7a833987
...
...
@@ -47,8 +47,8 @@ typedef struct BlasMatrix {
*
status
=
INFINIOP_STATUS_SUCCESS
;
}
bool
match_batch
(
size_t
batch
)
const
{
return
this
->
batch
==
batch
||
this
->
batch
==
1
;
bool
match_batch
(
size_t
_
batch
)
const
{
return
this
->
batch
==
_
batch
||
this
->
batch
==
1
;
}
void
transpose
()
{
...
...
@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
std
::
swap
(
row_stride
,
col_stride
);
}
int
ld
()
const
{
int
64_t
ld
()
const
{
if
(
this
->
row_stride
==
1
)
{
return
this
->
col_stride
;
}
else
{
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda_kernel.cu
View file @
7a833987
...
...
@@ -31,24 +31,24 @@ infiniopStatus_t matmul_cuda(infiniopMatmulCudaDescriptor_t desc, void *c, float
handle
,
op_a
,
op_b
,
info
.
m
,
info
.
n
,
info
.
k
,
static_cast
<
int
>
(
info
.
m
)
,
static_cast
<
int
>
(
info
.
n
)
,
static_cast
<
int
>
(
info
.
k
)
,
&
alpha
,
a
,
a_type
,
info
.
a_matrix
.
ld
(),
static_cast
<
int
>
(
info
.
a_matrix
.
ld
()
)
,
info
.
a_matrix
.
stride
,
b
,
b_type
,
info
.
b_matrix
.
ld
(),
static_cast
<
int
>
(
info
.
b_matrix
.
ld
()
)
,
info
.
b_matrix
.
stride
,
&
beta
,
c
,
c_type
,
info
.
c_matrix
.
ld
(),
static_cast
<
int
>
(
info
.
c_matrix
.
ld
()
)
,
info
.
c_matrix
.
stride
,
info
.
batch
,
static_cast
<
int
>
(
info
.
batch
)
,
compute_type
,
CUBLAS_GEMM_DEFAULT_TENSOR_OP
);
});
return
INFINIOP_STATUS_SUCCESS
;
...
...
src/infiniop/ops/matmul/operator.cc
View file @
7a833987
#include "../utils.h"
#include "infiniop/ops/matmul.h"
#ifdef ENABLE_CPU_API
...
...
src/infiniop/ops/random_sample/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/random_sample.h"
__C
infiniopStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
result
,
infiniopTensorDescriptor_t
probs
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateRandomSampleDescriptor
(
handle
,
(
RandomSampleCpuDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaCreateRandomSampleDescriptor
((
CudaHandle_t
)
handle
,
(
RandomSampleCudaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRandomSampleDescriptor
((
BangHandle_t
)
handle
,
(
RandomSampleBangDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendCreateRandomSampleDescriptor
((
AscendHandle_t
)
handle
,
(
RandomSampleAscendDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRandomSampleDescriptor
((
MacaHandle_t
)
handle
,
(
RandomSampleMacaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaCreateRandomSampleDescriptor
((
MusaHandle_t
)
handle
,
(
RandomSampleMusaDescriptor_t
*
)
desc_ptr
,
result
,
probs
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
};
__C
infiniopStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetRandomSampleWorkspaceSize
((
RandomSampleCpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetRandomSampleWorkspaceSize
((
RandomSampleCudaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetRandomSampleWorkspaceSize
((
RandomSampleBangDescriptor_t
)
desc
,
size
);
// return cnnlGetRandomSampleWorkspaceSize((RandomSampleCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendGetRandomSampleWorkspaceSize
((
RandomSampleAscendDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetRandomSampleWorkspaceSize
((
RandomSampleMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetRandomSampleWorkspaceSize
((
RandomSampleMusaDescriptor_t
)
desc
,
size
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
result
,
void
const
*
probs
,
float
random_val
,
float
topp
,
int
topk
,
float
temperature
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuRandomSample
((
RandomSampleCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaRandomSample
((
RandomSampleCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangRandomSample
((
RandomSampleBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendRandomSample
((
RandomSampleAscendDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaRandomSample
((
RandomSampleMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaRandomSample
((
RandomSampleMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
result
,
probs
,
random_val
,
topp
,
topk
,
temperature
,
stream
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyRandomSampleDescriptor
((
RandomSampleCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaDestroyRandomSampleDescriptor
((
RandomSampleCudaDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyRandomSampleDescriptor
((
RandomSampleBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendDestroyRandomSampleDescriptor
((
RandomSampleAscendDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyRandomSampleDescriptor
((
RandomSampleMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaDestroyRandomSampleDescriptor
((
RandomSampleMusaDescriptor_t
)
desc
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/rearrange/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/rearrange.h"
__C
infiniopStatus_t
infiniopCreateRearrangeDescriptor
(
infiniopHandle_t
handle
,
infiniopRearrangeDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
dst
,
infiniopTensorDescriptor_t
src
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateRearrangeDescriptor
(
handle
,
(
RearrangeCpuDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCreateRearrangeDescriptor
((
CudaHandle_t
)
handle
,
(
RearrangeCudaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRearrangeDescriptor
((
BangHandle_t
)
handle
,
(
RearrangeBangDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCreateRearrangeDescriptor
((
AscendHandle_t
)
handle
,
(
RearrangeAclnnDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRearrangeDescriptor
((
MacaHandle_t
)
handle
,
(
RearrangeMacaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateRearrangeDescriptor
((
MusaHandle_t
)
handle
,
(
RearrangeMusaDescriptor_t
*
)
desc_ptr
,
dst
,
src
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopRearrange
(
infiniopRearrangeDescriptor_t
desc
,
void
*
dst
,
void
const
*
src
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuRearrange
((
RearrangeCpuDescriptor_t
)
desc
,
dst
,
src
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaRearrange
((
RearrangeCudaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangRearrange
((
RearrangeBangDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnRearrange
((
RearrangeAclnnDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaRearrange
((
RearrangeMacaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaRearrange
((
RearrangeMusaDescriptor_t
)
desc
,
dst
,
src
,
stream
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroyRearrangeDescriptor
(
infiniopRearrangeDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyRearrangeDescriptor
((
RearrangeCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaDestroyRearrangeDescriptor
((
RearrangeCudaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyRearrangeDescriptor
((
RearrangeBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnDestroyRearrangeDescriptor
((
RearrangeAclnnDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyRearrangeDescriptor
((
RearrangeMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaDestroyRearrangeDescriptor
((
RearrangeMusaDescriptor_t
)
desc
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/rms_norm/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/rms_norm.h"
__C
infiniopStatus_t
infiniopCreateRMSNormDescriptor
(
infiniopHandle_t
handle
,
infiniopRMSNormDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
infiniopTensorDescriptor_t
x_desc
,
infiniopTensorDescriptor_t
w_desc
,
float
epsilon
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateRMSNormDescriptor
(
handle
,
(
RMSNormCpuDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCreateRMSNormDescriptor
((
CudaHandle_t
)
handle
,
(
RMSNormCudaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRMSNormDescriptor
((
BangHandle_t
)
handle
,
(
RMSNormBangDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCreateRMSNormDescriptor
((
AscendHandle_t
)
handle
,
(
RMSNormAclnnDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRMSNormDescriptor
((
MacaHandle_t
)
handle
,
(
RMSNormMacaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateRMSNormDescriptor
((
MusaHandle_t
)
handle
,
(
RMSNormMusaDescriptor_t
*
)
desc_ptr
,
y_desc
,
x_desc
,
w_desc
,
epsilon
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetRMSNormWorkspaceSize
((
RMSNormCpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetRMSNormWorkspaceSize
((
RMSNormCudaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetRMSNormWorkspaceSize
((
RMSNormBangDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnGetRMSNormWorkspaceSize
((
RMSNormAclnnDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetRMSNormWorkspaceSize
((
RMSNormMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetRMSNormWorkspaceSize
((
RMSNormMusaDescriptor_t
)
desc
,
size
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
y
,
void
const
*
x
,
void
const
*
w
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuRMSNorm
((
RMSNormCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaRMSNorm
((
RMSNormCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangRMSNorm
((
RMSNormBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnRMSNorm
((
RMSNormAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaRMSNorm
((
RMSNormMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaRMSNorm
((
RMSNormMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
y
,
x
,
w
,
stream
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroyRMSNormDescriptor
(
infiniopRMSNormDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyRMSNormDescriptor
((
RMSNormCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaDestroyRMSNormDescriptor
((
RMSNormCudaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyRMSNormDescriptor
((
RMSNormBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnDestroyRMSNormDescriptor
((
RMSNormAclnnDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyRMSNormDescriptor
((
RMSNormMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaDestroyRMSNormDescriptor
((
RMSNormMusaDescriptor_t
)
desc
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/rotary_embedding/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/rotary_embedding.h"
__C
infiniopStatus_t
infiniopCreateRoPEDescriptor
(
infiniopHandle_t
handle
,
infiniopRoPEDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
t
,
infiniopTensorDescriptor_t
pos_ids
,
infiniopTensorDescriptor_t
sin_table
,
infiniopTensorDescriptor_t
cos_table
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateRoPEDescriptor
((
CpuHandle_t
)
handle
,
(
RoPECpuDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCreateRoPEDescriptor
((
CudaHandle_t
)
handle
,
(
RoPECudaDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateRoPEDescriptor
((
BangHandle_t
)
handle
,
(
RoPEBangDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendCreateRoPEDescriptor
((
AscendHandle_t
)
handle
,
(
RoPEAscendDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateRoPEDescriptor
((
MacaHandle_t
)
handle
,
(
RoPEMacaDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateRoPEDescriptor
((
MusaHandle_t
)
handle
,
(
RoPEMusaDescriptor_t
*
)
desc_ptr
,
t
,
pos_ids
,
sin_table
,
cos_table
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetRoPEWorkspaceSize
((
RoPECpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetRoPEWorkspaceSize
((
RoPECudaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetRoPEWorkspaceSize
((
RoPEBangDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendGetRoPEWorkspaceSize
((
RoPEAscendDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetRoPEWorkspaceSize
((
RoPEMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetRoPEWorkspaceSize
((
RoPEMusaDescriptor_t
)
desc
,
size
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
t
,
void
const
*
pos_ids
,
void
const
*
sin_table
,
void
const
*
cos_table
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuRoPE
((
RoPECpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaRoPE
((
RoPECudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangRoPE
((
RoPEBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendRoPE
((
RoPEAscendDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaRoPE
((
RoPEMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaRoPE
((
RoPEMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
t
,
pos_ids
,
sin_table
,
cos_table
,
stream
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroyRoPEDescriptor
(
infiniopRoPEDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyRoPEDescriptor
((
RoPECpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaDestroyRoPEDescriptor
((
RoPECudaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyRoPEDescriptor
((
RoPEBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
ascendDestroyRoPEDescriptor
((
RoPEAscendDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyRoPEDescriptor
((
RoPEMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaDestroyRoPEDescriptor
((
RoPEMusaDescriptor_t
)
desc
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/swiglu/operator.cc
0 → 100644
View file @
7a833987
#include "infiniop/ops/swiglu.h"
__C
infiniopStatus_t
infiniopCreateSwiGLUDescriptor
(
infiniopHandle_t
handle
,
infiniopSwiGLUDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
infiniopTensorDescriptor_t
a_desc
,
infiniopTensorDescriptor_t
b_desc
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateSwiGLUDescriptor
(
handle
,
(
SwiGLUCpuDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaCreateSwiGLUDescriptor
((
CudaHandle_t
)
handle
,
(
SwiGLUCudaDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateSwiGLUDescriptor
((
BangHandle_t
)
handle
,
(
SwiGLUBangDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
return
ascendCreateSwiGLUDescriptor
(
(
AscendHandle_t
)
handle
,
(
SwiGLUAscendDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateSwiGLUDescriptor
((
MacaHandle_t
)
handle
,
(
SwiGLUMacaDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaCreateSwiGLUDescriptor
(
handle
,
(
SwiGLUMusaDescriptor_t
*
)
desc_ptr
,
c_desc
,
a_desc
,
b_desc
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
};
__C
infiniopStatus_t
infiniopSwiGLU
(
infiniopSwiGLUDescriptor_t
desc
,
void
*
c
,
void
const
*
a
,
void
const
*
b
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuSwiGLU
((
SwiGLUCpuDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaSwiGLU
((
SwiGLUCudaDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangSwiGLU
((
SwiGLUBangDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
return
ascendSwiGLU
((
SwiGLUAscendDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
return
macaSwiGLU
((
SwiGLUMacaDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaSwiGLU
((
SwiGLUMusaDescriptor_t
)
desc
,
c
,
a
,
b
,
stream
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniopStatus_t
infiniopDestroySwiGLUDescriptor
(
infiniopSwiGLUDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroySwiGLUDescriptor
((
SwiGLUCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
return
cudaDestroySwiGLUDescriptor
((
SwiGLUCudaDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroySwiGLUDescriptor
((
SwiGLUBangDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
return
ascendDestroySwiGLUDescriptor
((
SwiGLUAscendDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
return
macaDestroySwiGLUDescriptor
((
SwiGLUMacaDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaDestroySwiGLUDescriptor
((
SwiGLUMusaDescriptor_t
)
desc
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
src/infiniop/ops/utils.h
View file @
7a833987
...
...
@@ -13,32 +13,28 @@
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)
#define CHECK_ERROR(call, target, errCode) \
do { \
if (auto value = (call); value == (target)) { \
std::cerr << "Error: expected " << (target) \
<< " but got " << value \
<< " in file " << __FILE__ \
<< ", function " << __func__ \
<< ", line " << __LINE__ << std::endl; \
return (errCode); \
} \
#define CHECK_ERROR(call, target, errCode) \
do { \
if (auto value = (call); value == (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return (errCode); \
} \
} while (0)
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
expr; \
#define CREATE_CHECK_ERROR(expr, value, target, errCode)
\
expr;
\
CHECK_ERROR(value, target, errCode)
#define CHECK_STATUS(call, target) \
do { \
if (auto value = (call); value != (target)) { \
std::cerr << "Error: expected " << (target) \
<< " but got " << value \
<< " in file " << __FILE__ \
<< ", function " << __func__ \
<< ", line " << __LINE__ << std::endl; \
return value; \
} \
#define CHECK_STATUS(call, target) \
do { \
if (auto value = (call); value != (target)) { \
std::cerr << "Error: expected " << (target) << " but got " \
<< value << " in file " << __FILE__ << ", function " \
<< __func__ << ", line " << __LINE__ << std::endl; \
return value; \
} \
} while (0)
inline
std
::
vector
<
int64_t
>
get_byte_strides
(
infiniopTensorDescriptor_t
desc
)
{
...
...
@@ -53,8 +49,9 @@ inline std::vector<int64_t> get_byte_strides(infiniopTensorDescriptor_t desc) {
// calculate the broadcasted shape for two tensors
inline
bool
getBroadcastShape
(
const
uint64_t
*
shape1
,
uint64_t
ndim1
,
const
uint64_t
*
shape2
,
uint64_t
ndim2
,
uint64_t
*
broadcast_shape
,
uint64_t
*
padded_shape1
,
uint64_t
*
padded_shape2
,
uint64_t
max_rank
)
{
uint64_t
*
broadcast_shape
,
uint64_t
*
padded_shape1
,
uint64_t
*
padded_shape2
,
uint64_t
max_rank
)
{
// prepending and initializing
std
::
fill
(
padded_shape1
,
padded_shape1
+
max_rank
,
1
);
std
::
fill
(
padded_shape2
,
padded_shape2
+
max_rank
,
1
);
...
...
@@ -63,7 +60,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
// compute broadcasted shape
for
(
size_t
i
=
0
;
i
<
max_rank
;
++
i
)
{
if
(
padded_shape1
[
i
]
==
padded_shape2
[
i
]
||
padded_shape1
[
i
]
==
1
||
padded_shape2
[
i
]
==
1
)
{
if
(
padded_shape1
[
i
]
==
padded_shape2
[
i
]
||
padded_shape1
[
i
]
==
1
||
padded_shape2
[
i
]
==
1
)
{
broadcast_shape
[
i
]
=
std
::
max
(
padded_shape1
[
i
],
padded_shape2
[
i
]);
}
else
{
return
false
;
...
...
@@ -73,31 +71,39 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
return
true
;
}
// check if the shape of tensor c is valid after broadcasting tensors a and b and also get the broadcasted shapes
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
c
,
// check if the shape of tensor c is valid after broadcasting tensors a and b
// and also get the broadcasted shapes
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
a
,
infiniopTensorDescriptor_t
b
,
infiniopTensorDescriptor_t
c
,
uint64_t
broadcast_ndim
)
{
std
::
vector
<
uint64_t
>
broadcast_shape_
(
broadcast_ndim
),
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
std
::
vector
<
uint64_t
>
broadcast_shape_
(
broadcast_ndim
),
padded_shape1_
(
broadcast_ndim
),
padded_shape2_
(
broadcast_ndim
);
auto
broadcast_shape
=
broadcast_shape_
.
data
(),
padded_shape1
=
padded_shape1_
.
data
(),
padded_shape2
=
padded_shape2_
.
data
();
if
(
broadcast_ndim
!=
c
->
ndim
||
!
getBroadcastShape
(
a
->
shape
,
a
->
ndim
,
b
->
shape
,
b
->
ndim
,
broadcast_shape
,
padded_shape1
,
padded_shape2
,
broadcast_ndim
))
{
if
(
broadcast_ndim
!=
c
->
ndim
||
!
getBroadcastShape
(
a
->
shape
,
a
->
ndim
,
b
->
shape
,
b
->
ndim
,
broadcast_shape
,
padded_shape1
,
padded_shape2
,
broadcast_ndim
))
{
return
false
;
}
return
std
::
equal
(
broadcast_shape
,
broadcast_shape
+
broadcast_ndim
,
c
->
shape
);
return
std
::
equal
(
broadcast_shape
,
broadcast_shape
+
broadcast_ndim
,
c
->
shape
);
}
// check if the shape of tensor src can be validly broadcasted to that of the tensor dst
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
dst
,
infiniopTensorDescriptor_t
src
)
{
// check if the shape of tensor src can be validly broadcasted to that of the
// tensor dst
inline
bool
isValidBroadcastShape
(
infiniopTensorDescriptor_t
dst
,
infiniopTensorDescriptor_t
src
)
{
if
(
dst
->
ndim
<
src
->
ndim
)
{
return
false
;
}
std
::
vector
<
size_t
>
padded_shape_
(
dst
->
ndim
);
auto
padded_shape
=
padded_shape_
.
data
();
std
::
fill
(
padded_shape
,
padded_shape
+
dst
->
ndim
,
1
);
std
::
copy
(
src
->
shape
,
src
->
shape
+
src
->
ndim
,
padded_shape
+
dst
->
ndim
-
src
->
ndim
);
std
::
copy
(
src
->
shape
,
src
->
shape
+
src
->
ndim
,
padded_shape
+
dst
->
ndim
-
src
->
ndim
);
for
(
size_t
i
=
0
;
i
<
dst
->
ndim
;
++
i
)
{
if
(
padded_shape
[
i
]
!=
dst
->
shape
[
i
]
&&
padded_shape
[
i
]
!=
1
)
{
return
false
;
...
...
@@ -107,7 +113,9 @@ inline bool isValidBroadcastShape(infiniopTensorDescriptor_t dst, infiniopTensor
}
// Checks that tensor c has the shape obtained by broadcasting tensors a and
// b. Convenience overload: derives the broadcast rank from the operands and
// delegates to the four-argument form.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c) {
    // The broadcast result always has the rank of the higher-rank operand.
    const auto broadcast_ndim = std::max(a->ndim, b->ndim);
    return isValidBroadcastShape(a, b, c, broadcast_ndim);
}
...
...
@@ -120,7 +128,8 @@ inline size_t get_byte_size(infiniopTensorDescriptor_t desc) {
}
// permute the dimensions of a tensor descriptor
inline
infiniopTensorDescriptor_t
permute
(
infiniopTensorDescriptor_t
desc
,
const
std
::
vector
<
size_t
>
&
order
)
{
inline
infiniopTensorDescriptor_t
permute
(
infiniopTensorDescriptor_t
desc
,
const
std
::
vector
<
size_t
>
&
order
)
{
size_t
ndim
=
desc
->
ndim
;
if
(
order
.
size
()
!=
ndim
)
{
return
nullptr
;
...
...
@@ -134,14 +143,16 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc, const
shape
[
i
]
=
desc
->
shape
[
order
[
i
]];
strides
[
i
]
=
desc
->
strides
[
order
[
i
]];
}
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
ndim
,
shape
,
strides
};
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
ndim
,
shape
,
strides
};
}
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are contiguous
inline
bool
is_contiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
// check if the dimensions [dim_start, dim_end] of a tensor descriptor are
// contiguous
inline
bool
is_contiguous
(
const
infiniopTensorDescriptor_t
&
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
for
(
size_t
i
=
dim_start
+
1
;
i
<=
dim_end
;
i
++
)
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
int64_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
if
(
desc
->
strides
[
i
-
1
]
!=
static_cast
<
int64_t
>
(
desc
->
shape
[
i
])
*
desc
->
strides
[
i
])
{
return
false
;
}
}
...
...
@@ -156,7 +167,8 @@ inline bool is_contiguous(const infiniopTensorDescriptor_t &desc) {
}
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
inline
infiniopTensorDescriptor_t
dim_merge
(
infiniopTensorDescriptor_t
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
inline
infiniopTensorDescriptor_t
dim_merge
(
infiniopTensorDescriptor_t
desc
,
size_t
dim_start
,
size_t
dim_end
)
{
size_t
ndim
=
desc
->
ndim
;
if
(
dim_start
>
dim_end
||
dim_end
>=
ndim
)
{
return
nullptr
;
...
...
@@ -185,14 +197,17 @@ inline infiniopTensorDescriptor_t dim_merge(infiniopTensorDescriptor_t desc, siz
new_strides
[
index
]
=
desc
->
strides
[
i
];
index
++
;
}
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
new_ndim
,
new_shape
,
new_strides
};
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
new_ndim
,
new_shape
,
new_strides
};
}
// split the dimension dim of a tensor descriptor into multiple dimensions
inline
infiniopTensorDescriptor_t
dim_split
(
infiniopTensorDescriptor_t
desc
,
size_t
dim
,
const
std
::
vector
<
size_t
>
&
dims
)
{
inline
infiniopTensorDescriptor_t
dim_split
(
infiniopTensorDescriptor_t
desc
,
size_t
dim
,
const
std
::
vector
<
size_t
>
&
dims
)
{
size_t
ndim
=
desc
->
ndim
;
if
(
desc
->
shape
[
dim
]
!=
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
(
size_t
)
1
,
std
::
multiplies
{}))
{
if
(
desc
->
shape
[
dim
]
!=
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
(
size_t
)
1
,
std
::
multiplies
{}))
{
return
nullptr
;
}
size_t
new_ndim
=
ndim
+
dims
.
size
()
-
1
;
...
...
@@ -206,7 +221,10 @@ inline infiniopTensorDescriptor_t dim_split(infiniopTensorDescriptor_t desc, siz
}
for
(
size_t
i
=
0
;
i
<
dims
.
size
();
i
++
)
{
new_shape
[
index
]
=
dims
[
i
];
new_strides
[
index
]
=
desc
->
strides
[
dim
]
*
desc
->
shape
[
dim
]
/
std
::
accumulate
(
dims
.
begin
(),
dims
.
begin
()
+
i
+
1
,
1
,
std
::
multiplies
<
size_t
>
());
new_strides
[
index
]
=
desc
->
strides
[
dim
]
*
desc
->
shape
[
dim
]
/
std
::
accumulate
(
dims
.
begin
(),
dims
.
begin
()
+
i
+
1
,
(
size_t
)
1
,
std
::
multiplies
<
size_t
>
());
index
++
;
}
for
(
size_t
i
=
dim
+
1
;
i
<
ndim
;
i
++
)
{
...
...
@@ -214,8 +232,8 @@ inline infiniopTensorDescriptor_t dim_split(infiniopTensorDescriptor_t desc, siz
new_strides
[
index
]
=
desc
->
strides
[
i
];
index
++
;
}
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
new_ndim
,
new_shape
,
new_strides
};
return
new
InfiniopTensorDescriptor
{
desc
->
dtype
,
new_ndim
,
new_shape
,
new_strides
};
}
#endif// __UTILS_H__
#endif
// __UTILS_H__
xmake/cpu.lua
View file @
7a833987
...
...
@@ -2,6 +2,8 @@ target("infiniop-cpu")
on_install
(
function
(
target
)
end
)
set_kind
(
"static"
)
add_cxflags
(
"-Wall"
,
"-Werror"
)
if
not
is_plat
(
"windows"
)
then
add_cxflags
(
"-fPIC"
)
end
...
...
xmake/cuda.lua
View file @
7a833987
...
...
@@ -20,10 +20,12 @@ target("infiniop-cuda")
if
is_plat
(
"windows"
)
then
add_cuflags
(
"-Xcompiler=/utf-8"
,
"--expt-relaxed-constexpr"
,
"--allow-unsupported-compiler"
)
add_cuflags
(
"-Xcompiler=/W3"
,
"-Xcompiler=/WX"
)
if
CUDNN_ROOT
~=
nil
then
add_linkdirs
(
CUDNN_ROOT
..
"
\\
lib\\x64"
)
end
else
add_cuflags
(
"-Xcompiler=-Wall"
,
"-Xcompiler=-Werror"
)
add_cuflags
(
"-Xcompiler=-fPIC"
)
add_culdflags
(
"-Xcompiler=-fPIC"
)
add_cxxflags
(
"-fPIC"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment