Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
ec0ff893
Commit
ec0ff893
authored
Feb 17, 2025
by
YdrMaster
Browse files
issue/52: 格式化所有 c/c++ 文件
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
27ba98d1
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
213 additions
and
233 deletions
+213
-233
include/infinicore.h
include/infinicore.h
+1
-2
include/infiniop/ops/causal_softmax.h
include/infiniop/ops/causal_softmax.h
+0
-1
include/infiniop/ops/conv.h
include/infiniop/ops/conv.h
+0
-1
include/infiniop/ops/random_sample.h
include/infiniop/ops/random_sample.h
+0
-1
include/infiniop/tensor_descriptor.h
include/infiniop/tensor_descriptor.h
+1
-1
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+88
-88
src/infiniop/devices/ascend/common_ascend.h
src/infiniop/devices/ascend/common_ascend.h
+0
-1
src/infiniop/devices/ascend/tensor_aclnn.cc
src/infiniop/devices/ascend/tensor_aclnn.cc
+4
-3
src/infiniop/devices/cpu/common_cpu.cc
src/infiniop/devices/cpu/common_cpu.cc
+4
-5
src/infiniop/devices/cpu/common_cpu.h
src/infiniop/devices/cpu/common_cpu.h
+2
-2
src/infiniop/devices/cuda/common_cuda.cuh
src/infiniop/devices/cuda/common_cuda.cuh
+22
-22
src/infiniop/devices/pool.h
src/infiniop/devices/pool.h
+3
-3
src/infiniop/ops/causal_softmax/operator.cc
src/infiniop/ops/causal_softmax/operator.cc
+71
-71
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
+4
-8
src/infiniop/ops/matmul/bang/matmul_cnnl.cc
src/infiniop/ops/matmul/bang/matmul_cnnl.cc
+2
-2
src/infiniop/ops/matmul/blas.h
src/infiniop/ops/matmul/blas.h
+2
-2
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
+5
-14
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
+1
-1
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
+3
-3
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
+0
-2
No files found.
include/infinicore.h
View file @
ec0ff893
...
...
@@ -6,8 +6,7 @@
#define __INFINICORE_EXPORT_C__
#if defined(_WIN32)
#define __export __declspec(dllexport)
#elif defined(__GNUC__) && \
((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#define __export __attribute__((visibility("default")))
#else
#define __export
...
...
include/infiniop/ops/causal_softmax.h
View file @
ec0ff893
...
...
@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip
__C
__export
infiniopStatus_t
infiniopDestroyCausalSoftmaxDescriptor
(
infiniopCausalSoftmaxDescriptor_t
desc
);
#endif
include/infiniop/ops/conv.h
View file @
ec0ff893
...
...
@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *
__C
__export
infiniopStatus_t
infiniopDestroyConvDescriptor
(
infiniopConvDescriptor_t
desc
);
#endif
include/infiniop/ops/random_sample.h
View file @
ec0ff893
...
...
@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto
__C
__export
infiniopStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
);
#endif
include/infiniop/tensor_descriptor.h
View file @
ec0ff893
...
...
@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr
__C
__export
infiniopStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
);
#endif// __INFINIOP_TENSOR_DESCRIPTOR__
#endif
// __INFINIOP_TENSOR_DESCRIPTOR__
src/infiniop/devices/ascend/common_ascend.cc
View file @
ec0ff893
...
...
@@ -31,115 +31,115 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) {
}
aclDataType
toAclDataType
(
infiniDtype_t
dt
)
{
if
(
dt
==
INFINI_DTYPE_I8
)
if
(
dt
==
INFINI_DTYPE_I8
)
{
return
aclDataType
::
ACL_INT8
;
else
if
(
dt
==
INFINI_DTYPE_I16
)
}
else
if
(
dt
==
INFINI_DTYPE_I16
)
{
return
aclDataType
::
ACL_INT16
;
else
if
(
dt
==
INFINI_DTYPE_I32
)
}
else
if
(
dt
==
INFINI_DTYPE_I32
)
{
return
aclDataType
::
ACL_INT32
;
else
if
(
dt
==
INFINI_DTYPE_I64
)
}
else
if
(
dt
==
INFINI_DTYPE_I64
)
{
return
aclDataType
::
ACL_INT64
;
else
if
(
dt
==
INFINI_DTYPE_U8
)
}
else
if
(
dt
==
INFINI_DTYPE_U8
)
{
return
aclDataType
::
ACL_UINT8
;
else
if
(
dt
==
INFINI_DTYPE_U16
)
}
else
if
(
dt
==
INFINI_DTYPE_U16
)
{
return
aclDataType
::
ACL_UINT16
;
else
if
(
dt
==
INFINI_DTYPE_U32
)
}
else
if
(
dt
==
INFINI_DTYPE_U32
)
{
return
aclDataType
::
ACL_UINT32
;
else
if
(
dt
==
INFINI_DTYPE_U64
)
}
else
if
(
dt
==
INFINI_DTYPE_U64
)
{
return
aclDataType
::
ACL_UINT64
;
else
if
(
dt
==
INFINI_DTYPE_F16
)
}
else
if
(
dt
==
INFINI_DTYPE_F16
)
{
return
aclDataType
::
ACL_FLOAT16
;
else
if
(
dt
==
INFINI_DTYPE_BF16
)
}
else
if
(
dt
==
INFINI_DTYPE_BF16
)
{
return
aclDataType
::
ACL_BF16
;
else
if
(
dt
==
INFINI_DTYPE_F32
)
}
else
if
(
dt
==
INFINI_DTYPE_F32
)
{
return
aclDataType
::
ACL_FLOAT
;
else
if
(
dt
==
INFINI_DTYPE_F64
)
}
else
if
(
dt
==
INFINI_DTYPE_F64
)
{
return
aclDataType
::
ACL_DOUBLE
;
else
}
else
{
return
aclDataType
::
ACL_DT_UNDEFINED
;
}
}
const
char
*
dataTypeToString
(
aclDataType
dtype
)
{
switch
(
dtype
)
{
case
ACL_DT_UNDEFINED
:
return
"ACL_DT_UNDEFINED"
;
case
ACL_FLOAT
:
return
"ACL_FLOAT"
;
case
ACL_FLOAT16
:
return
"ACL_FLOAT16"
;
case
ACL_INT8
:
return
"ACL_INT8"
;
case
ACL_INT32
:
return
"ACL_INT32"
;
case
ACL_UINT8
:
return
"ACL_UINT8"
;
case
ACL_INT16
:
return
"ACL_INT16"
;
case
ACL_UINT16
:
return
"ACL_UINT16"
;
case
ACL_UINT32
:
return
"ACL_UINT32"
;
case
ACL_INT64
:
return
"ACL_INT64"
;
case
ACL_UINT64
:
return
"ACL_UINT64"
;
case
ACL_DOUBLE
:
return
"ACL_DOUBLE"
;
case
ACL_BOOL
:
return
"ACL_BOOL"
;
case
ACL_STRING
:
return
"ACL_STRING"
;
case
ACL_COMPLEX64
:
return
"ACL_COMPLEX64"
;
case
ACL_COMPLEX128
:
return
"ACL_COMPLEX128"
;
case
ACL_BF16
:
return
"ACL_BF16"
;
case
ACL_INT4
:
return
"ACL_INT4"
;
case
ACL_UINT1
:
return
"ACL_UINT1"
;
case
ACL_COMPLEX32
:
return
"ACL_COMPLEX32"
;
default:
return
"UNKNOWN"
;
case
ACL_DT_UNDEFINED
:
return
"ACL_DT_UNDEFINED"
;
case
ACL_FLOAT
:
return
"ACL_FLOAT"
;
case
ACL_FLOAT16
:
return
"ACL_FLOAT16"
;
case
ACL_INT8
:
return
"ACL_INT8"
;
case
ACL_INT32
:
return
"ACL_INT32"
;
case
ACL_UINT8
:
return
"ACL_UINT8"
;
case
ACL_INT16
:
return
"ACL_INT16"
;
case
ACL_UINT16
:
return
"ACL_UINT16"
;
case
ACL_UINT32
:
return
"ACL_UINT32"
;
case
ACL_INT64
:
return
"ACL_INT64"
;
case
ACL_UINT64
:
return
"ACL_UINT64"
;
case
ACL_DOUBLE
:
return
"ACL_DOUBLE"
;
case
ACL_BOOL
:
return
"ACL_BOOL"
;
case
ACL_STRING
:
return
"ACL_STRING"
;
case
ACL_COMPLEX64
:
return
"ACL_COMPLEX64"
;
case
ACL_COMPLEX128
:
return
"ACL_COMPLEX128"
;
case
ACL_BF16
:
return
"ACL_BF16"
;
case
ACL_INT4
:
return
"ACL_INT4"
;
case
ACL_UINT1
:
return
"ACL_UINT1"
;
case
ACL_COMPLEX32
:
return
"ACL_COMPLEX32"
;
default:
return
"UNKNOWN"
;
}
}
const
char
*
formatToString
(
aclFormat
format
)
{
switch
(
format
)
{
case
ACL_FORMAT_UNDEFINED
:
return
"ACL_FORMAT_UNDEFINED"
;
case
ACL_FORMAT_NCHW
:
return
"ACL_FORMAT_NCHW"
;
case
ACL_FORMAT_NHWC
:
return
"ACL_FORMAT_NHWC"
;
case
ACL_FORMAT_ND
:
return
"ACL_FORMAT_ND"
;
case
ACL_FORMAT_NC1HWC0
:
return
"ACL_FORMAT_NC1HWC0"
;
case
ACL_FORMAT_FRACTAL_Z
:
return
"ACL_FORMAT_FRACTAL_Z"
;
case
ACL_FORMAT_NC1HWC0_C04
:
return
"ACL_FORMAT_NC1HWC0_C04"
;
case
ACL_FORMAT_HWCN
:
return
"ACL_FORMAT_HWCN"
;
case
ACL_FORMAT_NDHWC
:
return
"ACL_FORMAT_NDHWC"
;
case
ACL_FORMAT_FRACTAL_NZ
:
return
"ACL_FORMAT_FRACTAL_NZ"
;
case
ACL_FORMAT_NCDHW
:
return
"ACL_FORMAT_NCDHW"
;
case
ACL_FORMAT_NDC1HWC0
:
return
"ACL_FORMAT_NDC1HWC0"
;
case
ACL_FRACTAL_Z_3D
:
return
"ACL_FRACTAL_Z_3D"
;
case
ACL_FORMAT_NC
:
return
"ACL_FORMAT_NC"
;
case
ACL_FORMAT_NCL
:
return
"ACL_FORMAT_NCL"
;
default:
return
"UNKNOWN"
;
case
ACL_FORMAT_UNDEFINED
:
return
"ACL_FORMAT_UNDEFINED"
;
case
ACL_FORMAT_NCHW
:
return
"ACL_FORMAT_NCHW"
;
case
ACL_FORMAT_NHWC
:
return
"ACL_FORMAT_NHWC"
;
case
ACL_FORMAT_ND
:
return
"ACL_FORMAT_ND"
;
case
ACL_FORMAT_NC1HWC0
:
return
"ACL_FORMAT_NC1HWC0"
;
case
ACL_FORMAT_FRACTAL_Z
:
return
"ACL_FORMAT_FRACTAL_Z"
;
case
ACL_FORMAT_NC1HWC0_C04
:
return
"ACL_FORMAT_NC1HWC0_C04"
;
case
ACL_FORMAT_HWCN
:
return
"ACL_FORMAT_HWCN"
;
case
ACL_FORMAT_NDHWC
:
return
"ACL_FORMAT_NDHWC"
;
case
ACL_FORMAT_FRACTAL_NZ
:
return
"ACL_FORMAT_FRACTAL_NZ"
;
case
ACL_FORMAT_NCDHW
:
return
"ACL_FORMAT_NCDHW"
;
case
ACL_FORMAT_NDC1HWC0
:
return
"ACL_FORMAT_NDC1HWC0"
;
case
ACL_FRACTAL_Z_3D
:
return
"ACL_FRACTAL_Z_3D"
;
case
ACL_FORMAT_NC
:
return
"ACL_FORMAT_NC"
;
case
ACL_FORMAT_NCL
:
return
"ACL_FORMAT_NCL"
;
default:
return
"UNKNOWN"
;
}
}
src/infiniop/devices/ascend/common_ascend.h
View file @
ec0ff893
...
...
@@ -34,7 +34,6 @@ extern "C" {
return INFINIOP_STATUS_INTERNAL_ERROR; \
} while (0)
#ifdef __cplusplus
};
#endif
...
...
src/infiniop/devices/ascend/tensor_aclnn.cc
View file @
ec0ff893
...
...
@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s
return
INFINIOP_STATUS_SUCCESS
;
}
/// @brief Infer storage shape. For now this returns a 1D shape of the total tensor storage size.
/// We don't see why higher dimensional storage shape is ever needed. To change if necessary.
infiniopStatus_t
aclnnTensorDescriptor
::
inferStorageShape
()
{
...
...
@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() {
// Assume bufferSize
size_t
bufferSize
=
1024
+
this
->
ndim
*
40
+
this
->
storageNdim
*
40
;
char
*
buffer
=
(
char
*
)
malloc
(
bufferSize
);
if
(
!
buffer
)
return
NULL
;
char
*
buffer
=
(
char
*
)
malloc
(
bufferSize
);
if
(
!
buffer
)
{
return
NULL
;
}
// Write info into buffer
char
*
ptr
=
buffer
;
...
...
src/infiniop/devices/cpu/common_cpu.cc
View file @
ec0ff893
...
...
@@ -35,11 +35,10 @@ float f16_to_f32(uint16_t h) {
uint16_t
f32_to_f16
(
float
val
)
{
uint32_t
f32
;
memcpy
(
&
f32
,
&
val
,
sizeof
(
f32
));
// Read the bits of the float32
uint16_t
sign
=
(
f32
>>
16
)
&
0x8000
;
// Extract the sign bit
int32_t
exponent
=
((
f32
>>
23
)
&
0xFF
)
-
127
;
// Extract and de-bias the exponent
uint32_t
mantissa
=
f32
&
0x7FFFFF
;
// Extract the mantissa (fraction part)
memcpy
(
&
f32
,
&
val
,
sizeof
(
f32
));
// Read the bits of the float32
uint16_t
sign
=
(
f32
>>
16
)
&
0x8000
;
// Extract the sign bit
int32_t
exponent
=
((
f32
>>
23
)
&
0xFF
)
-
127
;
// Extract and de-bias the exponent
uint32_t
mantissa
=
f32
&
0x7FFFFF
;
// Extract the mantissa (fraction part)
if
(
exponent
>=
31
)
{
// Special cases for Inf and NaN
// NaN
...
...
src/infiniop/devices/cpu/common_cpu.h
View file @
ec0ff893
...
...
@@ -19,7 +19,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broad
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64_t
const
*
strides
);
/**
* get the total array size (element count) after applying padding for a
* get the total array size (element count) after applying padding for a
* ndim-ary tensor with the given shape
*/
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
);
...
...
@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
// calculate the padded shape and store the result in padded_shape
std
::
vector
<
size_t
>
getPaddedShape
(
size_t
ndim
,
size_t
const
*
shape
,
size_t
const
*
pads
);
#endif// __INFINIOP__COMMON_CPU_H__
#endif
// __INFINIOP__COMMON_CPU_H__
src/infiniop/devices/cuda/common_cuda.cuh
View file @
ec0ff893
...
...
@@ -47,18 +47,18 @@ struct InfiniopCudaHandle {
int
compute_capability_minor
;
};
template
<
typename
T
>
template
<
typename
T
>
void
use_cublas
(
std
::
shared_ptr
<
Pool
<
cublasHandle_t
>>
cublas_handle_pool
,
int
device_id
,
cudaStream_t
stream
,
T
const
&
f
)
{
auto
handle
=
cublas_handle_pool
->
pop
();
if
(
!
handle
)
{
cublasCreate
(
&
(
*
handle
));
}
cublasSetStream
(
*
handle
,
(
cudaStream_t
)
stream
);
cublasSetStream
(
*
handle
,
(
cudaStream_t
)
stream
);
f
(
*
handle
);
cublas_handle_pool
->
push
(
std
::
move
(
*
handle
));
}
template
<
typename
T
>
template
<
typename
T
>
cudnnStatus_t
use_cudnn
(
std
::
shared_ptr
<
Pool
<
cudnnHandle_t
>>
cudnn_handle_pool
,
int
device_id
,
cudaStream_t
stream
,
T
const
&
f
)
{
auto
handle
=
cudnn_handle_pool
->
pop
();
if
(
!
handle
)
{
...
...
@@ -72,24 +72,24 @@ cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool,
inline
cudnnDataType_t
getCudnnDtype
(
infiniDtype_t
dt
)
{
switch
(
dt
)
{
case
INFINI_DTYPE_F16
:
return
CUDNN_DATA_HALF
;
case
INFINI_DTYPE_F32
:
return
CUDNN_DATA_FLOAT
;
case
INFINI_DTYPE_F64
:
return
CUDNN_DATA_DOUBLE
;
case
INFINI_DTYPE_BF16
:
return
CUDNN_DATA_BFLOAT16
;
case
INFINI_DTYPE_I8
:
return
CUDNN_DATA_INT8
;
case
INFINI_DTYPE_I32
:
return
CUDNN_DATA_INT32
;
case
INFINI_DTYPE_I64
:
return
CUDNN_DATA_INT64
;
case
INFINI_DTYPE_U8
:
return
CUDNN_DATA_UINT8
;
default:
return
CUDNN_DATA_FLOAT
;
case
INFINI_DTYPE_F16
:
return
CUDNN_DATA_HALF
;
case
INFINI_DTYPE_F32
:
return
CUDNN_DATA_FLOAT
;
case
INFINI_DTYPE_F64
:
return
CUDNN_DATA_DOUBLE
;
case
INFINI_DTYPE_BF16
:
return
CUDNN_DATA_BFLOAT16
;
case
INFINI_DTYPE_I8
:
return
CUDNN_DATA_INT8
;
case
INFINI_DTYPE_I32
:
return
CUDNN_DATA_INT32
;
case
INFINI_DTYPE_I64
:
return
CUDNN_DATA_INT64
;
case
INFINI_DTYPE_U8
:
return
CUDNN_DATA_UINT8
;
default:
return
CUDNN_DATA_FLOAT
;
}
}
...
...
@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
return
res
;
}
#endif// __INFINIOP_COMMON_CUDA_H__
#endif
// __INFINIOP_COMMON_CUDA_H__
src/infiniop/devices/pool.h
View file @
ec0ff893
...
...
@@ -5,7 +5,7 @@
#include <mutex>
#include <optional>
template
<
class
T
>
template
<
class
T
>
class
Pool
{
public:
Pool
()
:
_head
(
nullptr
)
{}
...
...
@@ -21,7 +21,7 @@ public:
void
push
(
T
&&
val
)
const
{
Node
<
T
>
*
new_node
=
new
Node
<
T
>
(
std
::
move
(
val
));
new_node
->
next
=
_head
.
load
();
while
(
!
_head
.
compare_exchange_weak
(
new_node
->
next
,
new_node
))
;
while
(
!
_head
.
compare_exchange_weak
(
new_node
->
next
,
new_node
))
{}
}
std
::
optional
<
T
>
pop
()
const
{
...
...
@@ -37,7 +37,7 @@ public:
}
private:
template
<
class
U
>
template
<
class
U
>
struct
Node
{
U
data
;
Node
<
U
>
*
next
;
...
...
src/infiniop/ops/causal_softmax/operator.cc
View file @
ec0ff893
...
...
@@ -6,35 +6,35 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
infiniopTensorDescriptor_t
y_desc
)
{
switch
(
handle
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCreateCausalSoftmaxDescriptor
(
handle
,
(
CausalSoftmaxCpuDescriptor_t
*
)
desc_ptr
,
y_desc
);
case
DevCpu
:
return
cpuCreateCausalSoftmaxDescriptor
(
handle
,
(
CausalSoftmaxCpuDescriptor_t
*
)
desc_ptr
,
y_desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCreateCausalSoftmaxDescriptor
((
CudaHandle_t
)
handle
,
(
CausalSoftmaxCudaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
case
DevNvGpu
:
{
return
cudaCreateCausalSoftmaxDescriptor
((
CudaHandle_t
)
handle
,
(
CausalSoftmaxCudaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCreateCausalSoftmaxDescriptor
((
BangHandle_t
)
handle
,
(
CausalSoftmaxBangDescriptor_t
*
)
desc_ptr
,
y_desc
);
// return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
}
case
DevCambriconMlu
:
{
return
bangCreateCausalSoftmaxDescriptor
((
BangHandle_t
)
handle
,
(
CausalSoftmaxBangDescriptor_t
*
)
desc_ptr
,
y_desc
);
// return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCreateCausalSoftmaxDescriptor
((
AscendHandle_t
)
handle
,
(
CausalSoftmaxAclnnDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
case
DevAscendNpu
:
{
return
aclnnCreateCausalSoftmaxDescriptor
((
AscendHandle_t
)
handle
,
(
CausalSoftmaxAclnnDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCreateCausalSoftmaxDescriptor
((
MacaHandle_t
)
handle
,
(
CausalSoftmaxMacaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
case
DevMetaxGpu
:
{
return
macaCreateCausalSoftmaxDescriptor
((
MacaHandle_t
)
handle
,
(
CausalSoftmaxMacaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCreateCausalSoftmaxDescriptor
((
MusaHandle_t
)
handle
,
(
CausalSoftmaxMusaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
case
DevMthreadsGpu
:
{
return
musaCreateCausalSoftmaxDescriptor
((
MusaHandle_t
)
handle
,
(
CausalSoftmaxMusaDescriptor_t
*
)
desc_ptr
,
y_desc
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -43,36 +43,36 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
__C
infiniopStatus_t
infiniopGetCausalSoftmaxWorkspaceSize
(
infiniopCausalSoftmaxDescriptor_t
desc
,
uint64_t
*
size
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
size
);
case
DevCpu
:
return
cpuGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
size
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
size
);
}
case
DevNvGpu
:
{
return
cudaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxBangDescriptor_t
)
desc
,
size
);
// return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
}
case
DevCambriconMlu
:
{
return
bangGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxBangDescriptor_t
)
desc
,
size
);
// return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
size
);
}
case
DevAscendNpu
:
{
return
aclnnGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
size
);
}
case
DevMetaxGpu
:
{
return
macaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
size
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
size
);
}
case
DevMthreadsGpu
:
{
return
musaGetCausalSoftmaxWorkspaceSize
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
size
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -81,35 +81,35 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
__C
infiniopStatus_t
infiniopCausalSoftmax
(
infiniopCausalSoftmaxDescriptor_t
desc
,
void
*
workspace
,
uint64_t
workspace_size
,
void
*
data
,
void
*
stream
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuCausalSoftmax
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
case
DevCpu
:
return
cpuCausalSoftmax
((
CausalSoftmaxCpuDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaCausalSoftmax
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
case
DevNvGpu
:
{
return
cudaCausalSoftmax
((
CausalSoftmaxCudaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangCausalSoftmax
((
CausalSoftmaxBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
// return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
}
case
DevCambriconMlu
:
{
return
bangCausalSoftmax
((
CausalSoftmaxBangDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
// return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnCausalSoftmax
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
case
DevAscendNpu
:
{
return
aclnnCausalSoftmax
((
CausalSoftmaxAclnnDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaCausalSoftmax
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
case
DevMetaxGpu
:
{
return
macaCausalSoftmax
((
CausalSoftmaxMacaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
{
return
musaCausalSoftmax
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
case
DevMthreadsGpu
:
{
return
musaCausalSoftmax
((
CausalSoftmaxMusaDescriptor_t
)
desc
,
workspace
,
workspace_size
,
data
,
stream
);
}
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -118,34 +118,34 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des
__C
infiniopStatus_t
infiniopDestroyCausalSoftmaxDescriptor
(
infiniopCausalSoftmaxDescriptor_t
desc
)
{
switch
(
desc
->
device
)
{
#ifdef ENABLE_CPU
case
DevCpu
:
return
cpuDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCpuDescriptor_t
)
desc
);
case
DevCpu
:
return
cpuDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCpuDescriptor_t
)
desc
);
#endif
#ifdef ENABLE_NV_GPU
case
DevNvGpu
:
{
return
cudaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCudaDescriptor_t
)
desc
);
}
case
DevNvGpu
:
{
return
cudaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxCudaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_CAMBRICON_MLU
case
DevCambriconMlu
:
{
return
bangDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxBangDescriptor_t
)
desc
);
// return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
}
case
DevCambriconMlu
:
{
return
bangDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxBangDescriptor_t
)
desc
);
// return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
}
#endif
#ifdef ENABLE_ASCEND_NPU
case
DevAscendNpu
:
{
return
aclnnDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxAclnnDescriptor_t
)
desc
);
}
case
DevAscendNpu
:
{
return
aclnnDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxAclnnDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_METAX_GPU
case
DevMetaxGpu
:
{
return
macaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMacaDescriptor_t
)
desc
);
}
case
DevMetaxGpu
:
{
return
macaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMacaDescriptor_t
)
desc
);
}
#endif
#ifdef ENABLE_MTHREADS_GPU
case
DevMthreadsGpu
:
return
musaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMusaDescriptor_t
)
desc
);
case
DevMthreadsGpu
:
return
musaDestroyCausalSoftmaxDescriptor
((
CausalSoftmaxMusaDescriptor_t
)
desc
);
#endif
}
return
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc
View file @
ec0ff893
...
...
@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace,
for
(
size_t
i
=
0
;
i
<
batch
;
i
++
)
{
AclSetTensorAddr
(
desc
->
executor
,
0
,
ta
,
(
char
*
)(
a
)
+
i
*
desc
->
info
->
a_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
(
char
*
)(
a
)
+
i
*
desc
->
info
->
a_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
1
,
tb
,
(
char
*
)(
b
)
+
i
*
desc
->
info
->
b_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
(
char
*
)(
b
)
+
i
*
desc
->
info
->
b_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
2
,
tc
,
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
AclSetTensorAddr
(
desc
->
executor
,
3
,
tc
,
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
(
char
*
)(
c
)
+
i
*
desc
->
info
->
c_matrix
.
stride
*
infiniSizeof
(
desc
->
dtype
));
ret
=
aclnnGemm
(
workspace
,
workspaceSize
,
desc
->
executor
,
stream
);
CHECK_RET
(
ret
==
ACL_SUCCESS
,
LOG_PRINT
(
"aclnnGemm failed. ERROR: %d
\n
"
,
ret
);
...
...
src/infiniop/ops/matmul/bang/matmul_cnnl.cc
View file @
ec0ff893
...
...
@@ -73,8 +73,8 @@ bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) {
}
void
bangMatmulCnnl
(
infiniopMatmulBangDescriptor_t
desc
,
void
*
workspace
,
void
*
c
,
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
void
*
stream
)
{
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
,
void
*
stream
)
{
auto
info
=
desc
->
info
;
if
(
info
.
is_transed
)
{
std
::
swap
(
a
,
b
);
...
...
src/infiniop/ops/matmul/blas.h
View file @
ec0ff893
...
...
@@ -88,7 +88,7 @@ struct MatmulInfo {
return
;
}
if
(
c_matrix
.
rows
!=
a_matrix
.
rows
||
c_matrix
.
cols
!=
b_matrix
.
cols
||
a_matrix
.
cols
!=
b_matrix
.
rows
){
if
(
c_matrix
.
rows
!=
a_matrix
.
rows
||
c_matrix
.
cols
!=
b_matrix
.
cols
||
a_matrix
.
cols
!=
b_matrix
.
rows
)
{
*
status
=
INFINIOP_STATUS_BAD_TENSOR_SHAPE
;
return
;
}
...
...
@@ -113,4 +113,4 @@ struct MatmulInfo {
}
};
#endif// __BLAS_H__
#endif
// __BLAS_H__
src/infiniop/ops/matmul/cpu/matmul_cpu.cc
View file @
ec0ff893
...
...
@@ -38,8 +38,8 @@ cpuDestroyMatmulDescriptor(infiniopMatmulCpuDescriptor_t desc) {
template
<
typename
Tdata
>
infiniopStatus_t
cpuCalculateMatmul
(
infiniopMatmulCpuDescriptor_t
desc
,
void
*
c
,
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
)
{
float
beta
,
void
const
*
a
,
void
const
*
b
,
float
alpha
)
{
auto
info
=
desc
->
info
;
if
(
info
.
is_transed
)
{
...
...
@@ -49,20 +49,11 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
for
(
size_t
i
=
0
;
i
<
info
.
batch
;
++
i
)
{
for
(
size_t
m_
=
0
;
m_
<
info
.
m
;
++
m_
)
{
for
(
size_t
n_
=
0
;
n_
<
info
.
n
;
++
n_
)
{
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
info
.
c_matrix
.
stride
+
m_
*
info
.
c_matrix
.
row_stride
+
n_
*
info
.
c_matrix
.
col_stride
;
auto
c_
=
reinterpret_cast
<
Tdata
*>
(
c
)
+
i
*
info
.
c_matrix
.
stride
+
m_
*
info
.
c_matrix
.
row_stride
+
n_
*
info
.
c_matrix
.
col_stride
;
float
sum
=
0
;
for
(
size_t
k_
=
0
;
k_
<
info
.
k
;
++
k_
)
{
auto
a_
=
reinterpret_cast
<
Tdata
const
*>
(
a
)
+
i
*
info
.
a_matrix
.
stride
+
m_
*
info
.
a_matrix
.
row_stride
+
k_
*
info
.
a_matrix
.
col_stride
;
auto
b_
=
reinterpret_cast
<
Tdata
const
*>
(
b
)
+
i
*
info
.
b_matrix
.
stride
+
n_
*
info
.
b_matrix
.
col_stride
+
k_
*
info
.
b_matrix
.
row_stride
;
auto
a_
=
reinterpret_cast
<
Tdata
const
*>
(
a
)
+
i
*
info
.
a_matrix
.
stride
+
m_
*
info
.
a_matrix
.
row_stride
+
k_
*
info
.
a_matrix
.
col_stride
;
auto
b_
=
reinterpret_cast
<
Tdata
const
*>
(
b
)
+
i
*
info
.
b_matrix
.
stride
+
n_
*
info
.
b_matrix
.
col_stride
+
k_
*
info
.
b_matrix
.
row_stride
;
if
constexpr
(
std
::
is_same
<
Tdata
,
uint16_t
>::
value
)
{
sum
+=
f16_to_f32
(
*
a_
)
*
f16_to_f32
(
*
b_
);
}
else
{
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda.cu
View file @
ec0ff893
#include "./matmul_cuda.cuh"
#include "../../utils.h"
#include "./matmul_cuda.cuh"
infiniopStatus_t
cudaCreateMatmulDescriptor
(
infiniopCudaHandle_t
handle
,
infiniopMatmulCudaDescriptor_t
*
desc_ptr
,
...
...
src/infiniop/ops/matmul/cuda/matmul_cuda.cuh
View file @
ec0ff893
#ifndef __INFINIOP_MATMUL_CUDA_H__
#define __INFINIOP_MATMUL_CUDA_H__
#include "matmul_cuda_api.h"
#include "../../../devices/cuda/common_cuda.cuh"
#include <memory>
#include "../blas.h"
#include "matmul_cuda_api.h"
#include <memory>
typedef
struct
InfiniopMatmulCudaDescriptor
{
infiniDevice_t
device
;
...
...
@@ -14,4 +14,4 @@ typedef struct InfiniopMatmulCudaDescriptor {
std
::
shared_ptr
<
Pool
<
cublasHandle_t
>>
cublas_handle_pool
;
}
InfiniopMatmulCudaDescriptor
;
#endif// __INFINIOP_MATMUL_CUDA_H__
#endif
// __INFINIOP_MATMUL_CUDA_H__
src/infiniop/ops/matmul/cuda/matmul_cuda_api.h
View file @
ec0ff893
...
...
@@ -4,7 +4,6 @@
#include "../../../devices/cuda/cuda_handle.h"
#include "infiniop/operator.h"
struct
InfiniopMatmulCudaDescriptor
;
typedef
struct
InfiniopMatmulCudaDescriptor
*
infiniopMatmulCudaDescriptor_t
;
...
...
@@ -28,5 +27,4 @@ infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc,
infiniopStatus_t
cudaDestroyMatmulDescriptor
(
infiniopMatmulCudaDescriptor_t
desc
);
#endif // __INFINIOP_MATMUL_CUDA_API_H__
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment