Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
46da1a27
Commit
46da1a27
authored
Feb 11, 2025
by
PanZezhongQY
Browse files
feat: cpu and cuda matmul
parents
Changes
87
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
806 additions
and
0 deletions
+806
-0
include/infiniop/ops/rms_norm.h
include/infiniop/ops/rms_norm.h
+23
-0
include/infiniop/ops/rotary_embedding.h
include/infiniop/ops/rotary_embedding.h
+30
-0
include/infiniop/ops/swiglu.h
include/infiniop/ops/swiglu.h
+22
-0
include/infiniop/status.h
include/infiniop/status.h
+18
-0
include/infiniop/tensor_descriptor.h
include/infiniop/tensor_descriptor.h
+24
-0
include/infinirt.h
include/infinirt.h
+0
-0
src/infiniop/devices/ascend/CMakeLists.txt
src/infiniop/devices/ascend/CMakeLists.txt
+28
-0
src/infiniop/devices/ascend/Makefile
src/infiniop/devices/ascend/Makefile
+10
-0
src/infiniop/devices/ascend/ascend_handle.cc
src/infiniop/devices/ascend/ascend_handle.cc
+23
-0
src/infiniop/devices/ascend/ascend_handle.h
src/infiniop/devices/ascend/ascend_handle.h
+23
-0
src/infiniop/devices/ascend/common_ascend.cc
src/infiniop/devices/ascend/common_ascend.cc
+145
-0
src/infiniop/devices/ascend/common_ascend.h
src/infiniop/devices/ascend/common_ascend.h
+41
-0
src/infiniop/devices/ascend/tensor_aclnn.cc
src/infiniop/devices/ascend/tensor_aclnn.cc
+137
-0
src/infiniop/devices/ascend/tensor_aclnn.h
src/infiniop/devices/ascend/tensor_aclnn.h
+41
-0
src/infiniop/devices/bang/bang_handle.cc
src/infiniop/devices/bang/bang_handle.cc
+21
-0
src/infiniop/devices/bang/bang_handle.h
src/infiniop/devices/bang/bang_handle.h
+31
-0
src/infiniop/devices/bang/common_bang.h
src/infiniop/devices/bang/common_bang.h
+54
-0
src/infiniop/devices/cpu/common_cpu.cc
src/infiniop/devices/cpu/common_cpu.cc
+99
-0
src/infiniop/devices/cpu/common_cpu.h
src/infiniop/devices/cpu/common_cpu.h
+30
-0
src/infiniop/devices/cpu/cpu_handle.cc
src/infiniop/devices/cpu/cpu_handle.cc
+6
-0
No files found.
include/infiniop/ops/rms_norm.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP_RMS_NORM_H__
#define __INFINIOP_RMS_NORM_H__

#include "../operator.h"

// Opaque handle to an RMSNorm operator descriptor.
typedef InfiniopDescriptor *infiniopRMSNormDescriptor_t;

/// Create an RMSNorm descriptor binding output y, input x and weight w.
/// @param epsilon small stabilizing constant (presumably added inside the
///        root-mean-square denominator — confirm in the device kernels).
__C __export infiniopStatus_t infiniopCreateRMSNormDescriptor(
    infiniopHandle_t handle,
    infiniopRMSNormDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    infiniopTensorDescriptor_t w_desc,
    float epsilon);

/// Query the workspace size in bytes required by infiniopRMSNorm for `desc`.
__C __export infiniopStatus_t infiniopGetRMSNormWorkspaceSize(
    infiniopRMSNormDescriptor_t desc,
    size_t *size);

/// Execute the operator: reads x and w, writes y.
/// `workspace` must provide at least `workspace_size` bytes;
/// `stream` is the backend-specific execution queue/stream.
__C __export infiniopStatus_t infiniopRMSNorm(
    infiniopRMSNormDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    void const *x,
    void const *w,
    void *stream);

/// Destroy a descriptor created by infiniopCreateRMSNormDescriptor.
__C __export infiniopStatus_t infiniopDestroyRMSNormDescriptor(
    infiniopRMSNormDescriptor_t desc);

#endif
include/infiniop/ops/rotary_embedding.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP_ROTARY_EMBEDDING_H__
#define __INFINIOP_ROTARY_EMBEDDING_H__

#include "../operator.h"

// Opaque handle to a rotary position embedding (RoPE) operator descriptor.
typedef InfiniopDescriptor *infiniopRoPEDescriptor_t;

/// Create a RoPE descriptor.
/// @param t         tensor to rotate (updated in place by infiniopRoPE)
/// @param pos_ids   position indices used to look up the sin/cos tables
/// @param sin_table precomputed sine table
/// @param cos_table precomputed cosine table
__C __export infiniopStatus_t infiniopCreateRoPEDescriptor(
    infiniopHandle_t handle,
    infiniopRoPEDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t t,
    infiniopTensorDescriptor_t pos_ids,
    infiniopTensorDescriptor_t sin_table,
    infiniopTensorDescriptor_t cos_table);

/// Query the workspace size in bytes required by infiniopRoPE for `desc`.
__C __export infiniopStatus_t infiniopGetRoPEWorkspaceSize(
    infiniopRoPEDescriptor_t desc,
    size_t *size);

/// Execute the operator. `t` is non-const: rotation is applied in place.
__C __export infiniopStatus_t infiniopRoPE(
    infiniopRoPEDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *t,
    void const *pos_ids,
    void const *sin_table,
    void const *cos_table,
    void *stream);

/// Destroy a descriptor created by infiniopCreateRoPEDescriptor.
__C __export infiniopStatus_t infiniopDestroyRoPEDescriptor(
    infiniopRoPEDescriptor_t desc);

#endif
include/infiniop/ops/swiglu.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP_SWIGLU_H__
#define __INFINIOP_SWIGLU_H__

#include "../operator.h"

// Opaque handle to a SwiGLU operator descriptor.
typedef InfiniopDescriptor *infiniopSwiGLUDescriptor_t;

/// Create a SwiGLU descriptor with output c and inputs a, b.
/// Note: unlike RMSNorm/RoPE, this operator exposes no workspace query —
/// it runs without an external workspace.
__C __export infiniopStatus_t infiniopCreateSwiGLUDescriptor(
    infiniopHandle_t handle,
    infiniopSwiGLUDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t c_desc,
    infiniopTensorDescriptor_t a_desc,
    infiniopTensorDescriptor_t b_desc);

/// Execute the operator: reads a and b, writes c.
/// `stream` is the backend-specific execution queue/stream.
__C __export infiniopStatus_t infiniopSwiGLU(
    infiniopSwiGLUDescriptor_t desc,
    void *c,
    void const *a,
    void const *b,
    void *stream);

/// Destroy a descriptor created by infiniopCreateSwiGLUDescriptor.
__C __export infiniopStatus_t infiniopDestroySwiGLUDescriptor(
    infiniopSwiGLUDescriptor_t desc);

#endif
include/infiniop/status.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP_STATUS__
#define __INFINIOP_STATUS__
typedef
enum
{
INFINIOP_STATUS_SUCCESS
=
0
,
INFINIOP_STATUS_INTERNAL_ERROR
=
1
,
INFINIOP_STATUS_BAD_PARAM
=
2
,
INFINIOP_STATUS_BAD_TENSOR_DTYPE
=
3
,
INFINIOP_STATUS_BAD_TENSOR_SHAPE
=
4
,
INFINIOP_STATUS_BAD_TENSOR_STRIDES
=
5
,
INFINIOP_STATUS_NULL_POINTER
=
6
,
INFINIOP_STATUS_INSUFFICIENT_WORKSPACE
=
7
,
INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED
=
8
,
INFINIOP_STATUS_BAD_DEVICE
=
9
,
INFINIOP_STATUS_UNDEFINED_BEHAVIOR
=
10
,
}
infiniopStatus_t
;
#endif
include/infiniop/tensor_descriptor.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP_TENSOR_DESCRIPTOR__
#define __INFINIOP_TENSOR_DESCRIPTOR__

#include "../infinicore.h"
#include "./status.h"

// Plain descriptor of a tensor's layout; does not own the data buffer.
struct InfiniopTensorDescriptor {
    // Datatype
    infiniDtype_t dtype;
    // Number of dimensions
    size_t ndim;
    // Shape of the tensor, ndim elements
    size_t *shape;
    // Stride of each dimension in elements, ndim elements
    int64_t *strides;
};

typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;

/// Allocate a descriptor, copying `ndim` entries from `shape` and `strides`.
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(
    infiniopTensorDescriptor_t *desc_ptr,
    size_t ndim,
    size_t const *shape,
    int64_t const *strides,
    infiniDtype_t dtype);

/// Free a descriptor created by infiniopCreateTensorDescriptor.
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(
    infiniopTensorDescriptor_t desc);

#endif// __INFINIOP_TENSOR_DESCRIPTOR__
include/infinirt.h
0 → 100644
View file @
46da1a27
src/infiniop/devices/ascend/CMakeLists.txt
0 → 100644
View file @
46da1a27
cmake_minimum_required(VERSION 3.16.0)

# project information
project(Ascend_C)

# Target SoC; override with -DSOC_VERSION=... for other Ascend chips.
set(SOC_VERSION "Ascend910B3" CACHE STRING "system on chip type")
set(ASCEND_CANN_PACKAGE_PATH "/usr/local/Ascend/ascend-toolkit/latest" CACHE PATH "ASCEND CANN package installation directory")
set(RUN_MODE "npu" CACHE STRING "run mode: npu")
# FORCE pins Release even if the cache already holds another value.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type Release/Debug (default Debug)" FORCE)
set(CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_DIR}/out" CACHE STRING "path for install()" FORCE)

# Locate the AscendC kernel cmake helpers; the path differs between CANN
# distributions, so probe the known locations in order.
if(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/tools/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/compiler/tikcpp/ascendc_kernel_cmake)
elseif(EXISTS ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
    set(ASCENDC_CMAKE_DIR ${ASCEND_CANN_PACKAGE_PATH}/ascendc_devkit/tikcpp/samples/cmake)
else()
    message(FATAL_ERROR "ascendc_kernel_cmake does not exist, please check whether the cann package is installed.")
endif()

include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)

# Static library bundling the device-side kernels.
ascendc_library(ascend_kernels STATIC
    ../../ops/swiglu/ascend/swiglu_kernel.cpp
    ../../ops/rotary_embedding/ascend/rotary_embedding_kernel.cpp
    ../../ops/random_sample/ascend/random_sample_kernel.cpp
)
src/infiniop/devices/ascend/Makefile
0 → 100644
View file @
46da1a27
# Out-of-tree CMake build driver for the AscendC kernel library.
.PHONY: build clean

# Absolute path of this Makefile and the directory containing it.
MKFILE_PATH := $(abspath $(lastword $(MAKEFILE_LIST)))
MKFILE_DIR := $(dir $(MKFILE_PATH))

# Configure and compile into ./build with 8 parallel jobs.
build:
	mkdir -p build && cd build && cmake .. && make -j8

clean:
	rm -rf build
src/infiniop/devices/ascend/ascend_handle.cc
0 → 100644
View file @
46da1a27
#include "ascend_handle.h"
/// Create an Ascend NPU context for `device_id` and set it as the current
/// device. On success *handle_ptr owns a heap-allocated AscendContext.
/// NOTE(review): this CHECK_RET only logs on failure — it does NOT return,
/// so a failed aclrtSetDevice still yields STATUS_SUCCESS. Confirm intended.
/// NOTE(review): STATUS_BAD_DEVICE / STATUS_SUCCESS differ from the
/// INFINIOP_STATUS_* names in include/infiniop/status.h — confirm mapping.
infiniopStatus_t createAscendHandle(AscendHandle_t *handle_ptr, int device_id) {
    uint32_t device_count;
    aclrtGetDeviceCount(&device_count);
    if (device_id >= static_cast<int>(device_count)) {
        return STATUS_BAD_DEVICE;
    }
    auto ret = aclrtSetDevice(device_id);
    CHECK_RET(ret == ACL_SUCCESS,
              LOG_PRINT("aclrtSetDevice failed. ERROR: %d\n", ret));
    *handle_ptr = new AscendContext{DevAscendNpu, device_id};
    return STATUS_SUCCESS;
}
/// Destroy a context created by createAscendHandle (null-safe: delete on
/// nullptr is a no-op).
infiniopStatus_t deleteAscendHandle(AscendHandle_t handle_ptr) {
    delete handle_ptr;
    return STATUS_SUCCESS;
}
src/infiniop/devices/ascend/ascend_handle.h
0 → 100644
View file @
46da1a27
#ifndef ASCEND_HANDLE_H
#define ASCEND_HANDLE_H

#include "common_ascend.h"
#include "device.h"
#include "status.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <acl/acl_rt.h>
#include <aclnn/acl_meta.h>
#include <memory>

// Per-device context for the Ascend NPU backend.
struct AscendContext {
    Device device;  // device kind (set to DevAscendNpu on creation)
    int device_id;  // ACL device index
};
typedef struct AscendContext *AscendHandle_t;

// Create a context for `device_id`; also selects it via aclrtSetDevice.
infiniopStatus_t createAscendHandle(AscendHandle_t *handle_ptr, int device_id);

// Delete a context created by createAscendHandle.
infiniopStatus_t deleteAscendHandle(AscendHandle_t handle_ptr);

#endif
src/infiniop/devices/ascend/common_ascend.cc
0 → 100644
View file @
46da1a27
#include "common_ascend.h"
/// Product of the first `num` entries of `shape`; returns 1 when num == 0.
int64_t numElements(const int64_t *shape, int64_t num) {
    int64_t total = 1;
    for (int64_t axis = 0; axis < num; ++axis) {
        total *= shape[axis];
    }
    return total;
}
/// Allocate `workspaceSize` bytes of device memory into *workspaceAddr,
/// preferring huge pages. *workspaceAddr is cleared first and stays nullptr
/// when workspaceSize == 0.
/// @return STATUS_SUCCESS, or STATUS_EXECUTION_FAILED if aclrtMalloc fails.
infiniopStatus_t mallocWorkspace(void **workspaceAddr, uint64_t workspaceSize) {
    *workspaceAddr = nullptr;
    if (workspaceSize > 0) {
        auto ret = aclrtMalloc(workspaceAddr, workspaceSize, ACL_MEM_MALLOC_HUGE_FIRST);
        // CHECK_RET's second argument is a statement sequence: log the ACL
        // error code, then return failure from this function.
        CHECK_RET(ret == ACL_SUCCESS,
                  LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret);
                  return STATUS_EXECUTION_FAILED);
    }
    return STATUS_SUCCESS;
}
/// Release a device buffer previously obtained from mallocWorkspace.
/// Null pointers are accepted and ignored.
/// @return STATUS_SUCCESS, or STATUS_EXECUTION_FAILED if aclrtFree fails.
infiniopStatus_t freeWorkspace(void *workspaceAddr) {
    if (workspaceAddr != nullptr) {
        auto ret = aclrtFree(workspaceAddr);
        // Log-then-return statement sequence, as in mallocWorkspace.
        CHECK_RET(ret == ACL_SUCCESS,
                  LOG_PRINT("aclrtFree failed, ERROR: %d\n", ret);
                  return STATUS_EXECUTION_FAILED);
    }
    return STATUS_SUCCESS;
}
/// Map the framework data type `dt` to the corresponding aclDataType.
/// Unsupported types (including BOOL and complex types, which ACL defines
/// but this mapping omits) fall through to ACL_DT_UNDEFINED.
aclDataType toAclDataType(DT dt) {
    if (dt == I8)
        return aclDataType::ACL_INT8;
    else if (dt == I16)
        return aclDataType::ACL_INT16;
    else if (dt == I32)
        return aclDataType::ACL_INT32;
    else if (dt == I64)
        return aclDataType::ACL_INT64;
    else if (dt == U8)
        return aclDataType::ACL_UINT8;
    else if (dt == U16)
        return aclDataType::ACL_UINT16;
    else if (dt == U32)
        return aclDataType::ACL_UINT32;
    else if (dt == U64)
        return aclDataType::ACL_UINT64;
    else if (dt == F16)
        return aclDataType::ACL_FLOAT16;
    else if (dt == BF16)
        return aclDataType::ACL_BF16;
    else if (dt == F32)
        return aclDataType::ACL_FLOAT;
    else if (dt == F64)
        return aclDataType::ACL_DOUBLE;
    else
        return aclDataType::ACL_DT_UNDEFINED;
}
/// Human-readable name of an aclDataType value, for logging/debug dumps.
/// Unrecognized values map to "UNKNOWN".
const char *dataTypeToString(aclDataType dtype) {
    switch (dtype) {
        case ACL_DT_UNDEFINED:
            return "ACL_DT_UNDEFINED";
        case ACL_FLOAT:
            return "ACL_FLOAT";
        case ACL_FLOAT16:
            return "ACL_FLOAT16";
        case ACL_INT8:
            return "ACL_INT8";
        case ACL_INT32:
            return "ACL_INT32";
        case ACL_UINT8:
            return "ACL_UINT8";
        case ACL_INT16:
            return "ACL_INT16";
        case ACL_UINT16:
            return "ACL_UINT16";
        case ACL_UINT32:
            return "ACL_UINT32";
        case ACL_INT64:
            return "ACL_INT64";
        case ACL_UINT64:
            return "ACL_UINT64";
        case ACL_DOUBLE:
            return "ACL_DOUBLE";
        case ACL_BOOL:
            return "ACL_BOOL";
        case ACL_STRING:
            return "ACL_STRING";
        case ACL_COMPLEX64:
            return "ACL_COMPLEX64";
        case ACL_COMPLEX128:
            return "ACL_COMPLEX128";
        case ACL_BF16:
            return "ACL_BF16";
        case ACL_INT4:
            return "ACL_INT4";
        case ACL_UINT1:
            return "ACL_UINT1";
        case ACL_COMPLEX32:
            return "ACL_COMPLEX32";
        default:
            return "UNKNOWN";
    }
}
/// Human-readable name of an aclFormat value, for logging/debug dumps.
/// Unrecognized values map to "UNKNOWN".
const char *formatToString(aclFormat format) {
    switch (format) {
        case ACL_FORMAT_UNDEFINED:
            return "ACL_FORMAT_UNDEFINED";
        case ACL_FORMAT_NCHW:
            return "ACL_FORMAT_NCHW";
        case ACL_FORMAT_NHWC:
            return "ACL_FORMAT_NHWC";
        case ACL_FORMAT_ND:
            return "ACL_FORMAT_ND";
        case ACL_FORMAT_NC1HWC0:
            return "ACL_FORMAT_NC1HWC0";
        case ACL_FORMAT_FRACTAL_Z:
            return "ACL_FORMAT_FRACTAL_Z";
        case ACL_FORMAT_NC1HWC0_C04:
            return "ACL_FORMAT_NC1HWC0_C04";
        case ACL_FORMAT_HWCN:
            return "ACL_FORMAT_HWCN";
        case ACL_FORMAT_NDHWC:
            return "ACL_FORMAT_NDHWC";
        case ACL_FORMAT_FRACTAL_NZ:
            return "ACL_FORMAT_FRACTAL_NZ";
        case ACL_FORMAT_NCDHW:
            return "ACL_FORMAT_NCDHW";
        case ACL_FORMAT_NDC1HWC0:
            return "ACL_FORMAT_NDC1HWC0";
        case ACL_FRACTAL_Z_3D:
            return "ACL_FRACTAL_Z_3D";
        case ACL_FORMAT_NC:
            return "ACL_FORMAT_NC";
        case ACL_FORMAT_NCL:
            return "ACL_FORMAT_NCL";
        default:
            return "UNKNOWN";
    }
}
src/infiniop/devices/ascend/common_ascend.h
0 → 100644
View file @
46da1a27
#ifndef __COMMON_ASCEND_H__
#define __COMMON_ASCEND_H__

#include "operators.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <acl/acl_rt.h>
#include <cstdio>
#include <functional>
#include <inttypes.h>
#include <numeric>
#include <vector>

// NOTE(review): this extern "C" block encloses only macros, which have no
// linkage — it currently has no effect. Confirm whether the function
// declarations below were meant to be inside it.
#ifdef __cplusplus
extern "C" {
#endif

// Evaluate `return_expr` when `cond` is false. `return_expr` may be a
// statement sequence, e.g. `LOG_PRINT(...); return STATUS_...`.
#define CHECK_RET(cond, return_expr) \
    do {                             \
        if (!(cond)) {               \
            return_expr;             \
        }                            \
    } while (0)

// printf-style logging helper.
#define LOG_PRINT(message, ...) \
    do {                              \
        printf(message, ##__VA_ARGS__); \
    } while (0)

#ifdef __cplusplus
};
#endif

// Product of the first `num` entries of `shape`.
int64_t numElements(const int64_t *shape, int64_t num);

// Human-readable names for ACL enum values (logging/debug).
const char *dataTypeToString(aclDataType dtype);
const char *formatToString(aclFormat format);

// Allocate (huge-page preferred) / free a device workspace buffer.
infiniopStatus_t mallocWorkspace(void **workspaceAddr, uint64_t workspaceSize);
infiniopStatus_t freeWorkspace(void *workspaceAddr);

// Map the framework dtype to aclDataType (ACL_DT_UNDEFINED if unsupported).
aclDataType toAclDataType(DT dt);

#endif
src/infiniop/devices/ascend/tensor_aclnn.cc
0 → 100644
View file @
46da1a27
#include "tensor_aclnn.h"
#include "../../ops/utils.h"
#include <algorithm>
/// Fill this descriptor from dtype/shape/strides (copied), then derive the
/// storage shape. Format is fixed to ACL_FORMAT_ND for now.
/// @return STATUS_BAD_PARAM when shape and strides differ in length,
///         otherwise the status of inferStorageShape().
infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides) {
    if (shape.size() != strides.size()) {
        return STATUS_BAD_PARAM;
    }
    this->ndim = shape.size();
    this->shape = std::vector<int64_t>(shape);
    this->strides = std::vector<int64_t>(strides);
    this->dataType = dtype;
    // Set format
    // TODO: Support other format
    aclFormat format = aclFormat::ACL_FORMAT_ND;
    this->format = format;
    CHECK_STATUS(this->inferStorageShape(), STATUS_SUCCESS);
    return STATUS_SUCCESS;
}
/// @brief Infer storage shape. For now this returns a 1D shape holding the
/// total tensor storage size: extent of the largest-stride dimension times
/// that stride. We don't see why a higher-dimensional storage shape is ever
/// needed. To change if necessary.
/// NOTE(review): std::max_element on an empty `strides` vector returns
/// end(), and dereferencing via the index below would be UB — confirm
/// 0-dim tensors never reach here. Also assumes the largest-stride
/// dimension bounds the whole allocation (true for dense layouts).
infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() {
    auto index = std::max_element(this->strides.begin(), this->strides.end());
    uint64_t max_stride_index = std::distance(this->strides.begin(), index);
    this->storageNdim = 1;
    this->storageShape = std::vector<int64_t>({this->shape[max_stride_index] * this->strides[max_stride_index]});
    return STATUS_SUCCESS;
}
/// @brief Set aclnnTensorDescriptor from infiniopTensorDescriptor.
/// @param y source descriptor whose dtype/shape/strides are copied.
/// @return status propagated from setDescriptor.
infiniopStatus_t aclnnTensorDescriptor::fromInfiniOpTensorDescriptor(infiniopTensorDescriptor_t y) {
    uint64_t ndim = y->ndim;
    // Cast shape type: infiniop stores size_t extents, ACL wants int64_t.
    auto shape = std::vector<int64_t>(ndim);
    auto strides = std::vector<int64_t>(ndim);
    for (uint64_t i = 0; i < ndim; ++i) {
        shape[i] = static_cast<int64_t>(y->shape[i]);
        strides[i] = y->strides[i];
    }
    return setDescriptor(toAclDataType(y->dt), shape, strides);
}
/// @brief Wrapper of aclCreateTensor: lazily creates the underlying
/// aclTensor. Idempotent — returns immediately if `t` already exists.
/// See https://www.hiascend.com/document/detail/zh/CANNCommunityEdition/80RC3alpha001/apiref/appdevgapi/aclcppdevg_03_0168.html
/// @param data Data ptr on device global mem (may be nullptr).
/// @return STATUS_SUCCESS always.
/// NOTE(review): aclCreateTensor's result is not checked — `t` may be left
/// null on failure; callers should verify before use.
infiniopStatus_t aclnnTensorDescriptor::createTensor(void *data) {
    if (this->t) {
        return STATUS_SUCCESS;
    }
    this->t = aclCreateTensor(this->shape.data(),
                              this->ndim,
                              this->dataType,
                              this->strides.data(),
                              this->offset,
                              this->format,
                              this->storageShape.data(),
                              this->storageNdim,
                              data);
    return STATUS_SUCCESS;
}
/// Destroy the underlying aclTensor and reset `t` to nullptr.
/// @return STATUS_SUCCESS, or STATUS_EXECUTION_FAILED if aclDestroyTensor
///         fails (in which case `t` is left unchanged).
infiniopStatus_t aclnnTensorDescriptor::destroyTensor() {
    auto ret = aclDestroyTensor(this->t);
    // Fixed log message typo: was "aclDesctroyTensor".
    CHECK_RET(ret == ACL_SUCCESS,
              LOG_PRINT("aclDestroyTensor failed, ERROR: %d\n", ret);
              return STATUS_EXECUTION_FAILED);
    t = nullptr;
    return STATUS_SUCCESS;
}
// Release the owned aclTensor (if one was created) when the descriptor
// is destroyed.
aclnnTensorDescriptor::~aclnnTensorDescriptor() {
    if (this->t) {
        destroyTensor();
    }
}
/// @brief TensorDescriptor's string info.
/// @return Heap-allocated (malloc) description of this descriptor; the
///         caller must free() it. NULL on allocation failure.
char *aclnnTensorDescriptor::toString() {
    // Conservative size estimate: fixed text plus ~40 chars per printed
    // integer. snprintf below guards against any underestimate — the
    // previous sprintf-based version could overflow if the estimate was
    // ever wrong.
    size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40;
    char *buffer = (char *) malloc(bufferSize);
    if (!buffer) {
        return NULL;
    }
    size_t used = 0;
    // Bounds-checked append into `buffer`; silently truncates when full.
    auto append = [&](auto &&...args) {
        if (used + 1 >= bufferSize) {
            return;
        }
        int n = snprintf(buffer + used, bufferSize - used, args...);
        if (n > 0) {
            used += std::min((size_t) n, bufferSize - used - 1);
        }
    };
    // ndim is uint64_t — print with PRIu64 (the old PRId64 mismatched the
    // argument type).
    append("ndim: %" PRIu64 "\n", this->ndim);
    append("shape: [");
    for (uint64_t i = 0; i < this->ndim; ++i) {
        append("%" PRId64, this->shape[i]);
        if (i < this->ndim - 1) {
            append(", ");
        }
    }
    append("]\n");
    append("stride: [");
    for (uint64_t i = 0; i < this->ndim; ++i) {
        append("%" PRId64, this->strides[i]);
        if (i < this->ndim - 1) {
            append(", ");
        }
    }
    append("]\n");
    append("offset: %" PRId64 "\n", this->offset);
    append("dataType: %s\n", dataTypeToString(this->dataType));
    append("format: %s\n", formatToString(this->format));
    append("storageShape: [");
    for (int64_t i = 0; i < this->storageNdim; ++i) {
        append("%" PRId64, this->storageShape[i]);
        if (i < this->storageNdim - 1) {
            append(", ");
        }
    }
    append("]\n");
    append("storageNdim: %" PRId64 "\n", this->storageNdim);
    return buffer;
}
src/infiniop/devices/ascend/tensor_aclnn.h
0 → 100644
View file @
46da1a27
#ifndef __ACLNN_TENSOR__
#define __ACLNN_TENSOR__

#include "./common_ascend.h"
#include "operators.h"
#include "tensor.h"
#include "tensor/tensor_descriptor.h"
#include <acl/acl.h>
#include <acl/acl_base.h>
#include <aclnn/acl_meta.h>
#include <vector>

// Aclnn tensor descriptor,
// used to build aclTensor
struct aclnnTensorDescriptor {
    uint64_t ndim;                     // number of dimensions
    std::vector<int64_t> shape;        // extent per dimension
    std::vector<int64_t> strides;      // stride per dimension, in elements
    int64_t offset;                    // element offset into the storage
    aclDataType dataType;
    aclFormat format;                  // currently always ACL_FORMAT_ND
    std::vector<int64_t> storageShape; // 1-D: total storage extent
    int64_t storageNdim;               // currently always 1
    aclTensor *t;                      // lazily created by createTensor()

    // Fill from dtype/shape/strides; transfers from infiniOp DT to
    // aclDataType happen in fromInfiniOpTensorDescriptor.
    infiniopStatus_t setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides);
    // Derive storageShape/storageNdim from shape and strides.
    infiniopStatus_t inferStorageShape();
    // Convert from InfiniOpTensorDescriptor.
    infiniopStatus_t fromInfiniOpTensorDescriptor(infiniopTensorDescriptor_t y_desc);
    // Create the underlying aclTensor (no-op if already created).
    infiniopStatus_t createTensor(void *data = nullptr);
    // Destroy the underlying aclTensor and reset `t`.
    infiniopStatus_t destroyTensor();
    ~aclnnTensorDescriptor();
    // Heap-allocated human-readable dump; caller frees with free().
    char *toString();
};
typedef aclnnTensorDescriptor *aclnnTensorDescriptor_t;

#endif
src/infiniop/devices/bang/bang_handle.cc
0 → 100644
View file @
46da1a27
#include "bang_handle.h"
/// Create a Cambricon BANG handle for `device_id`: selects the device,
/// pre-creates one cnnlHandle_t and seeds the handle pool with it.
/// NOTE(review): cnrtGetDeviceCount and cnnlCreate return codes are not
/// checked — only cnrtSetDevice is.
infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr, int device_id) {
    unsigned int device_count;
    cnrtGetDeviceCount(&device_count);
    if (device_id >= static_cast<int>(device_count)) {
        return INFINIOP_STATUS_BAD_DEVICE;
    }
    auto pool = std::make_shared<Pool<cnnlHandle_t>>();
    if (cnrtSetDevice(device_id) != cnrtSuccess) {
        return INFINIOP_STATUS_BAD_DEVICE;
    }
    cnnlHandle_t handle;
    cnnlCreate(&handle);
    pool->push(std::move(handle));
    *handle_ptr = new InfiniopBangHandle{INFINI_DEVICE_CAMBRICON, device_id, std::move(pool)};
    return INFINIOP_STATUS_SUCCESS;
}
src/infiniop/devices/bang/bang_handle.h
0 → 100644
View file @
46da1a27
#ifndef BANG_HANDLE_H
#define BANG_HANDLE_H

#include "../pool.h"
#include "cnnl.h"
#include "cnrt.h"
#include "infiniop/handle.h"
#include <memory>

// Per-device handle for the Cambricon BANG backend.
struct InfiniopBangHandle {
    infiniDevice_t device; // device kind (INFINI_DEVICE_CAMBRICON)
    int device_id;
    // Pool of reusable cnnlHandle_t objects shared by operator calls.
    std::shared_ptr<Pool<cnnlHandle_t>> cnnl_handles;
};
typedef struct InfiniopBangHandle *infiniopBangHandle_t;

infiniopStatus_t createBangHandle(infiniopBangHandle_t *handle_ptr, int device_id);

// Borrow a cnnlHandle_t from `pool` (creating one on `device_id` when the
// pool is empty), bind it to `queue`, invoke f(handle), then return the
// handle to the pool.
// NOTE(review): when the pool is empty, `*handle` is written through before
// a value exists — this assumes Pool::pop() yields a dereferenceable
// (e.g. default-constructed) slot in that case; confirm against Pool.
template<typename T>
void use_cnnl(std::shared_ptr<Pool<cnnlHandle_t>> &pool, int device_id, cnrtQueue_t queue, T const &f) {
    auto handle = pool->pop();
    if (!handle) {
        cnrtSetDevice(device_id);
        cnnlCreate(&(*handle));
    }
    cnnlSetQueue(*handle, (cnrtQueue_t) queue);
    f(*handle);
    pool->push(std::move(*handle));
}

#endif
src/infiniop/devices/bang/common_bang.h
0 → 100644
View file @
46da1a27
#ifndef __COMMON_BANG_H__
#define __COMMON_BANG_H__

#include "cnnl.h"
#include "infinicore.h"
#include <vector>

// Per-core on-chip NRAM working budget in bytes.
// NOTE(review): the original comment says the hardware maximum is
// 1024 * 768, while the constant is 1024 * 256 — confirm which limit the
// kernels actually rely on.
const int NRAM_MAX_SIZE = 1024 * 256;
// Device global memory (GDRAM) budget in bytes.
const int GDRAM_MAX_SIZE = 1024 * 1024 * 1024;

// Set a cnnl tensor descriptor without strides.
// NOTE(review): dtype is hardcoded to CNNL_DTYPE_HALF here even though
// cnnlDataTypeConvert exists below — confirm whether non-f16 tensors are
// ever passed through this helper.
inline void setCnnlTensor(cnnlTensorDescriptor_t desc, const TensorDescriptor *layout) {
    std::vector<int> dims(layout->ndim);
    for (uint64_t i = 0; i < layout->ndim; i++) {
        dims[i] = static_cast<int>(layout->shape[i]);
    }
    cnnlSetTensorDescriptor(desc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_HALF,
                            dims.size(), dims.data());
}

// Set a cnnl tensor descriptor with strides.
// Strides are stored in bytes in `layout` and converted to element counts
// by dividing by the dtype size. dtype hardcoded to HALF as above.
inline void setCnnlTensorEx(cnnlTensorDescriptor_t desc, const TensorDescriptor *layout) {
    std::vector<int> dim_size(layout->ndim), dim_stride(layout->ndim);
    for (uint64_t i = 0; i < layout->ndim; i++) {
        dim_size[i] = static_cast<int>(layout->shape[i]);
        dim_stride[i] = static_cast<int>(layout->strides[i] / layout->dt.size);
    }
    cnnlSetTensorDescriptorEx(desc, CNNL_LAYOUT_ARRAY, CNNL_DTYPE_HALF,
                              dim_size.size(), dim_size.data(), dim_stride.data());
}

// Map the framework dtype to the cnnl dtype; CNNL_DTYPE_INVALID when
// unsupported.
inline cnnlDataType_t cnnlDataTypeConvert(infiniDtype_t dataType) {
    if (dtype_eq(dataType, INFINI_DTYPE_F32)) {
        return CNNL_DTYPE_FLOAT;
    } else if (dtype_eq(dataType, INFINI_DTYPE_F64)) {
        return CNNL_DTYPE_DOUBLE;
    } else if (dtype_eq(dataType, INFINI_DTYPE_F16)) {
        return CNNL_DTYPE_HALF;
    } else if (dtype_eq(dataType, INFINI_DTYPE_I8)) {
        return CNNL_DTYPE_INT8;
    } else if (dtype_eq(dataType, INFINI_DTYPE_I32)) {
        return CNNL_DTYPE_INT32;
    } else if (dtype_eq(dataType, INFINI_DTYPE_U8)) {
        return CNNL_DTYPE_UINT8;
    } else if (dtype_eq(dataType, INFINI_DTYPE_BF16)) {
        return CNNL_DTYPE_BFLOAT16;
    } else if (dtype_eq(dataType, INFINI_DTYPE_I64)) {
        return CNNL_DTYPE_INT64;
    } else {
        return CNNL_DTYPE_INVALID;
    }
}

#endif// __COMMON_BANG_H__
src/infiniop/devices/cpu/common_cpu.cc
0 → 100644
View file @
46da1a27
#include "./common_cpu.h"
// Convert IEEE 754 half-precision (binary16) bits to a single-precision
// float. Handles signed zero, subnormals, Inf and NaN.
float f16_to_f32(uint16_t h) {
    uint32_t sign = (uint32_t) (h & 0x8000) << 16;// sign bit
    int32_t exponent = (h >> 10) & 0x1F;          // biased exponent
    uint32_t mantissa = h & 0x3FF;                // fraction bits
    uint32_t f32;
    if (exponent == 31) {
        // Special case: Inf (mantissa 0) or NaN (mantissa non-zero).
        f32 = (mantissa != 0)
                  ? (sign | 0x7F800000 | (mantissa << 13))
                  : (sign | 0x7F800000);
    } else if (exponent == 0) {
        if (mantissa == 0) {
            // Signed zero.
            f32 = sign;
        } else {
            // Subnormal half: renormalize into a normal float32.
            exponent = -14;
            while ((mantissa & 0x400) == 0) {
                mantissa <<= 1;
                exponent--;
            }
            mantissa &= 0x3FF;// drop the leading 1 bit
            f32 = sign | ((uint32_t) (exponent + 127) << 23) | (mantissa << 13);
        }
    } else {
        // Normalized half: rebias exponent (15 -> 127), widen mantissa.
        f32 = sign | ((uint32_t) (exponent + 127 - 15) << 23) | (mantissa << 13);
    }
    // memcpy instead of *(float*)& — pointer punning violates the strict
    // aliasing rule; memcpy compiles to the same single move.
    float out;
    memcpy(&out, &f32, sizeof(out));
    return out;
}
// Convert a single-precision float to IEEE 754 half-precision (binary16)
// bits. Overflow maps to Inf, tiny values flush toward signed zero;
// mantissa bits are truncated (round toward zero).
uint16_t f32_to_f16(float val) {
    // memcpy instead of *(uint32_t*)& — avoids strict-aliasing UB.
    uint32_t f32;
    memcpy(&f32, &val, sizeof(f32));
    uint16_t sign = (f32 >> 16) & 0x8000;             // sign bit
    int32_t exponent = ((f32 >> 23) & 0xFF) - 127;    // unbiased exponent
    uint32_t mantissa = f32 & 0x7FFFFF;               // fraction bits
    if (exponent >= 31) {
        // Out of half range: NaN stays NaN, everything else becomes Inf.
        if (exponent == 128 && mantissa != 0) {
            return sign | 0x7E00;// quiet NaN
        }
        return sign | 0x7C00;// Infinity / overflow
    } else if (exponent >= -14) {
        // Normalized half: rebias exponent (127 -> 15), truncate mantissa.
        return sign | ((exponent + 15) << 10) | (mantissa >> 13);
    } else if (exponent >= -24) {
        // Subnormal half: add the implicit leading 1, then shift down.
        mantissa |= 0x800000;
        mantissa >>= (-14 - exponent);
        return sign | (mantissa >> 13);
    } else {
        // Too small even for a subnormal: signed zero.
        return sign;
    }
}
// Given the flat index of an element in a broadcasted view, compute the
// memory offset of the corresponding element in the original (target)
// tensor. Broadcast dimensions carry a zero target stride, so they
// contribute nothing to the offset.
size_t indexToReducedOffset(
    size_t flat_index,
    size_t ndim,
    int64_t const *broadcasted_strides,
    int64_t const *target_strides) {
    size_t offset = 0;
    size_t remaining = flat_index;
    for (size_t dim = 0; dim < ndim; ++dim) {
        // Coordinate along this dimension in the broadcasted view.
        size_t coord = remaining / broadcasted_strides[dim];
        remaining %= broadcasted_strides[dim];
        offset += coord * target_strides[dim];
    }
    return offset;
}
// Return the memory offset (in elements) of a tensor element, given its
// flattened row-major index, the tensor shape and per-dimension strides.
size_t indexToOffset(
    size_t flat_index,
    size_t ndim,
    size_t const *shape,
    int64_t const *strides) {
    size_t res = 0;
    // Walk dimensions from last to first. The condition must be `i-- > 0`:
    // the original `i-- >= 0` is always true for unsigned i (after i == 0
    // it wraps to SIZE_MAX), causing out-of-bounds reads of shape/strides.
    for (size_t i = ndim; i-- > 0;) {
        res += (flat_index % shape[i]) * strides[i];
        flat_index /= shape[i];
    }
    return res;
}
/**
 * Get the total element count after applying padding for an ndim-ary
 * tensor: the first two dimensions (batch/channel) are never padded; each
 * spatial dimension i >= 2 grows by 2 * pads[i - 2].
 */
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
    uint64_t total = 1;
    for (size_t dim = 0; dim < ndim; ++dim) {
        uint64_t extent = shape[dim];
        if (dim >= 2) {
            extent += 2 * pads[dim - 2];
        }
        total *= extent;
    }
    return total;
}
// Compute the padded shape: copies `shape`, then grows every dimension
// i >= 2 by 2 * pads[i - 2] (the first two dims are never padded).
std::vector<size_t> getPaddedShape(
    size_t ndim,
    size_t const *shape,
    size_t const *pads) {
    std::vector<size_t> padded_shape(ndim);
    memcpy(padded_shape.data(), shape, ndim * sizeof(size_t));
    for (size_t i = 2; i < ndim; ++i) {
        padded_shape[i] += 2 * pads[i - 2];
    }
    // Plain return enables NRVO; the previous `return std::move(...)`
    // actively inhibited copy elision (a pessimizing move).
    return padded_shape;
}
src/infiniop/devices/cpu/common_cpu.h
0 → 100644
View file @
46da1a27
#ifndef __INFINIOP__COMMON_CPU_H__
#define __INFINIOP__COMMON_CPU_H__
#include <cmath>
#include <cstdint>
#include <cstring>
#include <vector>
// convert half-precision float to single-precision float
float
f16_to_f32
(
uint16_t
code
);
// convert single-precision float to half-precision float
uint16_t
f32_to_f16
(
float
val
);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t
indexToReducedOffset
(
size_t
flat_index
,
size_t
ndim
,
int64_t
const
*
broadcasted_strides
,
int64_t
const
*
target_strides
);
// return the memory offset a tensor given flattened index
size_t
indexToOffset
(
size_t
flat_index
,
size_t
ndim
,
size_t
const
*
shape
,
int64_t
const
*
strides
);
/**
* get the total array size (element count) after applying padding for a
* ndim-ary tensor with the given shape
*/
size_t
getPaddedSize
(
size_t
ndim
,
size_t
*
shape
,
size_t
const
*
pads
);
// calculate the padded shape and store the result in padded_shape
std
::
vector
<
size_t
>
getPaddedShape
(
size_t
ndim
,
size_t
const
*
shape
,
size_t
const
*
pads
);
#endif// __INFINIOP__COMMON_CPU_H__
src/infiniop/devices/cpu/cpu_handle.cc
0 → 100644
View file @
46da1a27
#include "./cpu_handle.h"
/// Create the CPU backend handle: device kind INFINI_DEVICE_CPU, id 0.
/// Never fails; the caller owns the heap-allocated handle.
infiniopStatus_t createCpuHandle(infiniopCpuHandle_t *handle_ptr) {
    *handle_ptr = new InfiniopHandle{INFINI_DEVICE_CPU, 0};
    return INFINIOP_STATUS_SUCCESS;
}
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment