"vscode:/vscode.git/clone" did not exist on "ba072ccca41212e3ac3ac1eca3381d226187c0d1"
Commit 46da1a27 authored by PanZezhongQY's avatar PanZezhongQY
Browse files

feat: cpu and cuda matmul

parents
---
Language: Cpp
# BasedOnStyle: LLVM
AccessModifierOffset: -2
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlinesLeft: false
AlignOperands: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: false
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
ColumnLimit: 80
CommentPragmas: '^ IWYU pragma:'
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
DisableFormat: false
ExperimentalAutoDetectBinPacking: false
ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
IncludeCategories:
  - Regex: '^"(llvm|llvm-c|clang|clang-c)/'
    Priority: 2
  - Regex: '^(<|"(gtest|isl|json)/)'
    Priority: 3
  - Regex: '.*'
    Priority: 1
IndentCaseLabels: false
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PenaltyBreakBeforeFirstCallParameter: 19
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
PointerAlignment: Right
ReflowComments: true
SortIncludes: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
TabWidth: 8
UseTab: Never
...
# Xmake cache
.xmake/
build/
# MacOS Cache
.DS_Store
# Vscode
.vscode/
# Python
__pycache__/
# Log
*.log
# Cache
cache/
#ifndef __INFINICORE_H__
#define __INFINICORE_H__
#include <stdint.h>
// Export and linkage helper macros used by the public API declarations.
//   __export : marks a symbol as exported from the shared library
//              (dllexport on Windows, default visibility on GCC >= 3.3,
//              empty elsewhere).
//   __C      : expands to extern "C" when compiled as C++, empty in C.
// The block also pulls in the header that provides size_t for the current
// language.
#ifndef __INFINICORE_EXPORT_C__
#define __INFINICORE_EXPORT_C__
#if defined(_WIN32)
#define __export __declspec(dllexport)
#elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#define __export __attribute__((visibility("default")))
#else
#define __export
#endif
#ifdef __cplusplus
#define __C extern "C"
#include <cstddef>
#else
#define __C
// The C header is <stddef.h>; <stddef> does not exist and breaks C builds.
#include <stddef.h>
#endif
#endif// __INFINICORE_EXPORT_C__
#ifndef __INFINI_DEVICE__
#define __INFINI_DEVICE__
// Device kinds known to the library. Every enumerator has an explicit
// numeric value (0..8), so the numbering does not depend on declaration order.
typedef enum
{
INFINI_DEVICE_CPU = 0,
INFINI_DEVICE_NVIDIA = 1,
INFINI_DEVICE_CAMBRICON = 2,
INFINI_DEVICE_ASCEND = 3,
INFINI_DEVICE_METAX = 4,
INFINI_DEVICE_MOORE = 5,
INFINI_DEVICE_ILUVATAR = 6,
INFINI_DEVICE_KUNLUN = 7,
INFINI_DEVICE_SUGON = 8,
} infiniDevice_t;
#endif// __INFINI_DEVICE__
#ifndef __INFINI_DTYPE__
#define __INFINI_DTYPE__
// Element data types. Values are explicitly numbered; 0 is reserved for
// "invalid". infini_sizeof() below is the size table for these tags.
typedef enum {
    INFINI_DTYPE_INVALID = 0,
    INFINI_DTYPE_BYTE = 1,
    INFINI_DTYPE_BOOL = 2,
    INFINI_DTYPE_I8 = 3,
    INFINI_DTYPE_I16 = 4,
    INFINI_DTYPE_I32 = 5,
    INFINI_DTYPE_I64 = 6,
    INFINI_DTYPE_U8 = 7,
    INFINI_DTYPE_U16 = 8,
    INFINI_DTYPE_U32 = 9,
    INFINI_DTYPE_U64 = 10,
    INFINI_DTYPE_F8 = 11,
    INFINI_DTYPE_F16 = 12,
    INFINI_DTYPE_F32 = 13,
    INFINI_DTYPE_F64 = 14,
    INFINI_DTYPE_C8 = 15,
    INFINI_DTYPE_C16 = 16,
    INFINI_DTYPE_C32 = 17,
    INFINI_DTYPE_C64 = 18,
    INFINI_DTYPE_BF16 = 19,
} infiniDtype_t;

// Returns the size in bytes of one element of `dtype`.
// INFINI_DTYPE_INVALID and any value that is not a known enumerator yield 0.
// NOTE(review): the C8..C64 entries are twice the size their numeric suffix
// suggests, presumably because they are two-component (complex) types - confirm.
//
// Declared `static inline` rather than plain `inline`: in C99/C11 a plain
// `inline` definition in a header provides no external definition, so any call
// the compiler does not inline fails to link. `static inline` gives each
// translation unit its own copy and is equally valid in C++.
static inline size_t infini_sizeof(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_INVALID: return 0;
    case INFINI_DTYPE_BYTE: return 1;
    case INFINI_DTYPE_BOOL: return 1;
    case INFINI_DTYPE_I8: return 1;
    case INFINI_DTYPE_I16: return 2;
    case INFINI_DTYPE_I32: return 4;
    case INFINI_DTYPE_I64: return 8;
    case INFINI_DTYPE_U8: return 1;
    case INFINI_DTYPE_U16: return 2;
    case INFINI_DTYPE_U32: return 4;
    case INFINI_DTYPE_U64: return 8;
    case INFINI_DTYPE_F8: return 1;
    case INFINI_DTYPE_F16: return 2;
    case INFINI_DTYPE_F32: return 4;
    case INFINI_DTYPE_F64: return 8;
    case INFINI_DTYPE_C8: return 2;
    case INFINI_DTYPE_C16: return 4;
    case INFINI_DTYPE_C32: return 8;
    case INFINI_DTYPE_C64: return 16;
    case INFINI_DTYPE_BF16: return 2;
    default: return 0;
    }
}
#endif// __INFINI_DTYPE__
#endif// __INFINICORE_H__
#ifndef __INFINIOP_H__
#define __INFINIOP_H__
#include "infiniop/ops/add.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/avg_pool.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/expand.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/global_avg_pool.h"
#include "infiniop/ops/matmul.h"
#include "infiniop/ops/max_pool.h"
#include "infiniop/ops/mlp.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/relu.h"
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rotary_embedding.h"
#include "infiniop/ops/swiglu.h"
#endif // __INFINIOP_H__
#ifndef __INFINIOP_HANDLE__
#define __INFINIOP_HANDLE__
#include "../infinicore.h"
#include "./status.h"
// Library handle: holds a device kind and an integer device id.
typedef struct InfiniopHandle {
// Device kind, one of the infiniDevice_t enumerators.
infiniDevice_t device;
// NOTE(review): presumably the index of the device among devices of that kind - confirm.
int device_id;
} InfiniopHandle;
// Handles are passed around by pointer.
typedef InfiniopHandle *infiniopHandle_t;
// Creates a handle for (device, device_id). handle_ptr is an out-pointer;
// presumably it receives the new handle on success - confirm against the implementation.
// Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr, infiniDevice_t device, int device_id);
// Destroys a handle. Returns an infiniopStatus_t.
// NOTE(review): behavior for a NULL handle is not visible from this header.
__C __export infiniopStatus_t infiniopDestroyHandle(infiniopHandle_t handle);
#endif
#ifndef __INFINIOP_OPERATOR___
#define __INFINIOP_OPERATOR___
#include "./handle.h"
#include "./tensor_descriptor.h"
// Base descriptor for all operators.
// The per-operator headers alias a pointer to this struct as their descriptor
// handle type (e.g. infiniopAddDescriptor_t). It carries the same two fields
// as InfiniopHandle: a device kind and an integer device id.
typedef struct InfiniopDescriptor {
infiniDevice_t device;
int device_id;
} InfiniopDescriptor;
#endif //__INFINIOP_OPERATOR___
#ifndef __INFINIOP_ADD_H__
#define __INFINIOP_ADD_H__
#include "../operator.h"
// Add operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopAddDescriptor_t;
// Creates an Add descriptor from the tensor descriptors c, a and b.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): presumably c describes the output and a, b the inputs
// (consistent with the const-ness of the data pointers in infiniopAdd) - confirm.
__C __export infiniopStatus_t infiniopCreateAddDescriptor(infiniopHandle_t handle,
infiniopAddDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);
// Runs the operator: a and b are read-only data pointers, c is writable.
// stream is an opaque pointer; its meaning is backend-specific (not visible here).
__C __export infiniopStatus_t infiniopAdd(infiniopAddDescriptor_t desc,
void *c,
void const *a,
void const *b,
void *stream);
// Destroys a descriptor created by infiniopCreateAddDescriptor.
__C __export infiniopStatus_t infiniopDestroyAddDescriptor(infiniopAddDescriptor_t desc);
#endif
#ifndef __INFINIOP_ATTENTION_H__
#define __INFINIOP_ATTENTION_H__
#include "../operator.h"
#include "./matmul.h"
#include "./swiglu.h"
// Attention operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopAttentionDescriptor_t;
// Creates an Attention descriptor from tensor descriptors for the output,
// q, k, v and the two caches, plus an integer pos.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): pos is presumably the position at which k/v are placed in the
// caches - confirm against the implementation.
__C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
infiniopAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t q_desc,
infiniopTensorDescriptor_t k_desc,
infiniopTensorDescriptor_t v_desc,
infiniopTensorDescriptor_t k_cache_desc,
infiniopTensorDescriptor_t v_cache_desc,
uint64_t pos);
// Reports a workspace size through *size.
// NOTE(review): presumably the number of bytes infiniopAttention needs in
// `workspace` - confirm.
__C __export infiniopStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);
// Runs the operator. q, k and v are read-only data pointers; out, k_cache and
// v_cache are non-const, so the call may write through them.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *out,
void const *q,
void const *k,
void const *v,
void *k_cache,
void *v_cache,
void *stream);
// Destroys a descriptor created by infiniopCreateAttentionDescriptor.
__C __export infiniopStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
#endif
#ifndef __INFINIOP_AVG_POOL_H__
#define __INFINIOP_AVG_POOL_H__
#include "../operator.h"
// Average-pooling operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopAvgPoolDescriptor_t;
// Creates an AvgPool descriptor from tensor descriptors y and x and three
// read-only arrays (kernel_shape, pads, strides).
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): n is presumably the number of entries in each array - confirm.
// NOTE(review): strides is int64_t while kernel_shape/pads are uint64_t -
// confirm this is intentional.
__C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t handle,
infiniopAvgPoolDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
uint64_t const *kernel_shape,
uint64_t const *pads,
int64_t const *strides,
uint64_t n);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetAvgPoolWorkspaceSize(infiniopAvgPoolDescriptor_t desc, size_t *size);
// Runs the operator: x is a read-only data pointer, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopAvgPool(infiniopAvgPoolDescriptor_t desc,
void *workspace, size_t workspace_size,
void *y, void const *x, void *stream);
// Destroys a descriptor created by infiniopCreateAvgPoolDescriptor.
__C __export infiniopStatus_t infiniopDestroyAvgPoolDescriptor(infiniopAvgPoolDescriptor_t desc);
#endif
#ifndef __INFINIOP_CAUSAL_SOFTMAX_H__
#define __INFINIOP_CAUSAL_SOFTMAX_H__
#include "../operator.h"
// Causal-softmax operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopCausalSoftmaxDescriptor_t;
// Creates a CausalSoftmax descriptor from a single tensor descriptor y_desc.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(infiniopHandle_t handle,
infiniopCausalSoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size);
// Runs the operator on a single non-const data pointer.
// NOTE(review): with one tensor descriptor and one writable buffer this looks
// like an in-place operation - confirm against the implementation.
__C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *data,
void *stream);
// Destroys a descriptor created by infiniopCreateCausalSoftmaxDescriptor.
__C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);
#endif
#ifndef __INFINIOP_CONV_H__
#define __INFINIOP_CONV_H__
#include "../operator.h"
// Convolution operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopConvDescriptor_t;
// Creates a Conv descriptor from tensor descriptors y, x and w plus three
// untyped arrays (pads, strides, dilations).
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): the arrays are passed as void *, so their element type is not
// visible in this header - confirm against the implementation.
// NOTE(review): n is presumably the number of entries in each array - confirm.
__C __export infiniopStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
infiniopConvDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
infiniopTensorDescriptor_t w,
void *pads,
void *strides,
void *dilations,
size_t n);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetConvWorkspaceSize(infiniopConvDescriptor_t desc, size_t *size);
// Runs the operator: x and w are read-only data pointers, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *workspace, size_t workspace_size, void *y, void const *x, void const *w, void *stream);
// Destroys a descriptor created by infiniopCreateConvDescriptor.
__C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);
#endif
#ifndef __INFINIOP_EXPAND_H__
#define __INFINIOP_EXPAND_H__
#include "../operator.h"
// Expand operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopExpandDescriptor_t;
// Creates an Expand descriptor from tensor descriptors y and x.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): presumably x is expanded (broadcast) to the shape of y - confirm.
__C __export infiniopStatus_t infiniopCreateExpandDescriptor(infiniopHandle_t handle,
infiniopExpandDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
// Runs the operator: x is a read-only data pointer, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopExpand(infiniopExpandDescriptor_t desc,
void *y,
void const *x,
void *stream);
// Destroys a descriptor created by infiniopCreateExpandDescriptor.
__C __export infiniopStatus_t infiniopDestroyExpandDescriptor(infiniopExpandDescriptor_t desc);
#endif
#ifndef __INFINIOP_GEMM_H__
#define __INFINIOP_GEMM_H__
#include "../operator.h"
// GEMM operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopGEMMDescriptor_t;
// Creates a GEMM descriptor from tensor descriptors y, a, b and c plus two
// char flags transA and transB.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): transA/transB presumably select transposition of a and b; the
// accepted flag values are not visible in this header - confirm.
__C __export infiniopStatus_t infiniopCreateGEMMDescriptor(infiniopHandle_t handle,
infiniopGEMMDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc,
infiniopTensorDescriptor_t c_desc,
char transA,
char transB);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetGEMMWorkspaceSize(infiniopGEMMDescriptor_t desc, size_t *size);
// Runs the operator: a, b and c are read-only data pointers, y is writable.
// NOTE(review): presumably y = alpha * op(a) * op(b) + beta * c - confirm.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopGEMM(infiniopGEMMDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
void const *a,
void const *b,
void const *c,
float alpha,
float beta,
void *stream);
// Destroys a descriptor created by infiniopCreateGEMMDescriptor.
__C __export infiniopStatus_t infiniopDestroyGEMMDescriptor(infiniopGEMMDescriptor_t desc);
#endif
#ifndef __INFINIOP_GLOBAL_AVG_POOL_H__
#define __INFINIOP_GLOBAL_AVG_POOL_H__
#include "../operator.h"
// Global-average-pooling operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopGlobalAvgPoolDescriptor_t;
// Creates a GlobalAvgPool descriptor from tensor descriptors y and x.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateGlobalAvgPoolDescriptor(infiniopHandle_t handle,
infiniopGlobalAvgPoolDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetGlobalAvgPoolWorkspaceSize(infiniopGlobalAvgPoolDescriptor_t desc, size_t *size);
// Runs the operator: x is a read-only data pointer, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopGlobalAvgPool(infiniopGlobalAvgPoolDescriptor_t desc,
void *workspace, size_t workspace_size,
void *y, void const *x, void *stream);
// Destroys a descriptor created by infiniopCreateGlobalAvgPoolDescriptor.
__C __export infiniopStatus_t infiniopDestroyGlobalAvgPoolDescriptor(infiniopGlobalAvgPoolDescriptor_t desc);
#endif
#ifndef __INFINIOP_MATMUL_H__
#define __INFINIOP_MATMUL_H__
#include "../operator.h"
// Matmul operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopMatmulDescriptor_t;
// Creates a Matmul descriptor from tensor descriptors c, a and b.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateMatmulDescriptor(infiniopHandle_t handle,
infiniopMatmulDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetMatmulWorkspaceSize(infiniopMatmulDescriptor_t desc, size_t *size);
// Runs the operator: a and b are read-only data pointers, c is writable.
// NOTE(review): presumably c = alpha * a * b + beta * c, i.e. c is also read
// when beta is non-zero - confirm against the implementation.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopMatmul(infiniopMatmulDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
void const *a,
void const *b,
float alpha,
float beta,
void *stream);
// Destroys a descriptor created by infiniopCreateMatmulDescriptor.
__C __export infiniopStatus_t infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc);
#endif
#ifndef __INFINIOP_MAX_POOL_H__
#define __INFINIOP_MAX_POOL_H__
#include "../operator.h"
// Max-pooling operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopMaxPoolDescriptor_t;
// Creates a MaxPool descriptor from tensor descriptors y and x and three
// read-only arrays (kernel_shape, pads, strides).
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): n is presumably the number of entries in each array - confirm.
// NOTE(review): strides is int64_t while kernel_shape/pads are uint64_t -
// confirm this is intentional.
__C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t handle,
infiniopMaxPoolDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x,
uint64_t const *kernel_shape,
uint64_t const *pads,
int64_t const *strides,
uint64_t n);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetMaxPoolWorkspaceSize(infiniopMaxPoolDescriptor_t desc, size_t *size);
// Runs the operator: x is a read-only data pointer, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopMaxPool(infiniopMaxPoolDescriptor_t desc,
void *workspace, size_t workspace_size,
void *y, void const *x, void *stream);
// Destroys a descriptor created by infiniopCreateMaxPoolDescriptor.
__C __export infiniopStatus_t infiniopDestroyMaxPoolDescriptor(infiniopMaxPoolDescriptor_t desc);
#endif
#ifndef __INFINIOP_MLP_H__
#define __INFINIOP_MLP_H__
#include "../operator.h"
#include "./matmul.h"
#include "./swiglu.h"
// MLP operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopMLPDescriptor_t;
// Creates an MLP descriptor from tensor descriptors y, x, w12 and w3 plus a
// float alpha and a char residual flag.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): the meaning of alpha and the accepted values of residual are
// not visible in this header - confirm against the implementation.
__C __export infiniopStatus_t infiniopCreateMLPDescriptor(infiniopHandle_t handle,
infiniopMLPDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
infiniopTensorDescriptor_t w12_desc,
infiniopTensorDescriptor_t w3_desc,
float alpha,
char residual);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetMLPWorkspaceSize(infiniopMLPDescriptor_t desc, size_t *size);
// Runs the operator: x, w12 and w3 are read-only data pointers, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopMLP(infiniopMLPDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
void const *x,
void const *w12,
void const *w3,
void *stream);
// Destroys a descriptor created by infiniopCreateMLPDescriptor.
__C __export infiniopStatus_t infiniopDestroyMLPDescriptor(infiniopMLPDescriptor_t desc);
#endif
#ifndef __INFINIOP_RANDOM_SAMPLE_H__
#define __INFINIOP_RANDOM_SAMPLE_H__
#include "../operator.h"
// Random-sample operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopRandomSampleDescriptor_t;
// Creates a RandomSample descriptor from tensor descriptors result and probs.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handle, infiniopRandomSampleDescriptor_t *desc_ptr, infiniopTensorDescriptor_t result, infiniopTensorDescriptor_t probs);
// Reports a workspace size through *size (presumably in bytes - confirm).
__C __export infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDescriptor_t desc, size_t *size);
// Runs the operator: probs is a read-only data pointer, result is writable.
// NOTE(review): random_val is presumably a caller-supplied random number, and
// topp / topk / temperature the usual sampling controls; their valid ranges
// are not visible in this header - confirm against the implementation.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *result,
void const *probs,
float random_val,
float topp,
int topk,
float temperature,
void *stream);
// Destroys a descriptor created by infiniopCreateRandomSampleDescriptor.
__C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc);
#endif
#ifndef __INFINIOP_REARRANGE_H__
#define __INFINIOP_REARRANGE_H__
#include "../operator.h"
// Rearrange operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopRearrangeDescriptor_t;
// Creates a Rearrange descriptor from tensor descriptors dst and src.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
// NOTE(review): presumably copies src into dst's layout - confirm.
__C __export infiniopStatus_t infiniopCreateRearrangeDescriptor(infiniopHandle_t handle,
infiniopRearrangeDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t dst,
infiniopTensorDescriptor_t src);
// Runs the operator: src is a read-only data pointer, dst is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, void const *src, void *stream);
// Destroys a descriptor created by infiniopCreateRearrangeDescriptor.
__C __export infiniopStatus_t infiniopDestroyRearrangeDescriptor(infiniopRearrangeDescriptor_t desc);
#endif
#ifndef __INFINIOP_RELU_H__
#define __INFINIOP_RELU_H__
#include "../operator.h"
// ReLU operator C API.
// Descriptor handle type: alias of a pointer to the base InfiniopDescriptor.
typedef InfiniopDescriptor *infiniopReluDescriptor_t;
// Creates a Relu descriptor from tensor descriptors y and x.
// desc_ptr is an out-pointer for the descriptor. Returns an infiniopStatus_t.
__C __export infiniopStatus_t infiniopCreateReluDescriptor(infiniopHandle_t handle,
infiniopReluDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
// Runs the operator: x is a read-only data pointer, y is writable.
// stream is an opaque, backend-specific pointer.
__C __export infiniopStatus_t infiniopRelu(infiniopReluDescriptor_t desc,
void *y,
void const *x,
void *stream);
// Destroys a descriptor created by infiniopCreateReluDescriptor.
__C __export infiniopStatus_t infiniopDestroyReluDescriptor(infiniopReluDescriptor_t desc);
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment