Unverified commit dce99862, authored by thatPepe and committed by GitHub
Browse files

Merge pull request #1053 from InfiniTensor/issue/1033xmake

Issue/1033 patch aten and fa adaptations
parents 8d99a8f5 d6e44e84
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include "kunlun/random_sample_kunlun.h" #include "kunlun/random_sample_kunlun.h"
#endif #endif
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopCreateRandomSampleDescriptor( infiniopCreateRandomSampleDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopRandomSampleDescriptor_t *desc_ptr, infiniopRandomSampleDescriptor_t *desc_ptr,
...@@ -82,7 +82,7 @@ infiniopCreateRandomSampleDescriptor( ...@@ -82,7 +82,7 @@ infiniopCreateRandomSampleDescriptor(
#undef CREATE #undef CREATE
}; };
__C infiniStatus_t infiniopGetRandomSampleWorkspaceSize( __INFINI_C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
infiniopRandomSampleDescriptor_t desc, infiniopRandomSampleDescriptor_t desc,
size_t *size) { size_t *size) {
...@@ -136,7 +136,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize( ...@@ -136,7 +136,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
#undef GET #undef GET
} }
__C infiniStatus_t infiniopRandomSample( __INFINI_C infiniStatus_t infiniopRandomSample(
infiniopRandomSampleDescriptor_t desc, infiniopRandomSampleDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -200,7 +200,7 @@ __C infiniStatus_t infiniopRandomSample( ...@@ -200,7 +200,7 @@ __C infiniStatus_t infiniopRandomSample(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroyRandomSampleDescriptor( __INFINI_C infiniStatus_t infiniopDestroyRandomSampleDescriptor(
infiniopRandomSampleDescriptor_t desc) { infiniopRandomSampleDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include "kunlun/rearrange_kunlun.h" #include "kunlun/rearrange_kunlun.h"
#endif #endif
__C infiniStatus_t infiniopCreateRearrangeDescriptor( __INFINI_C infiniStatus_t infiniopCreateRearrangeDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopRearrangeDescriptor_t *desc_ptr, infiniopRearrangeDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t dst, infiniopTensorDescriptor_t dst,
...@@ -80,7 +80,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor( ...@@ -80,7 +80,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopRearrange( __INFINI_C infiniStatus_t infiniopRearrange(
infiniopRearrangeDescriptor_t desc, infiniopRearrangeDescriptor_t desc,
void *dst, void *dst,
const void *src, const void *src,
...@@ -134,7 +134,7 @@ __C infiniStatus_t infiniopRearrange( ...@@ -134,7 +134,7 @@ __C infiniStatus_t infiniopRearrange(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroyRearrangeDescriptor( __INFINI_C infiniStatus_t infiniopDestroyRearrangeDescriptor(
infiniopRearrangeDescriptor_t desc) { infiniopRearrangeDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#endif #endif
#endif #endif
__C infiniStatus_t infiniopCreateReluDescriptor( __INFINI_C infiniStatus_t infiniopCreateReluDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopReluDescriptor_t *desc_ptr, infiniopReluDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateReluDescriptor( ...@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateReluDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -95,7 +95,7 @@ __C infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, s ...@@ -95,7 +95,7 @@ __C infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, s
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopRelu( __INFINI_C infiniStatus_t infiniopRelu(
infiniopReluDescriptor_t desc, infiniopReluDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -138,7 +138,7 @@ __C infiniStatus_t infiniopRelu( ...@@ -138,7 +138,7 @@ __C infiniStatus_t infiniopRelu(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroyReluDescriptor(infiniopReluDescriptor_t desc) { infiniopDestroyReluDescriptor(infiniopReluDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include "kunlun/rms_norm_kunlun.h" #include "kunlun/rms_norm_kunlun.h"
#endif #endif
__C infiniStatus_t infiniopCreateRMSNormDescriptor( __INFINI_C infiniStatus_t infiniopCreateRMSNormDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopRMSNormDescriptor_t *desc_ptr, infiniopRMSNormDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor( ...@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -131,8 +131,8 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d ...@@ -131,8 +131,8 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d
#undef GET #undef GET
} }
__C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size, __INFINI_C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, const void *x, const void *w, void *stream) { void *y, const void *x, const void *w, void *stream) {
#define CALCULATE(CASE, NAMESPACE) \ #define CALCULATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -180,7 +180,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works ...@@ -180,7 +180,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#include "moore/rope_moore.h" #include "moore/rope_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateRoPEDescriptor( __INFINI_C infiniStatus_t infiniopCreateRoPEDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopRoPEDescriptor_t *desc_ptr, infiniopRoPEDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y, infiniopTensorDescriptor_t y,
...@@ -87,8 +87,8 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor( ...@@ -87,8 +87,8 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, __INFINI_C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
size_t *size) { size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
*size = reinterpret_cast<const op::rope::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \ *size = reinterpret_cast<const op::rope::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
...@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, ...@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
#undef GET #undef GET
} }
__C infiniStatus_t infiniopRoPE( __INFINI_C infiniStatus_t infiniopRoPE(
infiniopRoPEDescriptor_t desc, infiniopRoPEDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -192,7 +192,7 @@ __C infiniStatus_t infiniopRoPE( ...@@ -192,7 +192,7 @@ __C infiniStatus_t infiniopRoPE(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroyRoPEDescriptor(infiniopRoPEDescriptor_t desc) { infiniopDestroyRoPEDescriptor(infiniopRoPEDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -10,14 +10,14 @@ ...@@ -10,14 +10,14 @@
#include "moore/int8_gemm_moore.h" #include "moore/int8_gemm_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateI8GemmDescriptor(infiniopHandle_t handle, __INFINI_C infiniStatus_t infiniopCreateI8GemmDescriptor(infiniopHandle_t handle,
infiniopI8GemmDescriptor_t *desc_ptr, infiniopI8GemmDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc, infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t bias_desc, infiniopTensorDescriptor_t bias_desc,
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t a_scale_desc, infiniopTensorDescriptor_t a_scale_desc,
infiniopTensorDescriptor_t b_desc, infiniopTensorDescriptor_t b_desc,
infiniopTensorDescriptor_t b_scale_desc) { infiniopTensorDescriptor_t b_scale_desc) {
#define CREATE(CASE, NAMESPACE) \ #define CREATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
return op::i8gemm::NAMESPACE::Descriptor::create( \ return op::i8gemm::NAMESPACE::Descriptor::create( \
...@@ -45,7 +45,7 @@ __C infiniStatus_t infiniopCreateI8GemmDescriptor(infiniopHandle_t handle, ...@@ -45,7 +45,7 @@ __C infiniStatus_t infiniopCreateI8GemmDescriptor(infiniopHandle_t handle,
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetI8GemmWorkspaceSize(infiniopI8GemmDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetI8GemmWorkspaceSize(infiniopI8GemmDescriptor_t desc, size_t *size) {
switch (desc->device_type) { switch (desc->device_type) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -66,16 +66,16 @@ __C infiniStatus_t infiniopGetI8GemmWorkspaceSize(infiniopI8GemmDescriptor_t des ...@@ -66,16 +66,16 @@ __C infiniStatus_t infiniopGetI8GemmWorkspaceSize(infiniopI8GemmDescriptor_t des
#undef GET #undef GET
} }
__C infiniStatus_t infiniopI8Gemm(infiniopI8GemmDescriptor_t desc, __INFINI_C infiniStatus_t infiniopI8Gemm(infiniopI8GemmDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
void *out, void *out,
const void *bias, const void *bias,
const void *a, const void *a,
const void *a_scale, const void *a_scale,
const void *b, const void *b,
const void *b_scale, const void *b_scale,
void *stream) { void *stream) {
#define CACULATE(CASE, NAMESPACE) \ #define CACULATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
return reinterpret_cast<op::i8gemm::NAMESPACE::Descriptor *>(desc)->calculate( \ return reinterpret_cast<op::i8gemm::NAMESPACE::Descriptor *>(desc)->calculate( \
...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopI8Gemm(infiniopI8GemmDescriptor_t desc, ...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopI8Gemm(infiniopI8GemmDescriptor_t desc,
#undef CACULATE #undef CACULATE
} }
__C infiniStatus_t infiniopDestroyI8GemmDescriptor(infiniopI8GemmDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroyI8GemmDescriptor(infiniopI8GemmDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
delete reinterpret_cast<op::i8gemm::NAMESPACE::Descriptor *>(desc); \ delete reinterpret_cast<op::i8gemm::NAMESPACE::Descriptor *>(desc); \
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#include "nvidia/sigmoid_nvidia.cuh" #include "nvidia/sigmoid_nvidia.cuh"
#endif #endif
__C infiniStatus_t infiniopCreateSigmoidDescriptor( __INFINI_C infiniStatus_t infiniopCreateSigmoidDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSigmoidDescriptor_t *desc_ptr, infiniopSigmoidDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -48,7 +48,7 @@ __C infiniStatus_t infiniopCreateSigmoidDescriptor( ...@@ -48,7 +48,7 @@ __C infiniStatus_t infiniopCreateSigmoidDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetSigmoidWorkspaceSize(infiniopSigmoidDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSigmoidWorkspaceSize(infiniopSigmoidDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -79,7 +79,7 @@ __C infiniStatus_t infiniopGetSigmoidWorkspaceSize(infiniopSigmoidDescriptor_t d ...@@ -79,7 +79,7 @@ __C infiniStatus_t infiniopGetSigmoidWorkspaceSize(infiniopSigmoidDescriptor_t d
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopSigmoid( __INFINI_C infiniStatus_t infiniopSigmoid(
infiniopSigmoidDescriptor_t desc, infiniopSigmoidDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopSigmoid( ...@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopSigmoid(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroySigmoidDescriptor(infiniopSigmoidDescriptor_t desc) { infiniopDestroySigmoidDescriptor(infiniopSigmoidDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "moore/silu_moore.h" #include "moore/silu_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateSiluDescriptor( __INFINI_C infiniStatus_t infiniopCreateSiluDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSiluDescriptor_t *desc_ptr, infiniopSiluDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc, infiniopTensorDescriptor_t output_desc,
...@@ -57,7 +57,7 @@ __C infiniStatus_t infiniopCreateSiluDescriptor( ...@@ -57,7 +57,7 @@ __C infiniStatus_t infiniopCreateSiluDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, s ...@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, s
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopSilu( __INFINI_C infiniStatus_t infiniopSilu(
infiniopSiluDescriptor_t desc, infiniopSiluDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -133,7 +133,7 @@ __C infiniStatus_t infiniopSilu( ...@@ -133,7 +133,7 @@ __C infiniStatus_t infiniopSilu(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroySiluDescriptor(infiniopSiluDescriptor_t desc) { infiniopDestroySiluDescriptor(infiniopSiluDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "moore/silu_and_mul_moore.h" #include "moore/silu_and_mul_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateSiluAndMulDescriptor( __INFINI_C infiniStatus_t infiniopCreateSiluAndMulDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSiluAndMulDescriptor_t *desc_ptr, infiniopSiluAndMulDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateSiluAndMulDescriptor( ...@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateSiluAndMulDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetSiluAndMulWorkspaceSize(infiniopSiluAndMulDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSiluAndMulWorkspaceSize(infiniopSiluAndMulDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopGetSiluAndMulWorkspaceSize(infiniopSiluAndMulDescript ...@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopGetSiluAndMulWorkspaceSize(infiniopSiluAndMulDescript
#undef GET #undef GET
} }
__C infiniStatus_t infiniopSiluAndMul( __INFINI_C infiniStatus_t infiniopSiluAndMul(
infiniopSiluAndMulDescriptor_t desc, infiniopSiluAndMulDescriptor_t desc,
void *workspace, size_t workspace_size, void *workspace, size_t workspace_size,
void *y, void *y,
...@@ -72,7 +72,7 @@ __C infiniStatus_t infiniopSiluAndMul( ...@@ -72,7 +72,7 @@ __C infiniStatus_t infiniopSiluAndMul(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroySiluAndMulDescriptor(infiniopSiluAndMulDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroySiluAndMulDescriptor(infiniopSiluAndMulDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "nvidia/softmax_nvidia.cuh" #include "nvidia/softmax_nvidia.cuh"
#endif #endif
__C infiniStatus_t infiniopCreateSoftmaxDescriptor( __INFINI_C infiniStatus_t infiniopCreateSoftmaxDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSoftmaxDescriptor_t *desc_ptr, infiniopSoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -42,7 +42,7 @@ __C infiniStatus_t infiniopCreateSoftmaxDescriptor( ...@@ -42,7 +42,7 @@ __C infiniStatus_t infiniopCreateSoftmaxDescriptor(
} }
} }
__C infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -70,7 +70,7 @@ __C infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t d ...@@ -70,7 +70,7 @@ __C infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t d
} }
} }
__C infiniStatus_t infiniopSoftmax( __INFINI_C infiniStatus_t infiniopSoftmax(
infiniopSoftmaxDescriptor_t desc, infiniopSoftmaxDescriptor_t desc,
void *workspace, size_t workspace_size, void *workspace, size_t workspace_size,
void *y, void *y,
...@@ -103,7 +103,7 @@ __C infiniStatus_t infiniopSoftmax( ...@@ -103,7 +103,7 @@ __C infiniStatus_t infiniopSoftmax(
} }
} }
__C infiniStatus_t infiniopDestroySoftmaxDescriptor(infiniopSoftmaxDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroySoftmaxDescriptor(infiniopSoftmaxDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "kunlun/softplus_kunlun.h" #include "kunlun/softplus_kunlun.h"
#endif #endif
__C infiniStatus_t infiniopCreateSoftplusDescriptor( __INFINI_C infiniStatus_t infiniopCreateSoftplusDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSoftplusDescriptor_t *desc_ptr, infiniopSoftplusDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -60,7 +60,7 @@ __C infiniStatus_t infiniopCreateSoftplusDescriptor( ...@@ -60,7 +60,7 @@ __C infiniStatus_t infiniopCreateSoftplusDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetSoftplusWorkspaceSize(infiniopSoftplusDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSoftplusWorkspaceSize(infiniopSoftplusDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetSoftplusWorkspaceSize(infiniopSoftplusDescriptor_t ...@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetSoftplusWorkspaceSize(infiniopSoftplusDescriptor_t
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopSoftplus( __INFINI_C infiniStatus_t infiniopSoftplus(
infiniopSoftplusDescriptor_t desc, infiniopSoftplusDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopSoftplus( ...@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopSoftplus(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroySoftplusDescriptor(infiniopSoftplusDescriptor_t desc) { infiniopDestroySoftplusDescriptor(infiniopSoftplusDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "kunlun/sub_kunlun.h" #include "kunlun/sub_kunlun.h"
#endif #endif
__C infiniStatus_t infiniopCreateSubDescriptor( __INFINI_C infiniStatus_t infiniopCreateSubDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSubDescriptor_t *desc_ptr, infiniopSubDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc, infiniopTensorDescriptor_t c_desc,
...@@ -62,7 +62,7 @@ __C infiniStatus_t infiniopCreateSubDescriptor( ...@@ -62,7 +62,7 @@ __C infiniStatus_t infiniopCreateSubDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -100,7 +100,7 @@ __C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, siz ...@@ -100,7 +100,7 @@ __C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, siz
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopSub( __INFINI_C infiniStatus_t infiniopSub(
infiniopSubDescriptor_t desc, infiniopSubDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -145,7 +145,7 @@ __C infiniStatus_t infiniopSub( ...@@ -145,7 +145,7 @@ __C infiniStatus_t infiniopSub(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroySubDescriptor(infiniopSubDescriptor_t desc) { infiniopDestroySubDescriptor(infiniopSubDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#include "moore/swiglu_moore.h" #include "moore/swiglu_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateSwiGLUDescriptor( __INFINI_C infiniStatus_t infiniopCreateSwiGLUDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopSwiGLUDescriptor_t *desc_ptr, infiniopSwiGLUDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc, infiniopTensorDescriptor_t c_desc,
...@@ -105,7 +105,7 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor( ...@@ -105,7 +105,7 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor(
#undef CREATE_CUDA #undef CREATE_CUDA
} }
__C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -168,7 +168,7 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des ...@@ -168,7 +168,7 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
#undef GET_CUDA #undef GET_CUDA
} }
__C infiniStatus_t infiniopSwiGLU( __INFINI_C infiniStatus_t infiniopSwiGLU(
infiniopSwiGLUDescriptor_t desc, infiniopSwiGLUDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -239,7 +239,7 @@ __C infiniStatus_t infiniopSwiGLU( ...@@ -239,7 +239,7 @@ __C infiniStatus_t infiniopSwiGLU(
#undef CALCULATE_CUDA #undef CALCULATE_CUDA
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) { infiniopDestroySwiGLUDescriptor(infiniopSwiGLUDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
// #include "metax/tanh_metax.h" // #include "metax/tanh_metax.h"
// #endif // #endif
__C infiniStatus_t infiniopCreateTanhDescriptor( __INFINI_C infiniStatus_t infiniopCreateTanhDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopTanhDescriptor_t *desc_ptr, infiniopTanhDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc, infiniopTensorDescriptor_t output_desc,
...@@ -55,7 +55,7 @@ __C infiniStatus_t infiniopCreateTanhDescriptor( ...@@ -55,7 +55,7 @@ __C infiniStatus_t infiniopCreateTanhDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -90,7 +90,7 @@ __C infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, s ...@@ -90,7 +90,7 @@ __C infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, s
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopTanh( __INFINI_C infiniStatus_t infiniopTanh(
infiniopTanhDescriptor_t desc, infiniopTanhDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -132,7 +132,7 @@ __C infiniStatus_t infiniopTanh( ...@@ -132,7 +132,7 @@ __C infiniStatus_t infiniopTanh(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroyTanhDescriptor(infiniopTanhDescriptor_t desc) { infiniopDestroyTanhDescriptor(infiniopTanhDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -15,9 +15,9 @@ ...@@ -15,9 +15,9 @@
#include "kunlun/topkrouter_kunlun.h" #include "kunlun/topkrouter_kunlun.h"
#endif #endif
__C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, infiniopTopkrouterDescriptor_t *desc_ptr, __INFINI_C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, infiniopTopkrouterDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x_desc, infiniopTensorDescriptor_t x_desc,
infiniopTensorDescriptor_t correction_bias_desc) { infiniopTensorDescriptor_t correction_bias_desc) {
#define CREATE(CASE, NAMESPACE) \ #define CREATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
return op::topkrouter::NAMESPACE::Descriptor::create( \ return op::topkrouter::NAMESPACE::Descriptor::create( \
...@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, i ...@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, i
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
*size = reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \ *size = reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
...@@ -81,9 +81,9 @@ __C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescript ...@@ -81,9 +81,9 @@ __C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescript
#undef GET #undef GET
} }
__C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void *workspace, size_t workspace_size, __INFINI_C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void *workspace, size_t workspace_size,
void *values, void *indices, const void *x, const void *correction_bias, void *values, void *indices, const void *x, const void *correction_bias,
const float routed_scaling_factor, const size_t topk, void *stream) { const float routed_scaling_factor, const size_t topk, void *stream) {
#define CALCULATE(CASE, NAMESPACE) \ #define CALCULATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
return reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc)->calculate( \ return reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc)->calculate( \
...@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void ...@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroyTopkrouterDescriptor(infiniopTopkrouterDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroyTopkrouterDescriptor(infiniopTopkrouterDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
delete reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc); \ delete reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc); \
......
...@@ -12,9 +12,9 @@ ...@@ -12,9 +12,9 @@
#include "metax/topksoftmax_metax.cuh" #include "metax/topksoftmax_metax.cuh"
#endif #endif
__C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle, __INFINI_C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle,
infiniopTopksoftmaxDescriptor_t *desc_ptr, infiniopTopksoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t x_desc) { infiniopTensorDescriptor_t x_desc) {
#define CREATE(CASE, NAMESPACE) \ #define CREATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -47,7 +47,7 @@ __C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle, ...@@ -47,7 +47,7 @@ __C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle,
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -80,9 +80,9 @@ __C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescri ...@@ -80,9 +80,9 @@ __C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescri
#undef GET #undef GET
} }
__C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, void *workspace, size_t workspace_size, __INFINI_C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, void *workspace, size_t workspace_size,
void *values, void *indices, const void *x, const size_t topk, const int norm, void *values, void *indices, const void *x, const size_t topk, const int norm,
void *stream) { void *stream) {
if (topk > 32) { if (topk > 32) {
return INFINI_STATUS_BAD_PARAM; return INFINI_STATUS_BAD_PARAM;
} }
...@@ -118,7 +118,7 @@ __C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, voi ...@@ -118,7 +118,7 @@ __C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, voi
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t infiniopDestroyTopksoftmaxDescriptor(infiniopTopksoftmaxDescriptor_t desc) { __INFINI_C infiniStatus_t infiniopDestroyTopksoftmaxDescriptor(infiniopTopksoftmaxDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \ #define DESTROY(CASE, NAMESPACE) \
case CASE: \ case CASE: \
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
#include "moore/zeros_moore.h" #include "moore/zeros_moore.h"
#endif #endif
__C infiniStatus_t infiniopCreateZerosDescriptor( __INFINI_C infiniStatus_t infiniopCreateZerosDescriptor(
infiniopHandle_t handle, infiniopHandle_t handle,
infiniopZerosDescriptor_t *desc_ptr, infiniopZerosDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc, infiniopTensorDescriptor_t y_desc,
...@@ -59,7 +59,7 @@ __C infiniStatus_t infiniopCreateZerosDescriptor( ...@@ -59,7 +59,7 @@ __C infiniStatus_t infiniopCreateZerosDescriptor(
#undef CREATE #undef CREATE
} }
__C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc, size_t *size) { __INFINI_C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc, ...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc,
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniStatus_t infiniopZeros( __INFINI_C infiniStatus_t infiniopZeros(
infiniopZerosDescriptor_t desc, infiniopZerosDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
...@@ -139,7 +139,7 @@ __C infiniStatus_t infiniopZeros( ...@@ -139,7 +139,7 @@ __C infiniStatus_t infiniopZeros(
#undef CALCULATE #undef CALCULATE
} }
__C infiniStatus_t __INFINI_C infiniStatus_t
infiniopDestroyZerosDescriptor(infiniopZerosDescriptor_t desc) { infiniopDestroyZerosDescriptor(infiniopZerosDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <functional> #include <functional>
#include <numeric> #include <numeric>
__C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, ptrdiff_t const *strides_, infiniDtype_t datatype) { __INFINI_C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, ptrdiff_t const *strides_, infiniDtype_t datatype) {
if (strides_ != nullptr) { if (strides_ != nullptr) {
*desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides_); *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides_);
} else { } else {
...@@ -23,7 +23,7 @@ __C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescrip ...@@ -23,7 +23,7 @@ __C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescrip
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
__C __export infiniStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc) { __INFINI_C __export infiniStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc) {
delete desc; delete desc;
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
...@@ -13,14 +13,14 @@ thread_local int CURRENT_DEVICE_ID = 0; ...@@ -13,14 +13,14 @@ thread_local int CURRENT_DEVICE_ID = 0;
thread_local infiniDevice_t PREVIOUS_NON_CPU_DEVICE_TYPE = INFINI_DEVICE_TYPE_COUNT; thread_local infiniDevice_t PREVIOUS_NON_CPU_DEVICE_TYPE = INFINI_DEVICE_TYPE_COUNT;
thread_local int PREVIOUS_NON_CPU_DEVICE_ID = 0; thread_local int PREVIOUS_NON_CPU_DEVICE_ID = 0;
__C infiniStatus_t infinirtInit() { __INFINI_C infiniStatus_t infinirtInit() {
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
CHECK_STATUS(infinirt::ascend::init()); CHECK_STATUS(infinirt::ascend::init());
#endif #endif
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
__C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) { __INFINI_C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
if (count_array == nullptr) { if (count_array == nullptr) {
return INFINI_STATUS_NULL_POINTER; return INFINI_STATUS_NULL_POINTER;
} }
...@@ -33,7 +33,7 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) { ...@@ -33,7 +33,7 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
__C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ptr) { __INFINI_C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ptr) {
if (device_ptr == nullptr && device_id_ptr == nullptr) { if (device_ptr == nullptr && device_id_ptr == nullptr) {
return INFINI_STATUS_NULL_POINTER; return INFINI_STATUS_NULL_POINTER;
} }
...@@ -93,14 +93,14 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ ...@@ -93,14 +93,14 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
#define INFINIRT_CALL_DEVICE_API(API, PARAMS) INFINIRT_CALL_DEVICE_API_AND(CURRENT_DEVICE_TYPE, API, PARAMS, ) #define INFINIRT_CALL_DEVICE_API(API, PARAMS) INFINIRT_CALL_DEVICE_API_AND(CURRENT_DEVICE_TYPE, API, PARAMS, )
__C infiniStatus_t infinirtGetDeviceCount(infiniDevice_t device, int *count) { __INFINI_C infiniStatus_t infinirtGetDeviceCount(infiniDevice_t device, int *count) {
if (count == nullptr) { if (count == nullptr) {
return INFINI_STATUS_NULL_POINTER; return INFINI_STATUS_NULL_POINTER;
} }
INFINIRT_CALL_DEVICE_API_AND(device, getDeviceCount, (count), {}); INFINIRT_CALL_DEVICE_API_AND(device, getDeviceCount, (count), {});
} }
__C infiniStatus_t infinirtSetDevice(infiniDevice_t device, int device_id) { __INFINI_C infiniStatus_t infinirtSetDevice(infiniDevice_t device, int device_id) {
bool skip_set = CURRENT_DEVICE_TYPE == INFINI_DEVICE_CPU && device == PREVIOUS_NON_CPU_DEVICE_TYPE && device_id == PREVIOUS_NON_CPU_DEVICE_ID; bool skip_set = CURRENT_DEVICE_TYPE == INFINI_DEVICE_CPU && device == PREVIOUS_NON_CPU_DEVICE_TYPE && device_id == PREVIOUS_NON_CPU_DEVICE_ID;
if (CURRENT_DEVICE_TYPE != INFINI_DEVICE_CPU) { if (CURRENT_DEVICE_TYPE != INFINI_DEVICE_CPU) {
PREVIOUS_NON_CPU_DEVICE_TYPE = CURRENT_DEVICE_TYPE; PREVIOUS_NON_CPU_DEVICE_TYPE = CURRENT_DEVICE_TYPE;
...@@ -116,99 +116,99 @@ __C infiniStatus_t infinirtSetDevice(infiniDevice_t device, int device_id) { ...@@ -116,99 +116,99 @@ __C infiniStatus_t infinirtSetDevice(infiniDevice_t device, int device_id) {
CURRENT_DEVICE_ID = device_id; }); CURRENT_DEVICE_ID = device_id; });
} }
__C infiniStatus_t infinirtDeviceSynchronize() { __INFINI_C infiniStatus_t infinirtDeviceSynchronize() {
INFINIRT_CALL_DEVICE_API(deviceSynchronize, ()); INFINIRT_CALL_DEVICE_API(deviceSynchronize, ());
} }
__C infiniStatus_t infinirtStreamCreate(infinirtStream_t *stream_ptr) { __INFINI_C infiniStatus_t infinirtStreamCreate(infinirtStream_t *stream_ptr) {
INFINIRT_CALL_DEVICE_API(streamCreate, (stream_ptr)); INFINIRT_CALL_DEVICE_API(streamCreate, (stream_ptr));
} }
__C infiniStatus_t infinirtStreamDestroy(infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtStreamDestroy(infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(streamDestroy, (stream)); INFINIRT_CALL_DEVICE_API(streamDestroy, (stream));
} }
__C infiniStatus_t infinirtStreamSynchronize(infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtStreamSynchronize(infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(streamSynchronize, (stream)); INFINIRT_CALL_DEVICE_API(streamSynchronize, (stream));
} }
__C infiniStatus_t infinirtStreamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) { __INFINI_C infiniStatus_t infinirtStreamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
INFINIRT_CALL_DEVICE_API(streamWaitEvent, (stream, event)); INFINIRT_CALL_DEVICE_API(streamWaitEvent, (stream, event));
} }
__C infiniStatus_t infinirtEventCreate(infinirtEvent_t *event_ptr) { __INFINI_C infiniStatus_t infinirtEventCreate(infinirtEvent_t *event_ptr) {
INFINIRT_CALL_DEVICE_API(eventCreate, (event_ptr)); INFINIRT_CALL_DEVICE_API(eventCreate, (event_ptr));
} }
__C infiniStatus_t infinirtEventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) { __INFINI_C infiniStatus_t infinirtEventCreateWithFlags(infinirtEvent_t *event_ptr, uint32_t flags) {
INFINIRT_CALL_DEVICE_API(eventCreateWithFlags, (event_ptr, flags)); INFINIRT_CALL_DEVICE_API(eventCreateWithFlags, (event_ptr, flags));
} }
__C infiniStatus_t infinirtEventRecord(infinirtEvent_t event, infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtEventRecord(infinirtEvent_t event, infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(eventRecord, (event, stream)); INFINIRT_CALL_DEVICE_API(eventRecord, (event, stream));
} }
__C infiniStatus_t infinirtEventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) { __INFINI_C infiniStatus_t infinirtEventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
INFINIRT_CALL_DEVICE_API(eventQuery, (event, status_ptr)); INFINIRT_CALL_DEVICE_API(eventQuery, (event, status_ptr));
} }
__C infiniStatus_t infinirtEventSynchronize(infinirtEvent_t event) { __INFINI_C infiniStatus_t infinirtEventSynchronize(infinirtEvent_t event) {
INFINIRT_CALL_DEVICE_API(eventSynchronize, (event)); INFINIRT_CALL_DEVICE_API(eventSynchronize, (event));
} }
__C infiniStatus_t infinirtEventDestroy(infinirtEvent_t event) { __INFINI_C infiniStatus_t infinirtEventDestroy(infinirtEvent_t event) {
INFINIRT_CALL_DEVICE_API(eventDestroy, (event)); INFINIRT_CALL_DEVICE_API(eventDestroy, (event));
} }
__C infiniStatus_t infinirtEventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) { __INFINI_C infiniStatus_t infinirtEventElapsedTime(float *ms_ptr, infinirtEvent_t start, infinirtEvent_t end) {
INFINIRT_CALL_DEVICE_API(eventElapsedTime, (ms_ptr, start, end)); INFINIRT_CALL_DEVICE_API(eventElapsedTime, (ms_ptr, start, end));
} }
__C infiniStatus_t infinirtMalloc(void **p_ptr, size_t size) { __INFINI_C infiniStatus_t infinirtMalloc(void **p_ptr, size_t size) {
INFINIRT_CALL_DEVICE_API(mallocDevice, (p_ptr, size)); INFINIRT_CALL_DEVICE_API(mallocDevice, (p_ptr, size));
} }
__C infiniStatus_t infinirtMallocHost(void **p_ptr, size_t size) { __INFINI_C infiniStatus_t infinirtMallocHost(void **p_ptr, size_t size) {
INFINIRT_CALL_DEVICE_API(mallocHost, (p_ptr, size)); INFINIRT_CALL_DEVICE_API(mallocHost, (p_ptr, size));
} }
__C infiniStatus_t infinirtFree(void *ptr) { __INFINI_C infiniStatus_t infinirtFree(void *ptr) {
INFINIRT_CALL_DEVICE_API(freeDevice, (ptr)); INFINIRT_CALL_DEVICE_API(freeDevice, (ptr));
} }
__C infiniStatus_t infinirtFreeHost(void *ptr) { __INFINI_C infiniStatus_t infinirtFreeHost(void *ptr) {
INFINIRT_CALL_DEVICE_API(freeHost, (ptr)); INFINIRT_CALL_DEVICE_API(freeHost, (ptr));
} }
__C infiniStatus_t infinirtMemcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) { __INFINI_C infiniStatus_t infinirtMemcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
INFINIRT_CALL_DEVICE_API(memcpy, (dst, src, size, kind)); INFINIRT_CALL_DEVICE_API(memcpy, (dst, src, size, kind));
} }
__C infiniStatus_t infinirtMemcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtMemcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(memcpyAsync, (dst, src, size, kind, stream)); INFINIRT_CALL_DEVICE_API(memcpyAsync, (dst, src, size, kind, stream));
} }
__C infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtMallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(mallocAsync, (p_ptr, size, stream)); INFINIRT_CALL_DEVICE_API(mallocAsync, (p_ptr, size, stream));
} }
__C infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtFreeAsync(void *ptr, infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(freeAsync, (ptr, stream)); INFINIRT_CALL_DEVICE_API(freeAsync, (ptr, stream));
} }
__C infiniStatus_t infinirtStreamBeginCapture(infinirtStream_t stream, infinirtStreamCaptureMode_t mode) { __INFINI_C infiniStatus_t infinirtStreamBeginCapture(infinirtStream_t stream, infinirtStreamCaptureMode_t mode) {
INFINIRT_CALL_DEVICE_API(streamBeginCapture, (stream, mode)); INFINIRT_CALL_DEVICE_API(streamBeginCapture, (stream, mode));
} }
__C infiniStatus_t infinirtStreamEndCapture(infinirtStream_t stream, infinirtGraph_t *graph_ptr) { __INFINI_C infiniStatus_t infinirtStreamEndCapture(infinirtStream_t stream, infinirtGraph_t *graph_ptr) {
INFINIRT_CALL_DEVICE_API(streamEndCapture, (stream, graph_ptr)); INFINIRT_CALL_DEVICE_API(streamEndCapture, (stream, graph_ptr));
} }
__C infiniStatus_t infinirtGraphDestroy(infinirtGraph_t graph) { __INFINI_C infiniStatus_t infinirtGraphDestroy(infinirtGraph_t graph) {
INFINIRT_CALL_DEVICE_API(graphDestroy, (graph)); INFINIRT_CALL_DEVICE_API(graphDestroy, (graph));
} }
__C infiniStatus_t infinirtGraphInstantiate( __INFINI_C infiniStatus_t infinirtGraphInstantiate(
infinirtGraphExec_t *graph_exec_ptr, infinirtGraphExec_t *graph_exec_ptr,
infinirtGraph_t graph, infinirtGraph_t graph,
infinirtGraphNode_t *node_ptr, infinirtGraphNode_t *node_ptr,
...@@ -217,10 +217,10 @@ __C infiniStatus_t infinirtGraphInstantiate( ...@@ -217,10 +217,10 @@ __C infiniStatus_t infinirtGraphInstantiate(
INFINIRT_CALL_DEVICE_API(graphInstantiate, (graph_exec_ptr, graph, node_ptr, log_buffer, buffer_size)); INFINIRT_CALL_DEVICE_API(graphInstantiate, (graph_exec_ptr, graph, node_ptr, log_buffer, buffer_size));
} }
__C infiniStatus_t infinirtGraphExecDestroy(infinirtGraphExec_t graph_exec) { __INFINI_C infiniStatus_t infinirtGraphExecDestroy(infinirtGraphExec_t graph_exec) {
INFINIRT_CALL_DEVICE_API(graphExecDestroy, (graph_exec)); INFINIRT_CALL_DEVICE_API(graphExecDestroy, (graph_exec));
} }
__C infiniStatus_t infinirtGraphLuanch(infinirtGraphExec_t graph_exec, infinirtStream_t stream) { __INFINI_C infiniStatus_t infinirtGraphLuanch(infinirtGraphExec_t graph_exec, infinirtStream_t stream) {
INFINIRT_CALL_DEVICE_API(graphLuanch, (graph_exec, stream)); INFINIRT_CALL_DEVICE_API(graphLuanch, (graph_exec, stream));
} }
import os
import sys
import torch
import infinicore
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from framework import (
BaseOperatorTest,
GenericTestRunner,
TensorInitializer,
TensorSpec,
TestCase,
)
# Test cases: (num_heads, num_kv_heads, head_size, block_size, request_batches).
# request_batches is a list of rounds; each round is a tuple giving the number
# of new query tokens for every sequence in that round. parse_test_cases()
# requires all rounds of one entry to contain the same number of sequences and
# accumulates the KV length of each sequence from round to round.
_TEST_CASES_DATA = [
(1, 1, 128, 256, [(250,), (7,)]),
(4, 4, 128, 256, [(250,), (7,)]),
(1, 1, 128, 256, [(260, 73), (1, 1)]),
(8, 2, 128, 256, [(250,), (7,)]),
(8, 2, 128, 256, [(260, 73), (1, 1)]),
]
# Passed unchanged as both the max_seqlen_q and max_seqlen_k kwargs of every case.
_MAX_SEQUENCE_LENGTH = 8192
# Comparison tolerances looked up per tensor dtype when a case is built.
_TOLERANCE_MAP = {
infinicore.float16: {"atol": 1e-2, "rtol": 1e-2},
infinicore.bfloat16: {"atol": 2e-2, "rtol": 2e-2},
}
# Every round of every configuration is emitted once per dtype listed here.
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16]
class SimpleCacheManager:
    """Minimal block allocator that maps requests onto fixed-size cache blocks.

    Blocks are identified by integers ``0 .. num_blocks - 1`` and are handed
    out in ascending order. Blocks are never returned to the pool.

    Attributes are plain containers so tests can inspect them:
    ``free_blocks`` (list of unallocated block ids), ``request_to_blocks``
    (request id -> list of block ids) and ``request_to_len`` (request id ->
    number of tokens stored so far).
    """

    def __init__(self, num_blocks, block_size):
        self.num_blocks = num_blocks
        self.block_size = block_size
        self.free_blocks = list(range(num_blocks))
        self.request_to_blocks = {}
        self.request_to_len = {}

    def allocate_slots(self, request_id, num_new_tokens):
        """Reserve room for ``num_new_tokens`` more tokens of ``request_id``.

        Args:
            request_id: Hashable identifier of the request; unknown ids start
                with zero tokens and no blocks.
            num_new_tokens: Number of tokens appended to the request.

        Returns:
            A ``(blocks, total_len)`` tuple: the request's block list (the
            same list object the manager keeps) and its new token count.

        Raises:
            IndexError: If the pool does not hold enough free blocks. The
                manager state is left untouched in that case.
        """
        blocks = self.request_to_blocks.get(request_id, [])
        start_pos = self.request_to_len.get(request_id, 0)
        new_total_len = start_pos + num_new_tokens

        # Ceiling division: blocks required to hold every token of the request.
        needed_blocks = (new_total_len + self.block_size - 1) // self.block_size
        added_blocks = max(0, needed_blocks - len(blocks))

        # Check capacity before mutating anything so a failed call cannot
        # leave a request with only part of the blocks it asked for.
        if added_blocks > len(self.free_blocks):
            raise IndexError(
                f"request {request_id!r} needs {added_blocks} more block(s) "
                f"but only {len(self.free_blocks)} are free"
            )

        # Take the blocks in one slice instead of repeated O(n) pop(0) calls.
        blocks.extend(self.free_blocks[:added_blocks])
        del self.free_blocks[:added_blocks]

        self.request_to_blocks[request_id] = blocks
        self.request_to_len[request_id] = new_total_len
        return blocks, new_total_len
def parse_test_cases():
    """Expand ``_TEST_CASES_DATA`` into a flat list of ``TestCase`` objects.

    For every configuration a fresh ``SimpleCacheManager`` and zero-filled
    K/V caches of shape ``(num_blocks, num_kv_heads, block_size, head_size)``
    are created. The rounds of the configuration are then replayed in order:
    each round appends randomly generated keys/values for its new tokens to
    the caches and emits one test case per dtype in ``_TENSOR_DTYPES``.

    Returns:
        list: the generated ``TestCase`` objects.
    """

    def manual_spec(tensor, dtype):
        # Clone so that cache writes of later rounds cannot leak into a case
        # that was already recorded.
        return TensorSpec.from_tensor(
            tensor.shape,
            init_mode=TensorInitializer.MANUAL,
            set_tensor=tensor.clone(),
            dtype=dtype,
        )

    test_cases = []
    for config in _TEST_CASES_DATA:
        num_heads, num_kv_heads, head_size, block_size, request_batches = config

        scale = head_size**-0.5
        num_blocks = 512
        manager = SimpleCacheManager(num_blocks, block_size)
        num_seqs = len(request_batches[0])

        # Running KV length per sequence, accumulated across rounds.
        kv_lens = torch.zeros(num_seqs, dtype=torch.int32)
        cache_shape = (num_blocks, num_kv_heads, block_size, head_size)
        persistent_k = torch.zeros(cache_shape)
        persistent_v = torch.zeros(cache_shape)

        for r, req in enumerate(request_batches):
            assert len(req) == num_seqs, "All requests should have the same length"

            q_lens = torch.tensor(req, dtype=torch.int32)
            kv_lens = kv_lens + q_lens
            total_q_tokens = q_lens.sum().item()

            # Prefix sums with a leading zero for query and KV lengths.
            cum_seqlens_q = torch.zeros(num_seqs + 1, dtype=torch.int32)
            cum_seqlens_q[1:] = torch.cumsum(q_lens, dim=0)
            cum_seqlens_k = torch.zeros(num_seqs + 1, dtype=torch.int32)
            cum_seqlens_k[1:] = torch.cumsum(kv_lens, dim=0)

            query_base = torch.randn((total_q_tokens, num_heads, head_size))

            tables_this_round = []
            for seq in range(num_seqs):
                new_tokens = q_lens[seq].item()
                seq_blocks, _ = manager.allocate_slots(seq, new_tokens)
                tables_this_round.append(seq_blocks)

                # Tokens already cached for this sequence before this round.
                history = kv_lens[seq].item() - new_tokens
                for logical_pos in range(history, history + new_tokens):
                    table_idx, slot = divmod(logical_pos, block_size)
                    block_id = seq_blocks[table_idx]
                    persistent_k[block_id, :, slot, :] = torch.randn(
                        num_kv_heads, head_size
                    )
                    persistent_v[block_id, :, slot, :] = torch.randn(
                        num_kv_heads, head_size
                    )

            # Right-pad every block table with 0 to a common width.
            widest = max(len(tbl) for tbl in tables_this_round)
            padded_tables = torch.tensor(
                [tbl + [0] * (widest - len(tbl)) for tbl in tables_this_round]
            )

            for dtype in _TENSOR_DTYPES:
                tolerance = _TOLERANCE_MAP.get(dtype)
                inputs = [
                    manual_spec(query_base, dtype),
                    manual_spec(persistent_k, dtype),
                    manual_spec(persistent_v, dtype),
                    manual_spec(padded_tables, infinicore.int32),
                    manual_spec(cum_seqlens_q, infinicore.int32),
                    manual_spec(cum_seqlens_k, infinicore.int32),
                ]
                kwargs = {
                    "scale": scale,
                    "max_seqlen_q": _MAX_SEQUENCE_LENGTH,
                    "max_seqlen_k": _MAX_SEQUENCE_LENGTH,
                }
                test_cases.append(
                    TestCase(
                        inputs=inputs,
                        kwargs=kwargs,
                        tolerance=tolerance,
                        description=f"MHA_Varlen_Round_{r}_{str(dtype).split('.')[-1]}",
                    )
                )

    return test_cases
def ref_paged_attention_multi_turn(
    query, k_cache, v_cache, block_tables, cum_seqlens_q, cum_seqlens_k, scale
):
    """Reference causal attention over a paged KV cache for packed sequences.

    Args:
        query: Packed queries indexed as ``[token, head, dim]``; sequence
            ``i`` owns rows ``cum_seqlens_q[i]:cum_seqlens_q[i + 1]``.
        k_cache: Key cache indexed as ``[block, kv_head, slot, dim]``; the
            block size is read from ``k_cache.shape[2]``.
        v_cache: Value cache, indexed like ``k_cache``.
        block_tables: Per-sequence rows of block ids; logical position ``j``
            lives in block ``block_tables[i][j // block_size]`` at slot
            ``j % block_size``.
        cum_seqlens_q: Prefix sums (leading zero) of query lengths.
        cum_seqlens_k: Prefix sums (leading zero) of total KV lengths.
        scale: Multiplier applied to the raw attention scores.

    Returns:
        Tensor shaped and typed like ``query``. Rows of sequences without
        query tokens do not exist, so such sequences are skipped.
    """
    output = torch.zeros_like(query)
    num_seqs = len(cum_seqlens_q) - 1
    block_size = k_cache.shape[2]

    for i in range(num_seqs):
        q_start, q_end = cum_seqlens_q[i].item(), cum_seqlens_q[i + 1].item()
        q_len = q_end - q_start
        if q_len == 0:
            # Nothing to compute; also avoids gathering an empty KV set.
            continue

        total_len = cum_seqlens_k[i + 1].item() - cum_seqlens_k[i].item()
        h_len = total_len - q_len  # tokens cached before this round
        cur_q = query[q_start:q_end]

        # Gather every cached token of the sequence with one indexing call.
        table = block_tables[i].to(device=k_cache.device, dtype=torch.long)
        positions = torch.arange(total_len, device=k_cache.device)
        block_ids = table[torch.div(positions, block_size, rounding_mode="floor")]
        offsets = positions % block_size
        # Index tensors separated by a slice put the gathered axis first:
        # the result is [token, kv_head, dim].
        K = k_cache[block_ids, :, offsets, :]
        V = v_cache[block_ids, :, offsets, :]

        q_heads = cur_q.shape[1]
        kv_heads = K.shape[1]
        assert q_heads % kv_heads == 0
        group_size = q_heads // kv_heads
        if group_size > 1:
            # Grouped-query attention: each KV head serves group_size query heads.
            K = K.repeat_interleave(group_size, dim=1)
            V = V.repeat_interleave(group_size, dim=1)

        scores = torch.einsum("qhd,khd->hqk", cur_q.float(), K.float()) * scale

        # Causal mask: query row t may attend to key positions 0 .. h_len + t.
        q_pos = torch.arange(q_len, device=query.device).unsqueeze(1) + h_len
        k_pos = torch.arange(total_len, device=query.device).unsqueeze(0)
        mask = torch.zeros((q_len, total_len), device=query.device)
        mask.masked_fill_(k_pos > q_pos, float("-inf"))

        attn = torch.softmax(scores + mask.unsqueeze(0), dim=-1).to(query.dtype)
        output[q_start:q_end] = torch.einsum("hqk,khd->qhd", attn, V)

    return output
class OpTest(BaseOperatorTest):
    """Operator test comparing ``infinicore.mha_varlen`` with the torch reference."""

    def __init__(self):
        super().__init__("PagedAttentionPrefill")

    def get_test_cases(self):
        """Return the cases produced by ``parse_test_cases``."""
        cases = parse_test_cases()
        return cases

    def torch_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        cum_seqlens_q,
        cum_seqlens_k,
        scale=1.0,
        max_seqlen_q=0,
        max_seqlen_k=0,
    ):
        """Compute the expected output with the pure-torch reference.

        ``max_seqlen_q`` and ``max_seqlen_k`` are accepted for signature
        parity with ``infinicore_operator`` and are not used here.
        """
        expected = ref_paged_attention_multi_turn(
            query,
            k_cache,
            v_cache,
            block_tables,
            cum_seqlens_q,
            cum_seqlens_k,
            scale,
        )
        return expected

    def infinicore_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        cum_seqlens_q,
        cum_seqlens_k,
        scale=1.0,
        max_seqlen_q=0,
        max_seqlen_k=0,
    ):
        """Run ``infinicore.mha_varlen`` and wait for the stream to finish."""
        # Swap axes 1 and 2 of both caches before the call; the test data is
        # built as (num_blocks, num_kv_heads, block_size, head_size).
        # NOTE(review): presumably mha_varlen wants block_size ahead of the
        # head axis - confirm against the operator's documentation.
        swapped_axes = [0, 2, 1, 3]
        k_paged = k_cache.permute(swapped_axes)
        v_paged = v_cache.permute(swapped_axes)

        result = infinicore.mha_varlen(
            query,
            k_paged,
            v_paged,
            cum_seqlens_q,
            cum_seqlens_k,
            block_tables,
            max_seqlen_q,
            max_seqlen_k,
            alibi_slopes=None,
            scale=scale,
        )
        infinicore.sync_stream()
        return result
def main():
    """Run the ``OpTest`` suite through ``GenericTestRunner`` and exit."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment