Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
dce99862
Unverified
Commit
dce99862
authored
Mar 06, 2026
by
thatPepe
Committed by
GitHub
Mar 06, 2026
Browse files
Merge pull request #1053 from InfiniTensor/issue/1033xmake
Issue/1033 patch aten and fa adaptations
parents
8d99a8f5
d6e44e84
Changes
102
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
396 additions
and
126 deletions
+396
-126
src/infiniop/ops/random_sample/operator.cc
src/infiniop/ops/random_sample/operator.cc
+4
-4
src/infiniop/ops/rearrange/operator.cc
src/infiniop/ops/rearrange/operator.cc
+3
-3
src/infiniop/ops/relu/operator.cc
src/infiniop/ops/relu/operator.cc
+4
-4
src/infiniop/ops/rms_norm/operator.cc
src/infiniop/ops/rms_norm/operator.cc
+5
-5
src/infiniop/ops/rope/operator.cc
src/infiniop/ops/rope/operator.cc
+5
-5
src/infiniop/ops/scaled_mm/operator.cc
src/infiniop/ops/scaled_mm/operator.cc
+20
-20
src/infiniop/ops/sigmoid/operator.cc
src/infiniop/ops/sigmoid/operator.cc
+4
-4
src/infiniop/ops/silu/operator.cc
src/infiniop/ops/silu/operator.cc
+4
-4
src/infiniop/ops/silu_and_mul/operator.cc
src/infiniop/ops/silu_and_mul/operator.cc
+4
-4
src/infiniop/ops/softmax/operator.cc
src/infiniop/ops/softmax/operator.cc
+4
-4
src/infiniop/ops/softplus/operator.cc
src/infiniop/ops/softplus/operator.cc
+4
-4
src/infiniop/ops/sub/operator.cc
src/infiniop/ops/sub/operator.cc
+4
-4
src/infiniop/ops/swiglu/operator.cc
src/infiniop/ops/swiglu/operator.cc
+4
-4
src/infiniop/ops/tanh/operator.cc
src/infiniop/ops/tanh/operator.cc
+4
-4
src/infiniop/ops/topkrouter/operator.cc
src/infiniop/ops/topkrouter/operator.cc
+8
-8
src/infiniop/ops/topksoftmax/operator.cc
src/infiniop/ops/topksoftmax/operator.cc
+8
-8
src/infiniop/ops/zeros/operator.cc
src/infiniop/ops/zeros/operator.cc
+4
-4
src/infiniop/tensor_descriptor.cc
src/infiniop/tensor_descriptor.cc
+2
-2
src/infinirt/infinirt.cc
src/infinirt/infinirt.cc
+31
-31
test/infinicore/ops/mha_varlen.py
test/infinicore/ops/mha_varlen.py
+270
-0
No files found.
src/infiniop/ops/random_sample/operator.cc
View file @
dce99862
...
...
@@ -24,7 +24,7 @@
#include "kunlun/random_sample_kunlun.h"
#endif
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopCreateRandomSampleDescriptor
(
infiniopHandle_t
handle
,
infiniopRandomSampleDescriptor_t
*
desc_ptr
,
...
...
@@ -82,7 +82,7 @@ infiniopCreateRandomSampleDescriptor(
#undef CREATE
};
__C
infiniStatus_t
infiniopGetRandomSampleWorkspaceSize
(
__
INFINI_
C
infiniStatus_t
infiniopGetRandomSampleWorkspaceSize
(
infiniopRandomSampleDescriptor_t
desc
,
size_t
*
size
)
{
...
...
@@ -136,7 +136,7 @@ __C infiniStatus_t infiniopGetRandomSampleWorkspaceSize(
#undef GET
}
__C
infiniStatus_t
infiniopRandomSample
(
__
INFINI_
C
infiniStatus_t
infiniopRandomSample
(
infiniopRandomSampleDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -200,7 +200,7 @@ __C infiniStatus_t infiniopRandomSample(
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyRandomSampleDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopDestroyRandomSampleDescriptor
(
infiniopRandomSampleDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/rearrange/operator.cc
View file @
dce99862
...
...
@@ -24,7 +24,7 @@
#include "kunlun/rearrange_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateRearrangeDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateRearrangeDescriptor
(
infiniopHandle_t
handle
,
infiniopRearrangeDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
dst
,
...
...
@@ -80,7 +80,7 @@ __C infiniStatus_t infiniopCreateRearrangeDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopRearrange
(
__
INFINI_
C
infiniStatus_t
infiniopRearrange
(
infiniopRearrangeDescriptor_t
desc
,
void
*
dst
,
const
void
*
src
,
...
...
@@ -134,7 +134,7 @@ __C infiniStatus_t infiniopRearrange(
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyRearrangeDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopDestroyRearrangeDescriptor
(
infiniopRearrangeDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/relu/operator.cc
View file @
dce99862
...
...
@@ -14,7 +14,7 @@
#endif
#endif
__C
infiniStatus_t
infiniopCreateReluDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateReluDescriptor
(
infiniopHandle_t
handle
,
infiniopReluDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateReluDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetReluWorkspaceSize
(
infiniopReluDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetReluWorkspaceSize
(
infiniopReluDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -95,7 +95,7 @@ __C infiniStatus_t infiniopGetReluWorkspaceSize(infiniopReluDescriptor_t desc, s
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopRelu
(
__
INFINI_
C
infiniStatus_t
infiniopRelu
(
infiniopReluDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -138,7 +138,7 @@ __C infiniStatus_t infiniopRelu(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroyReluDescriptor
(
infiniopReluDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/rms_norm/operator.cc
View file @
dce99862
...
...
@@ -24,7 +24,7 @@
#include "kunlun/rms_norm_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateRMSNormDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateRMSNormDescriptor
(
infiniopHandle_t
handle
,
infiniopRMSNormDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetRMSNormWorkspaceSize
(
infiniopRMSNormDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -131,7 +131,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d
#undef GET
}
__C
infiniStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
__
INFINI_
C
infiniStatus_t
infiniopRMSNorm
(
infiniopRMSNormDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
const
void
*
x
,
const
void
*
w
,
void
*
stream
)
{
#define CALCULATE(CASE, NAMESPACE) \
...
...
@@ -180,7 +180,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyRMSNormDescriptor
(
infiniopRMSNormDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroyRMSNormDescriptor
(
infiniopRMSNormDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
...
...
src/infiniop/ops/rope/operator.cc
View file @
dce99862
...
...
@@ -24,7 +24,7 @@
#include "moore/rope_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateRoPEDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateRoPEDescriptor
(
infiniopHandle_t
handle
,
infiniopRoPEDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y
,
...
...
@@ -87,7 +87,7 @@ __C infiniStatus_t infiniopCreateRoPEDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
__
INFINI_
C
infiniStatus_t
infiniopGetRoPEWorkspaceSize
(
infiniopRoPEDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
#undef GET
}
__C
infiniStatus_t
infiniopRoPE
(
__
INFINI_
C
infiniStatus_t
infiniopRoPE
(
infiniopRoPEDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -192,7 +192,7 @@ __C infiniStatus_t infiniopRoPE(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroyRoPEDescriptor
(
infiniopRoPEDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/scaled_mm/operator.cc
View file @
dce99862
...
...
@@ -10,7 +10,7 @@
#include "moore/int8_gemm_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateI8GemmDescriptor
(
infiniopHandle_t
handle
,
__
INFINI_
C
infiniStatus_t
infiniopCreateI8GemmDescriptor
(
infiniopHandle_t
handle
,
infiniopI8GemmDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
out_desc
,
infiniopTensorDescriptor_t
bias_desc
,
...
...
@@ -45,7 +45,7 @@ __C infiniStatus_t infiniopCreateI8GemmDescriptor(infiniopHandle_t handle,
#undef CREATE
}
__C
infiniStatus_t
infiniopGetI8GemmWorkspaceSize
(
infiniopI8GemmDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetI8GemmWorkspaceSize
(
infiniopI8GemmDescriptor_t
desc
,
size_t
*
size
)
{
switch
(
desc
->
device_type
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -66,7 +66,7 @@ __C infiniStatus_t infiniopGetI8GemmWorkspaceSize(infiniopI8GemmDescriptor_t des
#undef GET
}
__C
infiniStatus_t
infiniopI8Gemm
(
infiniopI8GemmDescriptor_t
desc
,
__
INFINI_
C
infiniStatus_t
infiniopI8Gemm
(
infiniopI8GemmDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
out
,
...
...
@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopI8Gemm(infiniopI8GemmDescriptor_t desc,
#undef CACULATE
}
__C
infiniStatus_t
infiniopDestroyI8GemmDescriptor
(
infiniopI8GemmDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroyI8GemmDescriptor
(
infiniopI8GemmDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<op::i8gemm::NAMESPACE::Descriptor *>(desc); \
...
...
src/infiniop/ops/sigmoid/operator.cc
View file @
dce99862
...
...
@@ -9,7 +9,7 @@
#include "nvidia/sigmoid_nvidia.cuh"
#endif
__C
infiniStatus_t
infiniopCreateSigmoidDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSigmoidDescriptor
(
infiniopHandle_t
handle
,
infiniopSigmoidDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -48,7 +48,7 @@ __C infiniStatus_t infiniopCreateSigmoidDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetSigmoidWorkspaceSize
(
infiniopSigmoidDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSigmoidWorkspaceSize
(
infiniopSigmoidDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -79,7 +79,7 @@ __C infiniStatus_t infiniopGetSigmoidWorkspaceSize(infiniopSigmoidDescriptor_t d
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopSigmoid
(
__
INFINI_
C
infiniStatus_t
infiniopSigmoid
(
infiniopSigmoidDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopSigmoid(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroySigmoidDescriptor
(
infiniopSigmoidDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/silu/operator.cc
View file @
dce99862
...
...
@@ -15,7 +15,7 @@
#include "moore/silu_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateSiluDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSiluDescriptor
(
infiniopHandle_t
handle
,
infiniopSiluDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
output_desc
,
...
...
@@ -57,7 +57,7 @@ __C infiniStatus_t infiniopCreateSiluDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetSiluWorkspaceSize
(
infiniopSiluDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSiluWorkspaceSize
(
infiniopSiluDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -92,7 +92,7 @@ __C infiniStatus_t infiniopGetSiluWorkspaceSize(infiniopSiluDescriptor_t desc, s
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopSilu
(
__
INFINI_
C
infiniStatus_t
infiniopSilu
(
infiniopSiluDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -133,7 +133,7 @@ __C infiniStatus_t infiniopSilu(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroySiluDescriptor
(
infiniopSiluDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/silu_and_mul/operator.cc
View file @
dce99862
...
...
@@ -6,7 +6,7 @@
#include "moore/silu_and_mul_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateSiluAndMulDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSiluAndMulDescriptor
(
infiniopHandle_t
handle
,
infiniopSiluAndMulDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateSiluAndMulDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetSiluAndMulWorkspaceSize
(
infiniopSiluAndMulDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSiluAndMulWorkspaceSize
(
infiniopSiluAndMulDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopGetSiluAndMulWorkspaceSize(infiniopSiluAndMulDescript
#undef GET
}
__C
infiniStatus_t
infiniopSiluAndMul
(
__
INFINI_
C
infiniStatus_t
infiniopSiluAndMul
(
infiniopSiluAndMulDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
...
...
@@ -72,7 +72,7 @@ __C infiniStatus_t infiniopSiluAndMul(
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroySiluAndMulDescriptor
(
infiniopSiluAndMulDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroySiluAndMulDescriptor
(
infiniopSiluAndMulDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
...
...
src/infiniop/ops/softmax/operator.cc
View file @
dce99862
...
...
@@ -6,7 +6,7 @@
#include "nvidia/softmax_nvidia.cuh"
#endif
__C
infiniStatus_t
infiniopCreateSoftmaxDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSoftmaxDescriptor
(
infiniopHandle_t
handle
,
infiniopSoftmaxDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -42,7 +42,7 @@ __C infiniStatus_t infiniopCreateSoftmaxDescriptor(
}
}
__C
infiniStatus_t
infiniopGetSoftmaxWorkspaceSize
(
infiniopSoftmaxDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSoftmaxWorkspaceSize
(
infiniopSoftmaxDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -70,7 +70,7 @@ __C infiniStatus_t infiniopGetSoftmaxWorkspaceSize(infiniopSoftmaxDescriptor_t d
}
}
__C
infiniStatus_t
infiniopSoftmax
(
__
INFINI_
C
infiniStatus_t
infiniopSoftmax
(
infiniopSoftmaxDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
y
,
...
...
@@ -103,7 +103,7 @@ __C infiniStatus_t infiniopSoftmax(
}
}
__C
infiniStatus_t
infiniopDestroySoftmaxDescriptor
(
infiniopSoftmaxDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroySoftmaxDescriptor
(
infiniopSoftmaxDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
...
...
src/infiniop/ops/softplus/operator.cc
View file @
dce99862
...
...
@@ -15,7 +15,7 @@
#include "kunlun/softplus_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateSoftplusDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSoftplusDescriptor
(
infiniopHandle_t
handle
,
infiniopSoftplusDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -60,7 +60,7 @@ __C infiniStatus_t infiniopCreateSoftplusDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetSoftplusWorkspaceSize
(
infiniopSoftplusDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSoftplusWorkspaceSize
(
infiniopSoftplusDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetSoftplusWorkspaceSize(infiniopSoftplusDescriptor_t
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopSoftplus
(
__
INFINI_
C
infiniStatus_t
infiniopSoftplus
(
infiniopSoftplusDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopSoftplus(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroySoftplusDescriptor
(
infiniopSoftplusDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/sub/operator.cc
View file @
dce99862
...
...
@@ -15,7 +15,7 @@
#include "kunlun/sub_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateSubDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSubDescriptor
(
infiniopHandle_t
handle
,
infiniopSubDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
...
...
@@ -62,7 +62,7 @@ __C infiniStatus_t infiniopCreateSubDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetSubWorkspaceSize
(
infiniopSubDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSubWorkspaceSize
(
infiniopSubDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -100,7 +100,7 @@ __C infiniStatus_t infiniopGetSubWorkspaceSize(infiniopSubDescriptor_t desc, siz
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopSub
(
__
INFINI_
C
infiniStatus_t
infiniopSub
(
infiniopSubDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -145,7 +145,7 @@ __C infiniStatus_t infiniopSub(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroySubDescriptor
(
infiniopSubDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/swiglu/operator.cc
View file @
dce99862
...
...
@@ -28,7 +28,7 @@
#include "moore/swiglu_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateSwiGLUDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateSwiGLUDescriptor
(
infiniopHandle_t
handle
,
infiniopSwiGLUDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
c_desc
,
...
...
@@ -105,7 +105,7 @@ __C infiniStatus_t infiniopCreateSwiGLUDescriptor(
#undef CREATE_CUDA
}
__C
infiniStatus_t
infiniopGetSwiGLUWorkspaceSize
(
infiniopSwiGLUDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetSwiGLUWorkspaceSize
(
infiniopSwiGLUDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -168,7 +168,7 @@ __C infiniStatus_t infiniopGetSwiGLUWorkspaceSize(infiniopSwiGLUDescriptor_t des
#undef GET_CUDA
}
__C
infiniStatus_t
infiniopSwiGLU
(
__
INFINI_
C
infiniStatus_t
infiniopSwiGLU
(
infiniopSwiGLUDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -239,7 +239,7 @@ __C infiniStatus_t infiniopSwiGLU(
#undef CALCULATE_CUDA
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroySwiGLUDescriptor
(
infiniopSwiGLUDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/tanh/operator.cc
View file @
dce99862
...
...
@@ -12,7 +12,7 @@
// #include "metax/tanh_metax.h"
// #endif
__C
infiniStatus_t
infiniopCreateTanhDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateTanhDescriptor
(
infiniopHandle_t
handle
,
infiniopTanhDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
output_desc
,
...
...
@@ -55,7 +55,7 @@ __C infiniStatus_t infiniopCreateTanhDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetTanhWorkspaceSize
(
infiniopTanhDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetTanhWorkspaceSize
(
infiniopTanhDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -90,7 +90,7 @@ __C infiniStatus_t infiniopGetTanhWorkspaceSize(infiniopTanhDescriptor_t desc, s
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopTanh
(
__
INFINI_
C
infiniStatus_t
infiniopTanh
(
infiniopTanhDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -132,7 +132,7 @@ __C infiniStatus_t infiniopTanh(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroyTanhDescriptor
(
infiniopTanhDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/ops/topkrouter/operator.cc
View file @
dce99862
...
...
@@ -15,7 +15,7 @@
#include "kunlun/topkrouter_kunlun.h"
#endif
__C
infiniStatus_t
infiniopCreateTopkrouterDescriptor
(
infiniopHandle_t
handle
,
infiniopTopkrouterDescriptor_t
*
desc_ptr
,
__
INFINI_
C
infiniStatus_t
infiniopCreateTopkrouterDescriptor
(
infiniopHandle_t
handle
,
infiniopTopkrouterDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
x_desc
,
infiniopTensorDescriptor_t
correction_bias_desc
)
{
#define CREATE(CASE, NAMESPACE) \
...
...
@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopCreateTopkrouterDescriptor(infiniopHandle_t handle, i
#undef CREATE
}
__C
infiniStatus_t
infiniopGetTopkrouterWorkspaceSize
(
infiniopTopkrouterDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetTopkrouterWorkspaceSize
(
infiniopTopkrouterDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
...
...
@@ -81,7 +81,7 @@ __C infiniStatus_t infiniopGetTopkrouterWorkspaceSize(infiniopTopkrouterDescript
#undef GET
}
__C
infiniStatus_t
infiniopTopkrouter
(
infiniopTopkrouterDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
__
INFINI_
C
infiniStatus_t
infiniopTopkrouter
(
infiniopTopkrouterDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
values
,
void
*
indices
,
const
void
*
x
,
const
void
*
correction_bias
,
const
float
routed_scaling_factor
,
const
size_t
topk
,
void
*
stream
)
{
#define CALCULATE(CASE, NAMESPACE) \
...
...
@@ -116,7 +116,7 @@ __C infiniStatus_t infiniopTopkrouter(infiniopTopkrouterDescriptor_t desc, void
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyTopkrouterDescriptor
(
infiniopTopkrouterDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroyTopkrouterDescriptor
(
infiniopTopkrouterDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<op::topkrouter::NAMESPACE::Descriptor *>(desc); \
...
...
src/infiniop/ops/topksoftmax/operator.cc
View file @
dce99862
...
...
@@ -12,7 +12,7 @@
#include "metax/topksoftmax_metax.cuh"
#endif
__C
infiniStatus_t
infiniopCreateTopksoftmaxDescriptor
(
infiniopHandle_t
handle
,
__
INFINI_
C
infiniStatus_t
infiniopCreateTopksoftmaxDescriptor
(
infiniopHandle_t
handle
,
infiniopTopksoftmaxDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
x_desc
)
{
...
...
@@ -47,7 +47,7 @@ __C infiniStatus_t infiniopCreateTopksoftmaxDescriptor(infiniopHandle_t handle,
#undef CREATE
}
__C
infiniStatus_t
infiniopGetTopksoftmaxWorkspaceSize
(
infiniopTopksoftmaxDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetTopksoftmaxWorkspaceSize
(
infiniopTopksoftmaxDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -80,7 +80,7 @@ __C infiniStatus_t infiniopGetTopksoftmaxWorkspaceSize(infiniopTopksoftmaxDescri
#undef GET
}
__C
infiniStatus_t
infiniopTopksoftmax
(
infiniopTopksoftmaxDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
__
INFINI_
C
infiniStatus_t
infiniopTopksoftmax
(
infiniopTopksoftmaxDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
values
,
void
*
indices
,
const
void
*
x
,
const
size_t
topk
,
const
int
norm
,
void
*
stream
)
{
if
(
topk
>
32
)
{
...
...
@@ -118,7 +118,7 @@ __C infiniStatus_t infiniopTopksoftmax(infiniopTopksoftmaxDescriptor_t desc, voi
#undef CALCULATE
}
__C
infiniStatus_t
infiniopDestroyTopksoftmaxDescriptor
(
infiniopTopksoftmaxDescriptor_t
desc
)
{
__
INFINI_
C
infiniStatus_t
infiniopDestroyTopksoftmaxDescriptor
(
infiniopTopksoftmaxDescriptor_t
desc
)
{
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
...
...
src/infiniop/ops/zeros/operator.cc
View file @
dce99862
...
...
@@ -15,7 +15,7 @@
#include "moore/zeros_moore.h"
#endif
__C
infiniStatus_t
infiniopCreateZerosDescriptor
(
__
INFINI_
C
infiniStatus_t
infiniopCreateZerosDescriptor
(
infiniopHandle_t
handle
,
infiniopZerosDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
y_desc
,
...
...
@@ -59,7 +59,7 @@ __C infiniStatus_t infiniopCreateZerosDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetZerosWorkspaceSize
(
infiniopZerosDescriptor_t
desc
,
size_t
*
size
)
{
__
INFINI_
C
infiniStatus_t
infiniopGetZerosWorkspaceSize
(
infiniopZerosDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
...
...
@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetZerosWorkspaceSize(infiniopZerosDescriptor_t desc,
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
__C
infiniStatus_t
infiniopZeros
(
__
INFINI_
C
infiniStatus_t
infiniopZeros
(
infiniopZerosDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
...
...
@@ -139,7 +139,7 @@ __C infiniStatus_t infiniopZeros(
#undef CALCULATE
}
__C
infiniStatus_t
__
INFINI_
C
infiniStatus_t
infiniopDestroyZerosDescriptor
(
infiniopZerosDescriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
...
...
src/infiniop/tensor_descriptor.cc
View file @
dce99862
...
...
@@ -5,7 +5,7 @@
#include <functional>
#include <numeric>
__C
__export
infiniStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
ptrdiff_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
__
INFINI_
C
__export
infiniStatus_t
infiniopCreateTensorDescriptor
(
infiniopTensorDescriptor_t
*
desc_ptr
,
size_t
ndim
,
size_t
const
*
shape_
,
ptrdiff_t
const
*
strides_
,
infiniDtype_t
datatype
)
{
if
(
strides_
!=
nullptr
)
{
*
desc_ptr
=
new
InfiniopTensorDescriptor
(
datatype
,
ndim
,
shape_
,
strides_
);
}
else
{
...
...
@@ -23,7 +23,7 @@ __C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescrip
return
INFINI_STATUS_SUCCESS
;
}
__C
__export
infiniStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
)
{
__
INFINI_
C
__export
infiniStatus_t
infiniopDestroyTensorDescriptor
(
infiniopTensorDescriptor_t
desc
)
{
delete
desc
;
return
INFINI_STATUS_SUCCESS
;
}
...
...
src/infinirt/infinirt.cc
View file @
dce99862
...
...
@@ -13,14 +13,14 @@ thread_local int CURRENT_DEVICE_ID = 0;
thread_local
infiniDevice_t
PREVIOUS_NON_CPU_DEVICE_TYPE
=
INFINI_DEVICE_TYPE_COUNT
;
thread_local
int
PREVIOUS_NON_CPU_DEVICE_ID
=
0
;
__C
infiniStatus_t
infinirtInit
()
{
__
INFINI_
C
infiniStatus_t
infinirtInit
()
{
#ifdef ENABLE_ASCEND_API
CHECK_STATUS
(
infinirt
::
ascend
::
init
());
#endif
return
INFINI_STATUS_SUCCESS
;
}
__C
infiniStatus_t
infinirtGetAllDeviceCount
(
int
*
count_array
)
{
__
INFINI_
C
infiniStatus_t
infinirtGetAllDeviceCount
(
int
*
count_array
)
{
if
(
count_array
==
nullptr
)
{
return
INFINI_STATUS_NULL_POINTER
;
}
...
...
@@ -33,7 +33,7 @@ __C infiniStatus_t infinirtGetAllDeviceCount(int *count_array) {
return
INFINI_STATUS_SUCCESS
;
}
__C
infiniStatus_t
infinirtGetDevice
(
infiniDevice_t
*
device_ptr
,
int
*
device_id_ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtGetDevice
(
infiniDevice_t
*
device_ptr
,
int
*
device_id_ptr
)
{
if
(
device_ptr
==
nullptr
&&
device_id_ptr
==
nullptr
)
{
return
INFINI_STATUS_NULL_POINTER
;
}
...
...
@@ -93,14 +93,14 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
#define INFINIRT_CALL_DEVICE_API(API, PARAMS) INFINIRT_CALL_DEVICE_API_AND(CURRENT_DEVICE_TYPE, API, PARAMS, )
__C
infiniStatus_t
infinirtGetDeviceCount
(
infiniDevice_t
device
,
int
*
count
)
{
__
INFINI_
C
infiniStatus_t
infinirtGetDeviceCount
(
infiniDevice_t
device
,
int
*
count
)
{
if
(
count
==
nullptr
)
{
return
INFINI_STATUS_NULL_POINTER
;
}
INFINIRT_CALL_DEVICE_API_AND
(
device
,
getDeviceCount
,
(
count
),
{});
}
__C
infiniStatus_t
infinirtSetDevice
(
infiniDevice_t
device
,
int
device_id
)
{
__
INFINI_
C
infiniStatus_t
infinirtSetDevice
(
infiniDevice_t
device
,
int
device_id
)
{
bool
skip_set
=
CURRENT_DEVICE_TYPE
==
INFINI_DEVICE_CPU
&&
device
==
PREVIOUS_NON_CPU_DEVICE_TYPE
&&
device_id
==
PREVIOUS_NON_CPU_DEVICE_ID
;
if
(
CURRENT_DEVICE_TYPE
!=
INFINI_DEVICE_CPU
)
{
PREVIOUS_NON_CPU_DEVICE_TYPE
=
CURRENT_DEVICE_TYPE
;
...
...
@@ -116,99 +116,99 @@ __C infiniStatus_t infinirtSetDevice(infiniDevice_t device, int device_id) {
CURRENT_DEVICE_ID
=
device_id
;
});
}
__C
infiniStatus_t
infinirtDeviceSynchronize
()
{
__
INFINI_
C
infiniStatus_t
infinirtDeviceSynchronize
()
{
INFINIRT_CALL_DEVICE_API
(
deviceSynchronize
,
());
}
__C
infiniStatus_t
infinirtStreamCreate
(
infinirtStream_t
*
stream_ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamCreate
(
infinirtStream_t
*
stream_ptr
)
{
INFINIRT_CALL_DEVICE_API
(
streamCreate
,
(
stream_ptr
));
}
__C
infiniStatus_t
infinirtStreamDestroy
(
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamDestroy
(
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
streamDestroy
,
(
stream
));
}
__C
infiniStatus_t
infinirtStreamSynchronize
(
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamSynchronize
(
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
streamSynchronize
,
(
stream
));
}
__C
infiniStatus_t
infinirtStreamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
)
{
INFINIRT_CALL_DEVICE_API
(
streamWaitEvent
,
(
stream
,
event
));
}
__C
infiniStatus_t
infinirtEventCreate
(
infinirtEvent_t
*
event_ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventCreate
(
infinirtEvent_t
*
event_ptr
)
{
INFINIRT_CALL_DEVICE_API
(
eventCreate
,
(
event_ptr
));
}
__C
infiniStatus_t
infinirtEventCreateWithFlags
(
infinirtEvent_t
*
event_ptr
,
uint32_t
flags
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventCreateWithFlags
(
infinirtEvent_t
*
event_ptr
,
uint32_t
flags
)
{
INFINIRT_CALL_DEVICE_API
(
eventCreateWithFlags
,
(
event_ptr
,
flags
));
}
__C
infiniStatus_t
infinirtEventRecord
(
infinirtEvent_t
event
,
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventRecord
(
infinirtEvent_t
event
,
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
eventRecord
,
(
event
,
stream
));
}
__C
infiniStatus_t
infinirtEventQuery
(
infinirtEvent_t
event
,
infinirtEventStatus_t
*
status_ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventQuery
(
infinirtEvent_t
event
,
infinirtEventStatus_t
*
status_ptr
)
{
INFINIRT_CALL_DEVICE_API
(
eventQuery
,
(
event
,
status_ptr
));
}
__C
infiniStatus_t
infinirtEventSynchronize
(
infinirtEvent_t
event
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventSynchronize
(
infinirtEvent_t
event
)
{
INFINIRT_CALL_DEVICE_API
(
eventSynchronize
,
(
event
));
}
__C
infiniStatus_t
infinirtEventDestroy
(
infinirtEvent_t
event
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventDestroy
(
infinirtEvent_t
event
)
{
INFINIRT_CALL_DEVICE_API
(
eventDestroy
,
(
event
));
}
__C
infiniStatus_t
infinirtEventElapsedTime
(
float
*
ms_ptr
,
infinirtEvent_t
start
,
infinirtEvent_t
end
)
{
__
INFINI_
C
infiniStatus_t
infinirtEventElapsedTime
(
float
*
ms_ptr
,
infinirtEvent_t
start
,
infinirtEvent_t
end
)
{
INFINIRT_CALL_DEVICE_API
(
eventElapsedTime
,
(
ms_ptr
,
start
,
end
));
}
__C
infiniStatus_t
infinirtMalloc
(
void
**
p_ptr
,
size_t
size
)
{
__
INFINI_
C
infiniStatus_t
infinirtMalloc
(
void
**
p_ptr
,
size_t
size
)
{
INFINIRT_CALL_DEVICE_API
(
mallocDevice
,
(
p_ptr
,
size
));
}
__C
infiniStatus_t
infinirtMallocHost
(
void
**
p_ptr
,
size_t
size
)
{
__
INFINI_
C
infiniStatus_t
infinirtMallocHost
(
void
**
p_ptr
,
size_t
size
)
{
INFINIRT_CALL_DEVICE_API
(
mallocHost
,
(
p_ptr
,
size
));
}
__C
infiniStatus_t
infinirtFree
(
void
*
ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtFree
(
void
*
ptr
)
{
INFINIRT_CALL_DEVICE_API
(
freeDevice
,
(
ptr
));
}
__C
infiniStatus_t
infinirtFreeHost
(
void
*
ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtFreeHost
(
void
*
ptr
)
{
INFINIRT_CALL_DEVICE_API
(
freeHost
,
(
ptr
));
}
__C
infiniStatus_t
infinirtMemcpy
(
void
*
dst
,
const
void
*
src
,
size_t
size
,
infinirtMemcpyKind_t
kind
)
{
__
INFINI_
C
infiniStatus_t
infinirtMemcpy
(
void
*
dst
,
const
void
*
src
,
size_t
size
,
infinirtMemcpyKind_t
kind
)
{
INFINIRT_CALL_DEVICE_API
(
memcpy
,
(
dst
,
src
,
size
,
kind
));
}
__C
infiniStatus_t
infinirtMemcpyAsync
(
void
*
dst
,
const
void
*
src
,
size_t
size
,
infinirtMemcpyKind_t
kind
,
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtMemcpyAsync
(
void
*
dst
,
const
void
*
src
,
size_t
size
,
infinirtMemcpyKind_t
kind
,
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
memcpyAsync
,
(
dst
,
src
,
size
,
kind
,
stream
));
}
__C
infiniStatus_t
infinirtMallocAsync
(
void
**
p_ptr
,
size_t
size
,
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtMallocAsync
(
void
**
p_ptr
,
size_t
size
,
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
mallocAsync
,
(
p_ptr
,
size
,
stream
));
}
__C
infiniStatus_t
infinirtFreeAsync
(
void
*
ptr
,
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtFreeAsync
(
void
*
ptr
,
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
freeAsync
,
(
ptr
,
stream
));
}
__C
infiniStatus_t
infinirtStreamBeginCapture
(
infinirtStream_t
stream
,
infinirtStreamCaptureMode_t
mode
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamBeginCapture
(
infinirtStream_t
stream
,
infinirtStreamCaptureMode_t
mode
)
{
INFINIRT_CALL_DEVICE_API
(
streamBeginCapture
,
(
stream
,
mode
));
}
__C
infiniStatus_t
infinirtStreamEndCapture
(
infinirtStream_t
stream
,
infinirtGraph_t
*
graph_ptr
)
{
__
INFINI_
C
infiniStatus_t
infinirtStreamEndCapture
(
infinirtStream_t
stream
,
infinirtGraph_t
*
graph_ptr
)
{
INFINIRT_CALL_DEVICE_API
(
streamEndCapture
,
(
stream
,
graph_ptr
));
}
__C
infiniStatus_t
infinirtGraphDestroy
(
infinirtGraph_t
graph
)
{
__
INFINI_
C
infiniStatus_t
infinirtGraphDestroy
(
infinirtGraph_t
graph
)
{
INFINIRT_CALL_DEVICE_API
(
graphDestroy
,
(
graph
));
}
__C
infiniStatus_t
infinirtGraphInstantiate
(
__
INFINI_
C
infiniStatus_t
infinirtGraphInstantiate
(
infinirtGraphExec_t
*
graph_exec_ptr
,
infinirtGraph_t
graph
,
infinirtGraphNode_t
*
node_ptr
,
...
...
@@ -217,10 +217,10 @@ __C infiniStatus_t infinirtGraphInstantiate(
INFINIRT_CALL_DEVICE_API
(
graphInstantiate
,
(
graph_exec_ptr
,
graph
,
node_ptr
,
log_buffer
,
buffer_size
));
}
__C
infiniStatus_t
infinirtGraphExecDestroy
(
infinirtGraphExec_t
graph_exec
)
{
__
INFINI_
C
infiniStatus_t
infinirtGraphExecDestroy
(
infinirtGraphExec_t
graph_exec
)
{
INFINIRT_CALL_DEVICE_API
(
graphExecDestroy
,
(
graph_exec
));
}
__C
infiniStatus_t
infinirtGraphLuanch
(
infinirtGraphExec_t
graph_exec
,
infinirtStream_t
stream
)
{
__
INFINI_
C
infiniStatus_t
infinirtGraphLuanch
(
infinirtGraphExec_t
graph_exec
,
infinirtStream_t
stream
)
{
INFINIRT_CALL_DEVICE_API
(
graphLuanch
,
(
graph_exec
,
stream
));
}
test/infinicore/ops/mha_varlen.py
0 → 100644
View file @
dce99862
import
os
import
sys
import
torch
import
infinicore
sys
.
path
.
insert
(
0
,
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
))
from
framework
import
(
BaseOperatorTest
,
GenericTestRunner
,
TensorInitializer
,
TensorSpec
,
TestCase
,
)
# Test Cases: (num_heads, num_kv_heads, head_size, block_size, [request_batch])
# The last element lists the rounds of one case; each round is a tuple with the
# number of new query tokens per sequence, so every round of a case must have
# the same length (parse_test_cases asserts this).
_TEST_CASES_DATA = [
    (1, 1, 128, 256, [(250,), (7,)]),
    (4, 4, 128, 256, [(250,), (7,)]),
    (1, 1, 128, 256, [(260, 73), (1, 1)]),
    (8, 2, 128, 256, [(250,), (7,)]),
    (8, 2, 128, 256, [(260, 73), (1, 1)]),
]

# Passed as both max_seqlen_q and max_seqlen_k for every generated test case.
_MAX_SEQUENCE_LENGTH = 8192

# Absolute/relative comparison tolerances, keyed by the dtype under test.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 1e-2, "rtol": 1e-2},
    infinicore.bfloat16: {"atol": 2e-2, "rtol": 2e-2},
}

# Every round of every case is instantiated once per dtype listed here.
_TENSOR_DTYPES = [infinicore.float16, infinicore.bfloat16]
class SimpleCacheManager:
    """Minimal block allocator for a paged KV cache, used to build test inputs.

    Blocks are handed out in ascending id order from a free list. Each request
    keeps the ordered list of blocks it owns and the number of tokens stored
    so far, so repeated calls for the same request extend its allocation.
    """

    def __init__(self, num_blocks, block_size):
        """Create a pool of ``num_blocks`` blocks holding ``block_size`` tokens each."""
        self.num_blocks = num_blocks
        self.block_size = block_size
        self.free_blocks = list(range(num_blocks))
        self.request_to_blocks = {}
        self.request_to_len = {}

    def allocate_slots(self, request_id, num_new_tokens):
        """Reserve room for ``num_new_tokens`` more tokens of ``request_id``.

        Args:
            request_id: Hashable key identifying the request.
            num_new_tokens: Number of tokens appended in this call.

        Returns:
            A ``(blocks, total_len)`` tuple: the request's live block-id list
            (the same list object the manager keeps) and its new token count.

        Raises:
            IndexError: If the pool does not hold enough free blocks. The
                check happens before any block is moved, so the manager's
                bookkeeping is left untouched by a failed call.
        """
        if request_id not in self.request_to_len:
            self.request_to_len[request_id] = 0
            self.request_to_blocks[request_id] = []

        blocks = self.request_to_blocks[request_id]
        new_total_len = self.request_to_len[request_id] + num_new_tokens

        # Ceiling division: blocks required to hold every token of the request.
        needed_blocks = (new_total_len + self.block_size - 1) // self.block_size
        added_blocks = needed_blocks - len(blocks)

        # Fail before mutating anything so a failed call cannot leave the
        # request holding blocks that its recorded length does not cover.
        if added_blocks > len(self.free_blocks):
            raise IndexError(
                f"out of cache blocks: request {request_id!r} needs "
                f"{added_blocks} more block(s), only "
                f"{len(self.free_blocks)} free"
            )

        if added_blocks > 0:
            # Take the lowest-numbered free blocks in one slice.
            blocks.extend(self.free_blocks[:added_blocks])
            del self.free_blocks[:added_blocks]

        self.request_to_len[request_id] = new_total_len
        return blocks, new_total_len
def _manual_spec(tensor, dtype):
    """Wrap a snapshot (clone) of ``tensor`` in a MANUAL-initialised TensorSpec."""
    return TensorSpec.from_tensor(
        tensor.shape,
        init_mode=TensorInitializer.MANUAL,
        set_tensor=tensor.clone(),
        dtype=dtype,
    )


def parse_test_cases():
    """Expand ``_TEST_CASES_DATA`` into a flat list of ``TestCase`` objects.

    Each configuration simulates a multi-round conversation: the K/V caches
    and the block allocator persist across rounds, and every round appends
    new random keys/values for the round's query tokens. One ``TestCase`` is
    emitted per (round, dtype) pair, each holding its own clone of the cache
    contents as of that round.

    Returns:
        list: The generated ``TestCase`` instances. Inputs are ordered as
        query, k_cache, v_cache, block_tables, cum_seqlens_q, cum_seqlens_k.
    """
    test_cases = []
    for (
        num_heads,
        num_kv_heads,
        head_size,
        block_size,
        request_batches,
    ) in _TEST_CASES_DATA:
        scale = head_size**-0.5
        num_blocks = 512
        manager = SimpleCacheManager(num_blocks, block_size)
        num_seqs = len(request_batches[0])

        # State carried across rounds of the same configuration.
        kv_lens = torch.zeros(num_seqs, dtype=torch.int32)
        cache_shape = (num_blocks, num_kv_heads, block_size, head_size)
        persistent_k = torch.zeros(cache_shape)
        persistent_v = torch.zeros(cache_shape)

        for r, req in enumerate(request_batches):
            assert len(req) == num_seqs, "All requests should have the same length"
            q_lens = torch.tensor(req, dtype=torch.int32)
            kv_lens = kv_lens + q_lens
            total_q_tokens = q_lens.sum().item()

            # Prefix sums with a leading zero: sequence i spans [cum[i], cum[i+1]).
            cum_seqlens_q = torch.zeros(num_seqs + 1, dtype=torch.int32)
            cum_seqlens_q[1:] = torch.cumsum(q_lens, dim=0)
            cum_seqlens_k = torch.zeros(num_seqs + 1, dtype=torch.int32)
            cum_seqlens_k[1:] = torch.cumsum(kv_lens, dim=0)

            query_base = torch.randn((total_q_tokens, num_heads, head_size))

            round_block_tables_list = []
            for i in range(num_seqs):
                q_len = q_lens[i].item()
                p_blocks, _ = manager.allocate_slots(i, q_len)
                round_block_tables_list.append(p_blocks)

                # Tokens already cached for this sequence before this round.
                h_len = kv_lens[i].item() - q_len
                for t in range(q_len):
                    logical_pos = h_len + t
                    b_id = p_blocks[logical_pos // block_size]
                    off = logical_pos % block_size
                    persistent_k[b_id, :, off, :] = torch.randn(
                        num_kv_heads, head_size
                    )
                    persistent_v[b_id, :, off, :] = torch.randn(
                        num_kv_heads, head_size
                    )

            # Right-pad every table with block 0 to a common width. The dtype
            # is explicit so the tensor agrees with its int32 spec below and
            # with the other index tensors, instead of defaulting to int64.
            max_blks = max(len(tbl) for tbl in round_block_tables_list)
            padded_tables = torch.tensor(
                [
                    tbl + [0] * (max_blks - len(tbl))
                    for tbl in round_block_tables_list
                ],
                dtype=torch.int32,
            )

            for dtype in _TENSOR_DTYPES:
                tolerance = _TOLERANCE_MAP.get(dtype)
                test_cases.append(
                    TestCase(
                        inputs=[
                            _manual_spec(query_base, dtype),
                            _manual_spec(persistent_k, dtype),
                            _manual_spec(persistent_v, dtype),
                            _manual_spec(padded_tables, infinicore.int32),
                            _manual_spec(cum_seqlens_q, infinicore.int32),
                            _manual_spec(cum_seqlens_k, infinicore.int32),
                        ],
                        kwargs={
                            "scale": scale,
                            "max_seqlen_q": _MAX_SEQUENCE_LENGTH,
                            "max_seqlen_k": _MAX_SEQUENCE_LENGTH,
                        },
                        tolerance=tolerance,
                        description=f"MHA_Varlen_Round_{r}_{str(dtype).split('.')[-1]}",
                    )
                )
    return test_cases
def ref_paged_attention_multi_turn(
    query, k_cache, v_cache, block_tables, cum_seqlens_q, cum_seqlens_k, scale
):
    """Reference causal attention over a paged KV cache for packed sequences.

    Args:
        query: Packed queries indexed as ``[token, head, dim]``.
        k_cache: Key cache indexed as ``[block, kv_head, offset, dim]``; the
            block size is read from ``k_cache.shape[2]``.
        v_cache: Value cache, indexed like ``k_cache``.
        block_tables: Per-sequence rows of block ids; logical position ``j``
            of sequence ``i`` lives in block ``block_tables[i][j // block_size]``
            at offset ``j % block_size``.
        cum_seqlens_q: Prefix sums of query lengths (``num_seqs + 1`` entries).
        cum_seqlens_k: Prefix sums of total key lengths (``num_seqs + 1`` entries).
        scale: Multiplier applied to the raw attention scores.

    Returns:
        Tensor shaped and typed like ``query``. Rows of sequences that have no
        query tokens stay zero.
    """
    output = torch.zeros_like(query)
    num_seqs = len(cum_seqlens_q) - 1
    block_size = k_cache.shape[2]

    for i in range(num_seqs):
        q_start, q_end = cum_seqlens_q[i].item(), cum_seqlens_q[i + 1].item()
        q_len = q_end - q_start
        if q_len == 0:
            # Nothing to compute; also avoids building empty K/V for a
            # sequence that has no tokens at all.
            continue

        total_len = cum_seqlens_k[i + 1].item() - cum_seqlens_k[i].item()
        # Keys cached before this call; the new tokens are the last q_len.
        h_len = total_len - q_len
        cur_q = query[q_start:q_end]

        # Gather every logical position in one indexing op. The two index
        # tensors are separated by a slice, so the gathered axis comes first:
        # the result is [position, kv_head, dim].
        positions = torch.arange(total_len, device=k_cache.device)
        table = block_tables[i].to(device=k_cache.device, dtype=torch.long)
        block_ids = table[positions // block_size]
        offsets = positions % block_size
        K = k_cache[block_ids, :, offsets, :]
        V = v_cache[block_ids, :, offsets, :]

        q_heads = cur_q.shape[1]
        kv_heads = K.shape[1]
        assert q_heads % kv_heads == 0
        group_size = q_heads // kv_heads
        if group_size > 1:
            # Grouped-query attention: each KV head serves group_size query heads.
            K = K.repeat_interleave(group_size, dim=1)
            V = V.repeat_interleave(group_size, dim=1)

        scores = torch.einsum("qhd,khd->hqk", cur_q.float(), K.float()) * scale

        # Causal mask: query token t may see keys 0 .. h_len + t inclusive.
        key_pos = torch.arange(total_len, device=query.device)
        last_visible = h_len + torch.arange(q_len, device=query.device)
        hidden = key_pos.unsqueeze(0) > last_visible.unsqueeze(1)
        scores = scores.masked_fill(hidden.unsqueeze(0), float("-inf"))

        attn = torch.softmax(scores, dim=-1).to(query.dtype)
        output[q_start:q_end] = torch.einsum("hqk,khd->qhd", attn, V)
    return output
class OpTest(BaseOperatorTest):
    """Compares ``infinicore.mha_varlen`` with the pure-torch paged reference."""

    def __init__(self):
        # NOTE(review): this file tests mha_varlen, yet the operator name is
        # "PagedAttentionPrefill" - possibly a copy-paste leftover; confirm
        # how the framework uses the name before renaming.
        super().__init__("PagedAttentionPrefill")

    def get_test_cases(self):
        """Return the test cases generated by ``parse_test_cases``."""
        return parse_test_cases()

    def torch_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        cum_seqlens_q,
        cum_seqlens_k,
        scale=1.0,
        max_seqlen_q=0,
        max_seqlen_k=0,
    ):
        """Compute the reference result; the max_seqlen_* arguments are unused."""
        reference = ref_paged_attention_multi_turn(
            query,
            k_cache,
            v_cache,
            block_tables,
            cum_seqlens_q,
            cum_seqlens_k,
            scale,
        )
        return reference

    def infinicore_operator(
        self,
        query,
        k_cache,
        v_cache,
        block_tables,
        cum_seqlens_q,
        cum_seqlens_k,
        scale=1.0,
        max_seqlen_q=0,
        max_seqlen_k=0,
    ):
        """Run ``infinicore.mha_varlen`` and synchronise the stream before returning."""
        # Swap axes 1 and 2 of both caches before handing them to the operator.
        axis_order = [0, 2, 1, 3]
        k_paged = k_cache.permute(axis_order)
        v_paged = v_cache.permute(axis_order)
        out = infinicore.mha_varlen(
            query,
            k_paged,
            v_paged,
            cum_seqlens_q,
            cum_seqlens_k,
            block_tables,
            max_seqlen_q,
            max_seqlen_k,
            alibi_slopes=None,
            scale=scale,
        )
        infinicore.sync_stream()
        return out
def main():
    """Run the OpTest suite through GenericTestRunner and exit with its status."""
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment