Unverified Commit dce99862 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1053 from InfiniTensor/issue/1033xmake

Issue/1033 patch aten and fa adaptations
parents 8d99a8f5 d6e44e84
......@@ -31,7 +31,7 @@ struct InfiniopAttentionDescriptor {
float qk_alpha;
};
__C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
__INFINI_C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
infiniopAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
infiniopTensorDescriptor_t q_desc,
......@@ -218,12 +218,12 @@ __C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t h
return INFINI_STATUS_SUCCESS;
}
__C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size) {
__INFINI_C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size) {
*size = ((InfiniopAttentionDescriptor *)desc)->workspace_size;
return INFINI_STATUS_SUCCESS;
}
__C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc_,
__INFINI_C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc_,
void *workspace_,
size_t workspace_size_,
void *out,
......@@ -274,7 +274,7 @@ __C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc
return INFINI_STATUS_SUCCESS;
}
__C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc_) {
__INFINI_C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc_) {
auto desc = (InfiniopAttentionDescriptor *)desc_;
if (desc->rearrange_desc_q) {
CHECK_STATUS(infiniopDestroyRearrangeDescriptor(desc->rearrange_desc_q));
......
......@@ -24,7 +24,7 @@
#include "moore/causal_softmax_moore.h"
#endif
__C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
__INFINI_C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
infiniopHandle_t handle,
infiniopCausalSoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
......@@ -78,7 +78,7 @@ __C infiniStatus_t infiniopCreateCausalSoftmaxDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -125,7 +125,7 @@ __C infiniStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDe
#undef GET
}
__C infiniStatus_t infiniopCausalSoftmax(
__INFINI_C infiniStatus_t infiniopCausalSoftmax(
infiniopCausalSoftmaxDescriptor_t desc,
void *workspace, size_t workspace_size,
void *y,
......@@ -177,7 +177,7 @@ __C infiniStatus_t infiniopCausalSoftmax(
#undef CALCULATE
}
__C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) {
__INFINI_C infiniStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
......
......@@ -15,7 +15,7 @@
#include "kunlun/clip_kunlun.h"
#endif
__C infiniStatus_t infiniopCreateClipDescriptor(
__INFINI_C infiniStatus_t infiniopCreateClipDescriptor(
infiniopHandle_t handle,
infiniopClipDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
......@@ -62,7 +62,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s
#undef GET
}
__C infiniStatus_t infiniopClip(
__INFINI_C infiniStatus_t infiniopClip(
infiniopClipDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -144,7 +144,7 @@ __C infiniStatus_t infiniopClip(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -9,7 +9,7 @@
#include "nvidia/conv_nvidia.cuh"
#endif
__C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
__INFINI_C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle,
infiniopConvDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
......@@ -55,7 +55,7 @@ __C __export infiniStatus_t infiniopCreateConvDescriptor(infiniopHandle_t handle
#undef CREATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopGetConvWorkspaceSize(
infiniopConvDescriptor_t desc,
size_t *size) {
......@@ -90,7 +90,7 @@ infiniopGetConvWorkspaceSize(
#undef GET
}
__C infiniStatus_t infiniopConv(
__INFINI_C infiniStatus_t infiniopConv(
infiniopConvDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -131,7 +131,7 @@ __C infiniStatus_t infiniopConv(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
case CASE: \
......
......@@ -12,7 +12,7 @@
#include "iluvatar/dequantize_w42f16_iluvatar.cuh"
#endif
__C infiniStatus_t infiniopCreateDequantizeAWQDescriptor(
__INFINI_C infiniStatus_t infiniopCreateDequantizeAWQDescriptor(
infiniopHandle_t handle,
infiniopDequantizeAWQDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
......@@ -54,7 +54,7 @@ __C infiniStatus_t infiniopCreateDequantizeAWQDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc,
__INFINI_C infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc,
size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -84,7 +84,7 @@ __C infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDe
#undef GET
}
__C infiniStatus_t infiniopDequantizeAWQ(
__INFINI_C infiniStatus_t infiniopDequantizeAWQ(
infiniopDequantizeAWQDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -123,7 +123,7 @@ __C infiniStatus_t infiniopDequantizeAWQ(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -15,7 +15,7 @@
#include "moore/embedding_moore.h"
#endif
__C infiniStatus_t infiniopCreateEmbeddingDescriptor(
__INFINI_C infiniStatus_t infiniopCreateEmbeddingDescriptor(
infiniopHandle_t handle,
infiniopEmbeddingDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
......@@ -65,7 +65,7 @@ __C infiniStatus_t infiniopCreateEmbeddingDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopEmbedding(
__INFINI_C infiniStatus_t infiniopEmbedding(
infiniopEmbeddingDescriptor_t desc,
void *output,
const void *input,
......@@ -111,7 +111,7 @@ __C infiniStatus_t infiniopEmbedding(
#undef CALCULATE
}
__C infiniStatus_t infiniopDestroyEmbeddingDescriptor(infiniopEmbeddingDescriptor_t desc) {
__INFINI_C infiniStatus_t infiniopDestroyEmbeddingDescriptor(infiniopEmbeddingDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
......
......@@ -8,7 +8,7 @@
#endif
#endif
__C infiniStatus_t infiniopCreateFlashAttentionDescriptor(
__INFINI_C infiniStatus_t infiniopCreateFlashAttentionDescriptor(
infiniopHandle_t handle,
infiniopFlashAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
......@@ -45,7 +45,7 @@ __C infiniStatus_t infiniopCreateFlashAttentionDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetFlashAttentionWorkspaceSize(
__INFINI_C infiniStatus_t infiniopGetFlashAttentionWorkspaceSize(
infiniopFlashAttentionDescriptor_t desc,
size_t *size) {
......@@ -69,7 +69,7 @@ __C infiniStatus_t infiniopGetFlashAttentionWorkspaceSize(
#undef GET_SIZE
}
__C infiniStatus_t infiniopFlashAttention(
__INFINI_C infiniStatus_t infiniopFlashAttention(
infiniopFlashAttentionDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -99,7 +99,7 @@ __C infiniStatus_t infiniopFlashAttention(
#undef CALCULATE
}
__C infiniStatus_t infiniopDestroyFlashAttentionDescriptor(
__INFINI_C infiniStatus_t infiniopDestroyFlashAttentionDescriptor(
infiniopFlashAttentionDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
......
......@@ -15,7 +15,7 @@
#include "kunlun/gelu_kunlun.h"
#endif
__C infiniStatus_t infiniopCreateGeluDescriptor(
__INFINI_C infiniStatus_t infiniopCreateGeluDescriptor(
infiniopHandle_t handle,
infiniopGeluDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
......@@ -60,7 +60,7 @@ __C infiniStatus_t infiniopCreateGeluDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetGeluWorkspaceSize(infiniopGeluDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetGeluWorkspaceSize(infiniopGeluDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetGeluWorkspaceSize(infiniopGeluDescriptor_t desc, s
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniStatus_t infiniopGelu(
__INFINI_C infiniStatus_t infiniopGelu(
infiniopGeluDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopGelu(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyGeluDescriptor(infiniopGeluDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -24,7 +24,7 @@
#include "kunlun/gemm_kunlun.h"
#endif
__C infiniStatus_t infiniopCreateGemmDescriptor(
__INFINI_C infiniStatus_t infiniopCreateGemmDescriptor(
infiniopHandle_t handle,
infiniopGemmDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc,
......@@ -84,7 +84,7 @@ __C infiniStatus_t infiniopCreateGemmDescriptor(
#undef CREATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopGetGemmWorkspaceSize(
infiniopGemmDescriptor_t desc,
size_t *size) {
......@@ -137,7 +137,7 @@ infiniopGetGemmWorkspaceSize(
#undef GET
}
__C infiniStatus_t infiniopGemm(
__INFINI_C infiniStatus_t infiniopGemm(
infiniopGemmDescriptor_t desc,
void *workspace, size_t workspace_size,
void *c,
......@@ -198,7 +198,7 @@ __C infiniStatus_t infiniopGemm(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyGemmDescriptor(infiniopGemmDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -9,7 +9,7 @@
#include "metax/kv_caching_metax.h"
#endif
__C infiniStatus_t infiniopCreateKVCachingDescriptor(
__INFINI_C infiniStatus_t infiniopCreateKVCachingDescriptor(
infiniopHandle_t handle,
infiniopKVCachingDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t k_cache,
......@@ -57,7 +57,7 @@ __C infiniStatus_t infiniopCreateKVCachingDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetKVCachingWorkspaceSize(
__INFINI_C infiniStatus_t infiniopGetKVCachingWorkspaceSize(
infiniopKVCachingDescriptor_t desc,
size_t *size) {
......@@ -95,7 +95,7 @@ __C infiniStatus_t infiniopGetKVCachingWorkspaceSize(
#undef GET_SIZE
}
__C infiniStatus_t infiniopKVCaching(
__INFINI_C infiniStatus_t infiniopKVCaching(
infiniopKVCachingDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -139,7 +139,7 @@ __C infiniStatus_t infiniopKVCaching(
#undef CALCULATE
}
__C infiniStatus_t infiniopDestroyKVCachingDescriptor(
__INFINI_C infiniStatus_t infiniopDestroyKVCachingDescriptor(
infiniopKVCachingDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -12,7 +12,7 @@
#include "metax/layer_norm_metax.h"
#endif
__C infiniStatus_t infiniopCreateLayerNormDescriptor(
__INFINI_C infiniStatus_t infiniopCreateLayerNormDescriptor(
infiniopHandle_t handle,
infiniopLayerNormDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
......@@ -63,7 +63,7 @@ __C infiniStatus_t infiniopCreateLayerNormDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetLayerNormWorkspaceSize(infiniopLayerNormDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetLayerNormWorkspaceSize(infiniopLayerNormDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<op::layer_norm::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
......@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetLayerNormWorkspaceSize(infiniopLayerNormDescriptor
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniStatus_t infiniopLayerNorm(
__INFINI_C infiniStatus_t infiniopLayerNorm(
infiniopLayerNormDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -149,7 +149,7 @@ __C infiniStatus_t infiniopLayerNorm(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyLayerNormDescriptor(infiniopLayerNormDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -15,7 +15,7 @@
// #include "ascend/logsoftmax_ascend.h"
#endif
__C infiniStatus_t infiniopCreateLogSoftmaxDescriptor(
__INFINI_C infiniStatus_t infiniopCreateLogSoftmaxDescriptor(
infiniopHandle_t handle,
infiniopLogSoftmaxDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
......@@ -56,7 +56,7 @@ __C infiniStatus_t infiniopCreateLogSoftmaxDescriptor(
}
}
__C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -90,7 +90,7 @@ __C infiniStatus_t infiniopGetLogSoftmaxWorkspaceSize(infiniopLogSoftmaxDescript
}
}
__C infiniStatus_t infiniopLogSoftmax(
__INFINI_C infiniStatus_t infiniopLogSoftmax(
infiniopLogSoftmaxDescriptor_t desc,
void *workspace, size_t workspace_size,
void *y,
......@@ -129,7 +129,7 @@ __C infiniStatus_t infiniopLogSoftmax(
}
}
__C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescriptor_t desc) {
__INFINI_C infiniStatus_t infiniopDestroyLogSoftmaxDescriptor(infiniopLogSoftmaxDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
......
......@@ -6,7 +6,7 @@
#include "nvidia/lp_norm_nvidia.cuh"
#endif
__C infiniStatus_t infiniopCreateLPNormDescriptor(
__INFINI_C infiniStatus_t infiniopCreateLPNormDescriptor(
infiniopHandle_t handle,
infiniopLPNormDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
......@@ -47,7 +47,7 @@ __C infiniStatus_t infiniopCreateLPNormDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetLPNormWorkspaceSize(infiniopLPNormDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetLPNormWorkspaceSize(infiniopLPNormDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<op::lp_norm::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
......@@ -75,7 +75,7 @@ __C infiniStatus_t infiniopGetLPNormWorkspaceSize(infiniopLPNormDescriptor_t des
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniStatus_t infiniopLPNorm(
__INFINI_C infiniStatus_t infiniopLPNorm(
infiniopLPNormDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -114,7 +114,7 @@ __C infiniStatus_t infiniopLPNorm(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyLPNormDescriptor(infiniopLPNormDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -18,7 +18,7 @@
#include "moore/mul_moore.h"
#endif
__C infiniStatus_t infiniopCreateMulDescriptor(
__INFINI_C infiniStatus_t infiniopCreateMulDescriptor(
infiniopHandle_t handle,
infiniopMulDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc,
......@@ -68,7 +68,7 @@ __C infiniStatus_t infiniopCreateMulDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -109,7 +109,7 @@ __C infiniStatus_t infiniopGetMulWorkspaceSize(infiniopMulDescriptor_t desc, siz
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniStatus_t infiniopMul(
__INFINI_C infiniStatus_t infiniopMul(
infiniopMulDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -157,7 +157,7 @@ __C infiniStatus_t infiniopMul(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -15,7 +15,7 @@
#include "moore/ones_moore.h"
#endif
__C infiniStatus_t infiniopCreateOnesDescriptor(
__INFINI_C infiniStatus_t infiniopCreateOnesDescriptor(
infiniopHandle_t handle,
infiniopOnesDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y_desc,
......@@ -60,7 +60,7 @@ __C infiniStatus_t infiniopCreateOnesDescriptor(
#undef CREATE
}
__C infiniStatus_t infiniopGetOnesWorkspaceSize(infiniopOnesDescriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetOnesWorkspaceSize(infiniopOnesDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -98,7 +98,7 @@ __C infiniStatus_t infiniopGetOnesWorkspaceSize(infiniopOnesDescriptor_t desc, s
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniStatus_t infiniopOnes(
__INFINI_C infiniStatus_t infiniopOnes(
infiniopOnesDescriptor_t desc,
void *workspace,
size_t workspace_size,
......@@ -142,7 +142,7 @@ __C infiniStatus_t infiniopOnes(
#undef CALCULATE
}
__C infiniStatus_t
__INFINI_C infiniStatus_t
infiniopDestroyOnesDescriptor(infiniopOnesDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
......
......@@ -12,7 +12,7 @@
#include "metax/paged_attention_metax.h"
#endif
__C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
__INFINI_C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
infiniopHandle_t handle,
infiniopPagedAttentionDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
......@@ -54,7 +54,7 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
}
}
__C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
__INFINI_C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
infiniopPagedAttentionDescriptor_t desc,
size_t *size) {
......@@ -84,7 +84,7 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
}
}
__C infiniStatus_t infiniopPagedAttention(
__INFINI_C infiniStatus_t infiniopPagedAttention(
infiniopPagedAttentionDescriptor_t desc,
void *workspace, size_t workspace_size,
void *out, const void *q, const void *k_cache, const void *v_cache,
......@@ -118,7 +118,7 @@ __C infiniStatus_t infiniopPagedAttention(
}
}
__C infiniStatus_t infiniopDestroyPagedAttentionDescriptor(
__INFINI_C infiniStatus_t infiniopDestroyPagedAttentionDescriptor(
infiniopPagedAttentionDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
......
......@@ -12,7 +12,7 @@
#include "moore/paged_attention_prefill_moore.h"
#endif
__C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
__INFINI_C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
infiniopHandle_t handle,
infiniopPagedAttentionPrefillDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t out_desc,
......@@ -56,7 +56,7 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
}
}
__C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
__INFINI_C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
infiniopPagedAttentionPrefillDescriptor_t desc,
size_t *size) {
......@@ -86,7 +86,7 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
}
}
__C infiniStatus_t infiniopPagedAttentionPrefill(
__INFINI_C infiniStatus_t infiniopPagedAttentionPrefill(
infiniopPagedAttentionPrefillDescriptor_t desc,
void *workspace, size_t workspace_size,
void *out, const void *q, const void *k_cache, const void *v_cache,
......@@ -123,7 +123,7 @@ __C infiniStatus_t infiniopPagedAttentionPrefill(
}
}
__C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor(
__INFINI_C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor(
infiniopPagedAttentionPrefillDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
......
......@@ -12,7 +12,7 @@
#include "moore/paged_caching_moore.h"
#endif
__C infiniStatus_t infiniopCreatePagedCachingDescriptor(
__INFINI_C infiniStatus_t infiniopCreatePagedCachingDescriptor(
infiniopHandle_t handle,
infiniopPagedCachingDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t k_cache_desc,
......@@ -49,7 +49,7 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor(
}
}
__C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
__INFINI_C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
infiniopPagedCachingDescriptor_t desc,
size_t *size) {
......@@ -79,7 +79,7 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
}
}
__C infiniStatus_t infiniopPagedCaching(
__INFINI_C infiniStatus_t infiniopPagedCaching(
infiniopPagedCachingDescriptor_t desc,
void *workspace, size_t workspace_size,
void *k_cache, void *v_cache,
......@@ -113,7 +113,7 @@ __C infiniStatus_t infiniopPagedCaching(
}
}
__C infiniStatus_t infiniopDestroyPagedCachingDescriptor(
__INFINI_C infiniStatus_t infiniopDestroyPagedCachingDescriptor(
infiniopPagedCachingDescriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
......
......@@ -9,7 +9,7 @@
#include "moore/per_channel_quant_int8_moore.h"
#endif
__C infiniStatus_t infiniopCreatePerChannelQuantI8Descriptor(infiniopHandle_t handle,
__INFINI_C infiniStatus_t infiniopCreatePerChannelQuantI8Descriptor(infiniopHandle_t handle,
infiniopPerChannelQuantI8Descriptor_t *desc_ptr,
infiniopTensorDescriptor_t x_packed_desc,
infiniopTensorDescriptor_t x_scale_desc,
......@@ -40,7 +40,7 @@ __C infiniStatus_t infiniopCreatePerChannelQuantI8Descriptor(infiniopHandle_t ha
#undef CREATE
}
__C infiniStatus_t infiniopGetPerChannelQuantI8WorkspaceSize(infiniopPerChannelQuantI8Descriptor_t desc, size_t *size) {
__INFINI_C infiniStatus_t infiniopGetPerChannelQuantI8WorkspaceSize(infiniopPerChannelQuantI8Descriptor_t desc, size_t *size) {
switch (desc->device_type) {
#define GET(CASE, NAMESPACE) \
case CASE: \
......@@ -61,7 +61,7 @@ __C infiniStatus_t infiniopGetPerChannelQuantI8WorkspaceSize(infiniopPerChannelQ
#undef GET
}
__C infiniStatus_t infiniopPerChannelQuantI8(infiniopPerChannelQuantI8Descriptor_t desc,
__INFINI_C infiniStatus_t infiniopPerChannelQuantI8(infiniopPerChannelQuantI8Descriptor_t desc,
void *workspace,
size_t workspace_size,
void *x_packed,
......@@ -90,7 +90,7 @@ __C infiniStatus_t infiniopPerChannelQuantI8(infiniopPerChannelQuantI8Descriptor
#undef QUANT
}
__C infiniStatus_t infiniopDestroyPerChannelQuantI8Descriptor(infiniopPerChannelQuantI8Descriptor_t desc) {
__INFINI_C infiniStatus_t infiniopDestroyPerChannelQuantI8Descriptor(infiniopPerChannelQuantI8Descriptor_t desc) {
#define DESTROY(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<op::per_channel_quant_int8::NAMESPACE::Descriptor *>(desc); \
......
......@@ -16,10 +16,21 @@ static cudaError argMax_(
void *workspace_ptr,
size_t &workspace_len,
cudaStream_t stream) {
#if CUDART_VERSION >= 11000 && !defined(ENABLE_QY_API) && !defined(ENABLE_HYGON_API)
// New interface: separate value and index outputs
T *max_value = &kv_pair->value;
int *max_index = &kv_pair->key;
return cub::DeviceReduce::ArgMax(
workspace_ptr, workspace_len,
logits, max_value, max_index, n,
stream);
#else
// Old interface
return cub::DeviceReduce::ArgMax(
workspace_ptr, workspace_len,
logits, kv_pair, n,
stream);
#endif
}
template <class Tval, class Tidx>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment