/* attention.h: C API declarations for the InfiniOp attention operator. */
#ifndef __INFINIOP_ATTENTION_API_H__
#define __INFINIOP_ATTENTION_API_H__
#include "../operator_descriptor.h"
#include "gemm.h"
#include "swiglu.h"
/**
 * Handle type for an attention operator descriptor.
 *
 * This is a pointer to struct InfiniopDescriptor; the struct's definition is
 * not visible in this header. The handle is the type produced through the
 * desc_ptr argument of infiniopCreateAttentionDescriptor() and accepted by
 * the other attention functions declared in this header.
 */
typedef struct InfiniopDescriptor *infiniopAttentionDescriptor_t;
/**
 * Creates an attention operator descriptor.
 *
 * NOTE(review): only the declaration is visible here. The parameter roles
 * below are inferred from names and types and should be confirmed against
 * the implementation.
 *
 * @param handle        Library handle passed through to the implementation.
 * @param desc_ptr      Address of a descriptor handle; presumably receives
 *                      the newly created descriptor on success.
 * @param out_desc      Tensor descriptor for the output tensor.
 * @param q_desc        Tensor descriptor for the q input (presumably query).
 * @param k_desc        Tensor descriptor for the k input (presumably key).
 * @param v_desc        Tensor descriptor for the v input (presumably value).
 * @param k_cache_desc  Tensor descriptor for the k cache buffer.
 * @param v_cache_desc  Tensor descriptor for the v cache buffer.
 * @param pos           Position value; presumably the offset within the k/v
 *                      caches at which the new k/v entries belong.
 *                      TODO confirm units and valid range.
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
                                                              infiniopAttentionDescriptor_t *desc_ptr,
                                                              infiniopTensorDescriptor_t out_desc,
                                                              infiniopTensorDescriptor_t q_desc,
                                                              infiniopTensorDescriptor_t k_desc,
                                                              infiniopTensorDescriptor_t v_desc,
                                                              infiniopTensorDescriptor_t k_cache_desc,
                                                              infiniopTensorDescriptor_t v_cache_desc,
                                                              size_t pos);
/**
 * Queries the workspace size associated with an attention descriptor.
 *
 * @param desc  Attention descriptor to query.
 * @param size  Address of a size_t; presumably receives the required
 *              workspace size (likely in bytes, TODO confirm) to pass as
 *              workspace_size to infiniopAttention().
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);
/**
 * Runs the attention operator described by desc.
 *
 * NOTE(review): only the declaration is visible here. The parameter roles
 * below are inferred from names, types and const-qualification and should be
 * confirmed against the implementation.
 *
 * @param desc            Attention descriptor.
 * @param workspace       Caller-provided scratch buffer.
 * @param workspace_size  Size of workspace; presumably must be at least the
 *                        value reported by
 *                        infiniopGetAttentionWorkspaceSize(). TODO confirm.
 * @param out             Output buffer (non-const, so it may be written).
 * @param q               Read-only q input data.
 * @param k               Read-only k input data.
 * @param v               Read-only v input data.
 * @param k_cache         k cache buffer; non-const, so it may be modified by
 *                        the call (presumably updated with k).
 * @param v_cache         v cache buffer; non-const, so it may be modified by
 *                        the call (presumably updated with v).
 * @param stream          Untyped stream pointer; presumably a device
 *                        stream/queue handle. Verify against the backend.
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
                                              void *workspace,
                                              size_t workspace_size,
                                              void *out,
                                              const void *q,
                                              const void *k,
                                              const void *v,
                                              void *k_cache,
                                              void *v_cache,
                                              void *stream);
/**
 * Destroys an attention descriptor.
 *
 * @param desc  Descriptor to destroy; presumably one obtained from
 *              infiniopCreateAttentionDescriptor() and not usable after this
 *              call. Confirm against the implementation.
 * @return An infiniStatus_t status code.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
#endif