attention.h 1.87 KB
Newer Older
PanZezhongQY's avatar
PanZezhongQY committed
1
2
3
4
#ifndef __INFINIOP_ATTENTION_H__
#define __INFINIOP_ATTENTION_H__

#include "../operator.h"
5
6
#include "matmul.h"
#include "swiglu.h"
PanZezhongQY's avatar
PanZezhongQY committed
7
8
9

/**
 * Handle type used by the attention operator API in this header.
 *
 * This is an alias for a pointer to InfiniopDescriptor, so values of this
 * type are pointers even though the name does not show the indirection.
 */
typedef InfiniopDescriptor *infiniopAttentionDescriptor_t;

PanZezhong's avatar
PanZezhong committed
10
11
12
13
14
15
16
17
18
/**
 * Creates an attention operator descriptor.
 *
 * NOTE(review): only the declaration is visible here; the semantics below
 * that go beyond the signature are assumptions to confirm against the
 * implementation.
 *
 * @param handle        Library handle the descriptor is created with.
 * @param desc_ptr      Output location for the new descriptor handle.
 * @param out_desc      Tensor descriptor for the attention output.
 * @param q_desc        Tensor descriptor for the query input.
 * @param k_desc        Tensor descriptor for the key input.
 * @param v_desc        Tensor descriptor for the value input.
 * @param k_cache_desc  Tensor descriptor for the key cache.
 * @param v_cache_desc  Tensor descriptor for the value cache.
 * @param pos           Presumably the position (offset) in the K/V caches at
 *                      which the new k/v entries are placed — TODO confirm.
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
                                                              infiniopAttentionDescriptor_t *desc_ptr,
                                                              infiniopTensorDescriptor_t out_desc,
                                                              infiniopTensorDescriptor_t q_desc,
                                                              infiniopTensorDescriptor_t k_desc,
                                                              infiniopTensorDescriptor_t v_desc,
                                                              infiniopTensorDescriptor_t k_cache_desc,
                                                              infiniopTensorDescriptor_t v_cache_desc,
                                                              size_t pos);
PanZezhongQY's avatar
PanZezhongQY committed
19

PanZezhong's avatar
PanZezhong committed
20
/**
 * Queries the workspace size associated with an attention descriptor.
 *
 * @param desc  Attention descriptor to query.
 * @param size  Output pointer; presumably receives the workspace size that
 *              infiniopAttention needs for this descriptor (units assumed to
 *              be bytes — TODO confirm).
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);
PanZezhongQY's avatar
PanZezhongQY committed
21

PanZezhong's avatar
PanZezhong committed
22
23
24
25
26
27
28
29
30
31
/**
 * Runs the attention operator described by `desc`.
 *
 * By signature, q, k and v are read-only (const) buffers, while out, k_cache
 * and v_cache are non-const and may therefore be written by the call.
 *
 * NOTE(review): buffer layouts, memory space and whether the call is
 * asynchronous with respect to `stream` are not visible from this header.
 *
 * @param desc            Attention descriptor.
 * @param workspace       Scratch buffer supplied by the caller.
 * @param workspace_size  Size of `workspace`; presumably must be at least the
 *                        value reported by infiniopGetAttentionWorkspaceSize
 *                        — TODO confirm.
 * @param out             Output buffer.
 * @param q               Query input buffer (read-only).
 * @param k               Key input buffer (read-only).
 * @param v               Value input buffer (read-only).
 * @param k_cache         Key cache buffer (mutable).
 * @param v_cache         Value cache buffer (mutable).
 * @param stream          Opaque pointer; presumably a device stream/queue
 *                        handle — TODO confirm.
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
                                              void *workspace,
                                              size_t workspace_size,
                                              void *out,
                                              const void *q,
                                              const void *k,
                                              const void *v,
                                              void *k_cache,
                                              void *v_cache,
                                              void *stream);
PanZezhongQY's avatar
PanZezhongQY committed
32

PanZezhong's avatar
PanZezhong committed
33
/**
 * Destroys an attention descriptor.
 *
 * @param desc  Descriptor to destroy; presumably one obtained from
 *              infiniopCreateAttentionDescriptor and not to be used after
 *              this call — TODO confirm.
 * @return An infiniStatus_t status code.
 */
__C __export infiniStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
PanZezhongQY's avatar
PanZezhongQY committed
34
#endif