/* attention.h -- declarations for the infiniop attention operator API. */
#ifndef __INFINIOP_ATTENTION_H__
#define __INFINIOP_ATTENTION_H__

#include "../operator.h"
#include "./matmul.h"
#include "./swiglu.h"

/**
 * Handle type used by the attention operator functions declared in this header.
 *
 * It is an alias for a pointer to InfiniopDescriptor; the layout of
 * InfiniopDescriptor is not visible from this header.
 */
typedef InfiniopDescriptor *infiniopAttentionDescriptor_t;

/**
 * Create an attention descriptor from the given tensor descriptors.
 *
 * NOTE(review): the semantics below are inferred from the names and types in
 * this declaration only; the implementation is not visible here -- confirm.
 *
 * @param handle        Library handle.
 * @param desc_ptr      Pointer to a descriptor handle; presumably receives the
 *                      newly created descriptor on success -- TODO confirm.
 * @param out_desc      Tensor descriptor for the output.
 * @param q_desc        Tensor descriptor for q (presumably the query).
 * @param k_desc        Tensor descriptor for k (presumably the key).
 * @param v_desc        Tensor descriptor for v (presumably the value).
 * @param k_cache_desc  Tensor descriptor for the k cache.
 * @param v_cache_desc  Tensor descriptor for the v cache.
 * @param pos           Unsigned 64-bit position value; presumably the offset
 *                      into the k/v caches for the current call -- TODO confirm
 *                      units and valid range against the implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t handle,
                                                                infiniopAttentionDescriptor_t *desc_ptr,
                                                                infiniopTensorDescriptor_t out_desc,
                                                                infiniopTensorDescriptor_t q_desc,
                                                                infiniopTensorDescriptor_t k_desc,
                                                                infiniopTensorDescriptor_t v_desc,
                                                                infiniopTensorDescriptor_t k_cache_desc,
                                                                infiniopTensorDescriptor_t v_cache_desc,
                                                                uint64_t pos);

/**
 * Query the workspace size associated with an attention descriptor.
 *
 * @param desc  Attention descriptor to query.
 * @param size  Pointer to a size_t; presumably receives the required workspace
 *              size (likely in bytes) to pass to infiniopAttention -- TODO
 *              confirm units against the implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);

/**
 * Run the attention operator described by @p desc.
 *
 * NOTE(review): buffer layouts, memory spaces and the exact computation are
 * not visible from this declaration; the notes below are grounded only in the
 * parameter names and const-qualification -- confirm against the
 * implementation.
 *
 * @param desc            Attention descriptor.
 * @param workspace       Caller-provided scratch buffer.
 * @param workspace_size  Size of @p workspace; presumably must be at least the
 *                        value reported by infiniopGetAttentionWorkspaceSize
 *                        -- TODO confirm.
 * @param out             Output buffer (non-const, so it may be written).
 * @param q               Input buffer q; const-qualified, not written through
 *                        this pointer.
 * @param k               Input buffer k; const-qualified, not written through
 *                        this pointer.
 * @param v               Input buffer v; const-qualified, not written through
 *                        this pointer.
 * @param k_cache         k cache buffer; non-const, so presumably updated by
 *                        the call -- TODO confirm.
 * @param v_cache         v cache buffer; non-const, so presumably updated by
 *                        the call -- TODO confirm.
 * @param stream          Opaque pointer; presumably a backend-specific
 *                        execution stream/queue -- TODO confirm.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopAttention(infiniopAttentionDescriptor_t desc,
                                                void *workspace,
                                                size_t workspace_size,
                                                void *out,
                                                void const *q,
                                                void const *k,
                                                void const *v,
                                                void *k_cache,
                                                void *v_cache,
                                                void *stream);

/**
 * Destroy an attention descriptor.
 *
 * @param desc  Descriptor to destroy; presumably one obtained from
 *              infiniopCreateAttentionDescriptor, and not to be used after
 *              this call -- TODO confirm against the implementation.
 * @return An infiniopStatus_t status code.
 */
__C __export infiniopStatus_t infiniopDestroyAttentionDescriptor(infiniopAttentionDescriptor_t desc);
#endif