/* flash_attention.h - C API declarations for the FlashAttention operator. */
#ifndef __INFINIOP_FLASH_ATTENTION_API_H__
#define __INFINIOP_FLASH_ATTENTION_API_H__

#include "../operator_descriptor.h"

/*
 * Descriptor handle type used by every FlashAttention entry point declared in
 * this header. It is a pointer to struct InfiniopDescriptor, which is not
 * defined in this file (presumably it comes from operator_descriptor.h -
 * TODO confirm).
 */
typedef struct InfiniopDescriptor *infiniopFlashAttentionDescriptor_t;

8
__INFINI_C __export infiniStatus_t infiniopCreateFlashAttentionDescriptor(
9
10
11
12
13
14
15
16
17
18
    infiniopHandle_t handle,
    infiniopFlashAttentionDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    infiniopTensorDescriptor_t q_desc,
    infiniopTensorDescriptor_t k_desc,
    infiniopTensorDescriptor_t v_desc,
    infiniopTensorDescriptor_t total_kv_len,
    float scale,
    char is_causal);

19
__INFINI_C __export infiniStatus_t infiniopGetFlashAttentionWorkspaceSize(
20
21
22
    infiniopFlashAttentionDescriptor_t desc,
    size_t *size);

23
__INFINI_C __export infiniStatus_t infiniopFlashAttention(
24
25
26
27
28
29
30
31
32
33
    infiniopFlashAttentionDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *out,
    const void *q,
    const void *k,
    const void *v,
    const void *total_kv_len,
    void *stream);

34
__INFINI_C __export infiniStatus_t infiniopDestroyFlashAttentionDescriptor(
35
36
    infiniopFlashAttentionDescriptor_t desc);
#endif