/* kv_caching.h - C API declarations for the InfiniOP KV-caching operator. */
#ifndef __INFINIOP_KV_CACHING_API_H__
#define __INFINIOP_KV_CACHING_API_H__

#include "../operator_descriptor.h"

/**
 * Handle type for a KV-caching operator descriptor.
 *
 * It is a pointer to `struct InfiniopDescriptor`; that struct is not defined
 * in this header (presumably it comes from operator_descriptor.h - confirm).
 */
typedef struct InfiniopDescriptor *infiniopKVCachingDescriptor_t;

8
__INFINI_C __export infiniStatus_t infiniopCreateKVCachingDescriptor(
9
10
11
12
13
14
15
16
    infiniopHandle_t handle,
    infiniopKVCachingDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t k_cache,
    infiniopTensorDescriptor_t v_cache,
    infiniopTensorDescriptor_t k,
    infiniopTensorDescriptor_t v,
    infiniopTensorDescriptor_t past_kv_lengths);

17
__INFINI_C __export infiniStatus_t infiniopGetKVCachingWorkspaceSize(infiniopKVCachingDescriptor_t desc, size_t *size);
18

19
__INFINI_C __export infiniStatus_t infiniopKVCaching(infiniopKVCachingDescriptor_t desc,
20
21
22
23
24
25
26
27
28
                                              void *workspace,
                                              size_t workspace_size,
                                              void *k_cache,
                                              void *v_cache,
                                              const void *k,
                                              const void *v,
                                              const void *past_kv_lengths,
                                              void *stream);

29
__INFINI_C __export infiniStatus_t infiniopDestroyKVCachingDescriptor(infiniopKVCachingDescriptor_t desc);
30
31

#endif