/* paged_caching.h: declarations for the InfiniOP Paged Caching operator API. */
#ifndef __INFINIOP_PAGED_CACHING_API_H__
#define __INFINIOP_PAGED_CACHING_API_H__

#include "../operator_descriptor.h"

// Handle type for a Paged Caching descriptor. It is a pointer to
// struct InfiniopDescriptor, which is not defined in this header, so callers
// should treat the handle as opaque and only pass it to the
// infiniop*PagedCaching* functions declared below.
typedef struct InfiniopDescriptor *infiniopPagedCachingDescriptor_t;

/**
 * @brief Creates a descriptor for the Paged Caching operation.
 *
 * This function initializes a descriptor that holds all the metadata needed
 * to copy key/value vectors into their respective cache pools.
 *
 * @param handle The handle to the InfiniOP library context.
 * @param desc_ptr A pointer to store the created descriptor.
 * @param k_cache_desc Descriptor for the key cache pool tensor.
 * @param v_cache_desc Descriptor for the value cache pool tensor.
19
20
 * @param k_desc Descriptor for the source key tensor.
 * @param v_desc Descriptor for the source value tensor.
21
22
23
 * @param slot_mapping_desc Descriptor for the slot mapping tensor.
 * @return infiniStatus_t Status code of the operation.
 */
24
__INFINI_C __export infiniStatus_t infiniopCreatePagedCachingDescriptor(
25
26
27
28
    infiniopHandle_t handle,
    infiniopPagedCachingDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t k_cache_desc,
    infiniopTensorDescriptor_t v_cache_desc,
29
30
    infiniopTensorDescriptor_t k_desc,
    infiniopTensorDescriptor_t v_desc,
31
32
33
34
35
36
37
38
39
    infiniopTensorDescriptor_t slot_mapping_desc);

/**
 * @brief Retrieves the workspace size required for the Paged Caching operation.
 *
 * @param desc The Paged Caching descriptor.
 * @param size A pointer to store the required workspace size in bytes (typically 0).
 * @return infiniStatus_t Status code of the operation.
 */
40
__INFINI_C __export infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
41
42
43
44
45
46
47
48
49
50
    infiniopPagedCachingDescriptor_t desc, size_t *size);

/**
 * @brief Executes the Paged Caching operation.
 *
 * @param desc The Paged Caching descriptor.
 * @param workspace Pointer to the workspace memory.
 * @param workspace_size The size of the workspace.
 * @param k_cache Pointer to the key cache pool data.
 * @param v_cache Pointer to the value cache pool data.
51
52
 * @param k Pointer to the source key tensor data.
 * @param v Pointer to the source value tensor data.
53
54
55
56
 * @param slot_mapping Pointer to the slot mapping data.
 * @param stream The CUDA stream for the operation. Can be NULL.
 * @return infiniStatus_t Status code of the operation.
 */
57
__INFINI_C __export infiniStatus_t infiniopPagedCaching(
58
59
60
61
62
    infiniopPagedCachingDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *k_cache,
    void *v_cache,
63
64
    const void *k,
    const void *v,
65
66
67
68
69
70
71
72
73
    const void *slot_mapping,
    void *stream);

/**
 * @brief Destroys a Paged Caching descriptor.
 *
 * @param desc The descriptor to be destroyed.
 * @return infiniStatus_t Status code of the operation.
 */
74
__INFINI_C __export infiniStatus_t infiniopDestroyPagedCachingDescriptor(
75
76
77
    infiniopPagedCachingDescriptor_t desc);

#endif // __INFINIOP_PAGED_CACHING_API_H__