// prefix_cache.h
// Copyright (c) OpenMMLab. All rights reserved.

#include <cuda_fp16.h>

// Declaration only: the definition lives outside this header.
//
// key_cache is taken as a writable T* and src as a read-only const T*.
// NOTE(review): going by the name, this presumably inserts the contents of
// `src` into `key_cache` using work issued on stream `st` -- confirm against
// the definition.
//
// Integer arguments, in order: L, H, Dx, s, X, S. Their meaning is not
// established by this header. Unlike invokeInsertValueCache, which takes a
// single `D`, this overload takes `Dx` and `X`.
// NOTE(review): presumably the key layout splits the head dimension into
// Dx groups of X elements, with s = tokens being inserted and S = cache
// capacity in tokens -- TODO confirm and document at the definition.
template<typename T>
void invokeInsertKeyCache(T*           key_cache,
                          const T*     src,
                          int          L,
                          int          H,
                          int          Dx,
                          int          s,
                          int          X,
                          int          S,
                          cudaStream_t st);

// Declaration only: the definition lives outside this header.
//
// value_cache is taken as a writable T* and src as a read-only const T*.
// NOTE(review): going by the name, this presumably inserts the contents of
// `src` into `value_cache` using work issued on stream `st` -- confirm
// against the definition.
//
// Integer arguments, in order: L, H, s, D, S. Their meaning is not
// established by this header. Note the order differs from
// invokeInsertKeyCache: here `s` precedes the single dimension argument `D`,
// and there is no `X`.
// NOTE(review): presumably D = head dimension, s = tokens being inserted and
// S = cache capacity in tokens -- TODO confirm and document at the definition.
template<typename T>
void invokeInsertValueCache(T* value_cache, const T* src, int L, int H, int s, int D, int S, cudaStream_t st);