"scripts/generation.py" did not exist on "2603f5108d4a32139a85dbafdf5440ac85ada691"
jiuge_kv_cache.cpp 2.54 KB
Newer Older
PanZezhong's avatar
init  
PanZezhong committed
1
2
3
4
5
6
7
8
#include "jiuge_impl.hpp"

/// Allocates an empty key/value cache sized for `model`.
///
/// For every device index in [0, model->dev_resources.size()) the device
/// `model->dev_ids[idev]` is made current, and for each of the
/// `model->meta.nlayer` layers one K buffer and one V buffer of shape
/// {meta.dctx, meta.nkvh / ndev, meta.dh} and dtype `meta.dt_logits` is
/// created via Tensor::buffer. The buffers are stored as
/// `cache->k[idev][layer]` and `cache->v[idev][layer]`.
///
/// @param model Model whose metadata and device list determine the layout.
///              Must not be null.
/// @return A heap-allocated cache; the caller releases it with dropKVCache().
///         If the model has no devices the returned cache is empty.
__C struct KVCache *createKVCache(const JiugeModel *model) {
    KVCache *cache = new KVCache();
    auto ndev = model->dev_resources.size();
    if (ndev == 0) {
        // Nothing to allocate, and dividing nkvh by ndev below would be
        // undefined behaviour.
        return cache;
    }

    // KV heads are split evenly across devices.
    auto nkvh = model->meta.nkvh / ndev;
    auto max_len = model->meta.dctx;
    auto dh = model->meta.dh;
    auto shape = std::vector<size_t>{max_len, nkvh, dh};

    for (unsigned int idev = 0; idev < ndev; idev++) {
        // Buffers are created while their target device is current.
        RUN_INFINI(infinirtSetDevice(model->device, model->dev_ids[idev]));

        std::vector<std::shared_ptr<Tensor>> kcache;
        std::vector<std::shared_ptr<Tensor>> vcache;
        kcache.reserve(model->meta.nlayer);
        vcache.reserve(model->meta.nlayer);

        for (unsigned int layer = 0; layer < model->meta.nlayer; layer++) {
            // The call results are already temporaries, so no std::move is
            // needed to avoid a copy.
            kcache.push_back(Tensor::buffer(model->meta.dt_logits, shape));
            vcache.push_back(Tensor::buffer(model->meta.dt_logits, shape));
        }

        // Hand the per-device vectors over instead of copying every
        // shared_ptr (and bumping every reference count).
        cache->k.push_back(std::move(kcache));
        cache->v.push_back(std::move(vcache));
    }

    return cache;
}

/// Creates a fresh cache for `model` and copies the leading `seq_len`
/// positions of every K and V buffer of `kv_cache` into it.
///
/// The copy is a device-to-device memcpy of
/// `seq_len * (meta.nkvh / ndev) * meta.dh * dsize(meta.dt_logits)` bytes per
/// buffer, issued while the owning device is current.
///
/// @param model    Model describing the cache layout. Must not be null.
/// @param kv_cache Source cache. Must not be null. It is indexed with the
///                 model's device and layer counts, so it is assumed to have
///                 been created for the same model (e.g. by createKVCache) —
///                 TODO confirm with callers.
/// @param seq_len  Number of leading positions to copy. Values larger than
///                 `meta.dctx` are clamped to `meta.dctx`, because each
///                 buffer is allocated with exactly `dctx` rows and a larger
///                 copy would overrun both source and destination.
/// @return The newly allocated cache; release it with dropKVCache().
__C struct KVCache *duplicateKVCache(const JiugeModel *model,
                                     const KVCache *kv_cache,
                                     unsigned int seq_len) {
    auto new_kv_cache = createKVCache(model);
    auto ndev = model->dev_resources.size();
    if (ndev == 0) {
        // No per-device buffers exist; also avoids dividing by zero below.
        return new_kv_cache;
    }

    auto nkvh = model->meta.nkvh / ndev;
    auto dh = model->meta.dh;
    auto dt_size = dsize(model->meta.dt_logits);

    // Never copy more rows than a buffer holds.
    const size_t capacity = static_cast<size_t>(model->meta.dctx);
    const size_t requested = static_cast<size_t>(seq_len);
    const size_t rows = requested < capacity ? requested : capacity;

    // The byte count is identical for every buffer, so compute it once.
    const size_t nbytes = rows * nkvh * dh * dt_size;

    for (unsigned int idev = 0; idev < ndev; idev++) {
        RUN_INFINI(infinirtSetDevice(model->device, model->dev_ids[idev]));
        for (unsigned int layer = 0; layer < model->meta.nlayer; layer++) {
            RUN_INFINI(infinirtMemcpy(new_kv_cache->k[idev][layer]->data(),
                                      kv_cache->k[idev][layer]->data(),
                                      nbytes,
                                      INFINIRT_MEMCPY_D2D));
            RUN_INFINI(infinirtMemcpy(new_kv_cache->v[idev][layer]->data(),
                                      kv_cache->v[idev][layer]->data(),
                                      nbytes,
                                      INFINIRT_MEMCPY_D2D));
        }
    }
    return new_kv_cache;
}

/// Releases a cache obtained from createKVCache() or duplicateKVCache().
///
/// For each device of `model` the device is made current and this cache's
/// references to that device's per-layer K and V tensors are dropped; the
/// cache object itself is then deleted.
///
/// @param model    Model the cache was created for; supplies the device list
///                 and layer count used to walk the cache. Must not be null.
/// @param kv_cache Cache to destroy. A null pointer is accepted and ignored,
///                 matching the behaviour of `delete`.
__C void dropKVCache(JiugeModel const *model, KVCache *kv_cache) {
    if (kv_cache == nullptr) {
        return;
    }

    auto ndev = model->dev_resources.size();
    for (unsigned int idev = 0; idev < ndev; idev++) {
        // NOTE(review): the references are dropped explicitly per device,
        // presumably so each buffer is freed while its device is current
        // rather than on whichever device is active at `delete` — confirm.
        RUN_INFINI(infinirtSetDevice(model->device, model->dev_ids[idev]));
        for (unsigned int layer = 0; layer < model->meta.nlayer; layer++) {
            kv_cache->k[idev][layer].reset();
            kv_cache->v[idev][layer].reset();
        }
    }
    delete kv_cache;
}