// Copyright (c) OpenMMLab. All rights reserved.

#pragma once

#include "src/turbomind/utils/allocator.h"
#include "src/turbomind/utils/cuda_utils.h"
#include "src/turbomind/utils/logger.h"
#include <algorithm>
#include <cstdint>
#include <cuda_runtime.h>
#include <iterator>
#include <numeric>
#include <ostream>
#include <queue>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace turbomind {

// [L, H, S, D]

// [L, S/x, H, x, D]

// Bookkeeping record for a single block of the pool.
//
// The predicates is_active / is_cached / is_free classify a block purely from
// `use_count` and `timestamp`, so every field is value-initialized here: a
// default-constructed Block then reads as "free" instead of carrying
// indeterminate values. Aggregate initialization (`Block{id, use_count, ...}`)
// keeps working as before.
struct Block {
    int      id{};         // fixed linear id in the pool
    int      use_count{};  // active sequences using the block
    uint64_t unique_id{};  // unique for every block allocation
    uint64_t timestamp{};  // 0 for a block that has not been stamped yet
    void*    data{};       // pointer associated with the block; null by default

    // Streams a description of `block`; only declared in this header.
    friend std::ostream& operator<<(std::ostream& os, const Block& block);

    // Returns whatever operator<< writes for `b`, collected into a string.
    friend std::string to_string(const Block& b)
    {
        std::stringstream ss;
        ss << b;
        return ss.str();
    }
};

// Lists of block ids (cf. Block::id) and of per-allocation unique ids
// (cf. Block::unique_id). BlockManager::Allocate returns one of each.
using BlockIds  = std::vector<int>;
using UniqueIds = std::vector<uint64_t>;

inline bool is_active(const Block& block)
{
Li Zhang's avatar
Li Zhang committed
45
46
    // timestamp may be 0 for newly allocated block that has not been written
    return block.use_count > 0;
Li Zhang's avatar
Li Zhang committed
47
48
49
50
}

inline bool is_cached(const Block& block)
{
Li Zhang's avatar
Li Zhang committed
51
    return block.use_count == 0 && block.timestamp != 0;
Li Zhang's avatar
Li Zhang committed
52
53
54
55
}

inline bool is_free(const Block& block)
{
Li Zhang's avatar
Li Zhang committed
56
    return block.use_count == 0 && block.timestamp == 0;
Li Zhang's avatar
Li Zhang committed
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
}

// Value type returned by BlockManager::TakeSnapshot().
// NOTE(review): TakeSnapshot() is defined outside this header; the field
// meanings below are inferred from the names -- confirm against its definition.
struct Snapshot {
    int              active;     // presumably the number of active blocks
    int              cached;     // presumably the number of cached blocks
    int              free;       // presumably the number of free blocks
    std::vector<int> use_count;  // presumably Block::use_count for each block
};

class BlockManager {
public:
    explicit BlockManager(size_t block_size, double block_count, int chunk_size, IAllocator* allocator);

    ~BlockManager();

    // free -> active (use_count = 1, ref_count = 1)
Li Zhang's avatar
Li Zhang committed
73
    [[nodiscard]] std::pair<BlockIds, UniqueIds> Allocate(int count);
Li Zhang's avatar
Li Zhang committed
74
75

    // cached -> active (use_count += 1)
Li Zhang's avatar
Li Zhang committed
76
    [[maybe_unused]] int Lock(const BlockIds& ids);
Li Zhang's avatar
Li Zhang committed
77
78

    // active -> cached (use_count -= 1)
Li Zhang's avatar
Li Zhang committed
79
    [[maybe_unused]] int Unlock(const BlockIds& ids);
Li Zhang's avatar
Li Zhang committed
80
81
82
83
84

    // cached -> free (ref_count = 0)
    void Evict(int count);

    // cached -> free (ref_count -= 1)
Li Zhang's avatar
Li Zhang committed
85
    void Free(BlockIds bs);
Li Zhang's avatar
Li Zhang committed
86
87

    // increase timestamp in reversed order
Li Zhang's avatar
Li Zhang committed
88
89
90
    void Touch(const BlockIds& bs);

    [[nodiscard]] int Verify(const BlockIds& block_ids, const UniqueIds& unique_ids);
Li Zhang's avatar
Li Zhang committed
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

    Snapshot TakeSnapshot();

    int max_block_count() const noexcept
    {
        return max_block_count_;
    }

    int active_count() const noexcept
    {
        return active_ids_.size();
    }

    int cached_count() const noexcept
    {
        return cached_ids_.size();
    }

    int free_count() const noexcept
    {
        return (max_block_count_ - blocks_.size()) + free_ids_.size();
    }

Li Zhang's avatar
Li Zhang committed
114
115
116
117
118
119
120
121
122
123
    Block& block(int idx)
    {
        return blocks_[idx];
    }

    int unique_id(int idx)
    {
        return blocks_[idx].unique_id;
    }

Li Zhang's avatar
Li Zhang committed
124
125
126
127
128
129
    friend std::ostream& operator<<(std::ostream& os, const BlockManager&);

private:
    static size_t GetBlockCount(size_t block_size, double ratio);

    // move indices between sets
Li Zhang's avatar
Li Zhang committed
130
    static void Move(BlockIds& src, const BlockIds& delta, BlockIds& dst);
Li Zhang's avatar
Li Zhang committed
131
132
133
134
135
136
137
138
139
140
141
142

    // allocate a chunk of blocks
    bool Malloc();

private:
    size_t      block_size_;
    int         max_block_count_{};
    int         chunk_size_{};
    IAllocator* allocator_;

    std::vector<void*> chunks_;

Li Zhang's avatar
Li Zhang committed
143
144
145
    BlockIds active_ids_;
    BlockIds cached_ids_;
    BlockIds free_ids_;
Li Zhang's avatar
Li Zhang committed
146
147
148
149
150
151
152
153

    std::vector<Block> blocks_;  // < 100k

    uint64_t unique_id_{1};
    uint64_t timestamp_{1};
};

}  // namespace turbomind