// paged_compiler.hpp
#pragma once

#include "graph_compiler.hpp"

#include <unordered_map>

namespace infinilm::engine {
class PagedCompiler : public GraphCompiler {
public:
10
    PagedCompiler(const std::shared_ptr<InfinilmModel> &model, RankBarrier *barrier);
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31

    void compile() override;

    Compiled get_compiled(const InfinilmModel::Input &input) override;

private:
    std::vector<size_t> decode_batch_sizes_;

    infinicore::Tensor block_tables_holder_;

    struct CompiledResult {
        InfinilmModel::Input input;
        Compiled compiled;
    };

    std::unordered_map<
        size_t, // num_requests
        CompiledResult>
        compiled_map_decode_;
};
} // namespace infinilm::engine