// infinilm_model.hpp — abstract base interface for InfiniLM language models.
#pragma once

#include "../cache/cache.hpp"

#include "infinicore/nn/module.hpp"

#include <any>
#include <optional>
#include <string>
namespace infinilm {
class InfinilmModel : public infinicore::nn::Module {
public:
Jiacheng Huang's avatar
Jiacheng Huang committed
14
15
16
17
18
19
    struct Config {
        std::string model_type;

        virtual ~Config() = default;
    };

20
21
    struct Input {
        /// Token IDs tensor of shape `[batch, seq_len]`.
22
        std::optional<infinicore::Tensor> input_ids;
23
        /// Position IDs tensor of shape `[batch, seq_len]` or `[seq_len]`.
24
        std::optional<infinicore::Tensor> position_ids;
PanZezhong's avatar
PanZezhong committed
25
        /// Past Lengths of cached sequence for each request, of shape `[num_requests]`.
26
27
28
        std::optional<infinicore::Tensor> past_sequence_lengths;
        /// ToTal Lengths for each request sequence, of shape `[num_requests]`.
        std::optional<infinicore::Tensor> total_sequence_lengths;
PanZezhong's avatar
PanZezhong committed
29
        /// Offsets of each request in a continous-batched sequence, of shape `[num_requests + 1]`.
30
31
32
33
34
        std::optional<infinicore::Tensor> input_offsets;
        /// Block ids for each request `[batch, max_block_table_length]`. Used for paged cache.
        std::optional<infinicore::Tensor> block_tables;
        /// Slot ids for each token `[seq]`. Used for paged cache.
        std::optional<infinicore::Tensor> slot_mapping;
35
36
37
    };

    struct Output {
38
        /// Logits.
39
40
41
        infinicore::Tensor logits;
    };

42
    virtual ~InfinilmModel() = default;
43
    virtual Output forward(const Input &input) const = 0;
PanZezhong's avatar
PanZezhong committed
44
45

    virtual void reset_cache(const cache::CacheConfig *cache_config) = 0;
46
47
};
} // namespace infinilm