#pragma once #include "../cache/cache.hpp" #include "infinicore/nn/module.hpp" #include "nlohmann/json.hpp" #include #include namespace infinilm { class InfinilmModel : public infinicore::nn::Module { public: struct Config { std::string model_type; virtual ~Config() = default; }; struct Input { /// Token IDs tensor of shape `[batch, seq_len]`. std::optional input_ids; /// Position IDs tensor of shape `[batch, seq_len]` or `[seq_len]`. std::optional position_ids; /// Past Lengths of cached sequence for each request, of shape `[num_requests]`. std::optional past_sequence_lengths; /// ToTal Lengths for each request sequence, of shape `[num_requests]`. std::optional total_sequence_lengths; /// Offsets of each request in a continous-batched sequence, of shape `[num_requests + 1]`. std::optional input_offsets; /// Cumulative total sequence lengths for each request, of shape `[num_requests + 1]`. std::optional cu_seqlens; /// Block ids for each request `[batch, max_block_table_length]`. Used for paged cache. std::optional block_tables; /// Slot ids for each token `[seq]`. Used for paged cache. std::optional slot_mapping; }; struct Output { /// Logits. infinicore::Tensor logits; }; virtual ~InfinilmModel() = default; virtual Output forward(const Input &input) const = 0; virtual void reset_cache(const cache::CacheConfig *cache_config) = 0; virtual const cache::CacheConfig *get_cache_config() const = 0; }; } // namespace infinilm