#ifndef DEEPSEEK_V3_IMPL_H #define DEEPSEEK_V3_IMPL_H #include "infinicore_infer.h" #include "../../allocator.hpp" #include "../../tensor.hpp" #include #include #include #include #include struct QuantLinearWeight { std::shared_ptr w; std::shared_ptr s; std::shared_ptr z; }; struct MLAWeight { std::shared_ptr kv_a_norm, q_a_norm; std::shared_ptr kv_a_proj, kv_b_proj, o_proj, q_a_proj, q_b_proj; }; struct GateWeight { std::shared_ptr w; std::shared_ptr b; }; struct MLPWeight { std::shared_ptr gate, up, down; }; struct LayerWeight { std::shared_ptr mla_norm; std::shared_ptr mla; std::shared_ptr mlp_norm; std::shared_ptr dense_mlp; std::shared_ptr route; std::shared_ptr share_expert; std::vector> experts; }; struct DeepSeekV3DeviceWeights { std::shared_ptr w_in_embd, w_out_norm, w_out_embd, sin_table, cos_table; std::vector w_layers; infiniDevice_t device; int dev_id; infinirtStream_t load_stream; }; struct DeepSeekV3Weights { std::vector> device_weights; DeepSeekV3Weights(const DeepSeekV3Meta *meta, infiniDevice_t device, int ndev, const int *dev_ids); }; struct DeepSeekV3DeviceResource { // Device infiniDevice_t device; int device_id; infiniopHandle_t handle; // Weights std::shared_ptr weights; // Streams infinirtStream_t stream; // Communicator infinicclComm_t comm; std::shared_ptr memory_pool; }; struct InferState { std::mutex mtx; std::condition_variable cv_load, cv_start, cv_done; bool loaded = false; bool proceed = false; bool exit_flag = false; }; struct InferRequest { const uint32_t *tokens; uint32_t ntok; const uint32_t *req_lens; uint32_t nreq; const uint32_t *req_pos; struct DeepSeekV3Cache **kv_caches; const float *temperature; const uint32_t *topk; const float *topp; uint32_t *output; void *logits; }; struct DeepSeekV3Model { DeepSeekV3Meta meta; infiniDevice_t device; std::vector dev_ids; std::vector dev_resources; std::vector states; std::vector threads; InferRequest req; DeepSeekV3Model(const DeepSeekV3Meta *, const DeepSeekV3Weights *weights); }; struct DeepSeekV3Cache { std::vector>> kv_pass, k_rot; }; #endif