std::stringmodel_url="";// model url to download // NOLINT
booluse_guide_tokens=false;// enable guide tokens to improve TTS accuracy // NOLINT
};
// How (if at all) model "reasoning"/"thinking" output should be extracted
// from the generated text before it is returned to the caller.
enum common_reasoning_format {
    COMMON_REASONING_FORMAT_NONE,     // leave output untouched; no reasoning extraction
    COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`
};
structcommon_params{
...
...
@@ -240,14 +268,13 @@ struct common_params {
std::stringlookup_cache_static="";// path of static ngram cache file for lookup decoding // NOLINT
std::stringlookup_cache_dynamic="";// path of dynamic ngram cache file for lookup decoding // NOLINT
std::stringlogits_file="";// file for saving *all* logits // NOLINT
std::stringrpc_servers="";// comma separated list of RPC servers // NOLINT
std::vector<std::string>in_files;// all input files
std::vector<std::string>antiprompt;// strings upon which more user input is prompted (a.k.a. reverse prompts)
std::vector<llama_model_kv_override>kv_overrides;
boollora_init_without_apply=false;// only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)
std::vector<common_lora_adapter_info>lora_adapters;// lora adapter path with user defined scale
boollora_init_without_apply=false;// only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
std::vector<common_adapter_lora_info>lora_adapters;// lora adapter path with user defined scale
std::vector<common_control_vector_load_info>control_vectors;// control vector with user defined scale
/** build image from pixels decoded by other libraries instead of stb_image.h for better performance. The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes */
LOG_ERR("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n",__func__,n_image_embd,n_llama_embd);