    bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
    std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
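In the deferred mode, the entries in lora_adapters are loaded into memory but not attached to any context; the caller wires them up later. A minimal sketch of that flow, assuming the llama_adapter_lora_init / llama_set_adapter_lora / llama_adapter_lora_free entry points from llama.h (these names have been renamed across versions, so check your llama.h; the path and scale are illustrative):

#include "llama.h"

// Load a LoRA adapter into memory only, then attach it to a context later,
// mirroring what lora_init_without_apply defers.
static bool attach_lora_later(llama_model * model, llama_context * ctx) {
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "my-adapter.gguf"); // illustrative path
    if (adapter == nullptr) {
        return false; // failed to load the adapter file
    }
    // ... any amount of work can happen before the adapter is applied ...
    if (llama_set_adapter_lora(ctx, adapter, /*scale =*/ 1.0f) != 0) {
        llama_adapter_lora_free(adapter);
        return false;
    }
    return true;
}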
...
@@ -325,13 +333,15 @@ struct common_params {
    bool warmup = true; // warmup run
    bool check_tensors = false; // validate tensor data
+   bool single_turn = false; // single turn chat conversation
    ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
    ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
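Since these are plain fields on common_params, a tool built on the common layer can set them directly before initialization. A small sketch under that assumption (the quantized K-cache type is just an illustrative choice):

#include "common.h"

void configure(common_params & params) {
    params.single_turn   = true;            // stop after a single chat exchange
    params.check_tensors = true;            // validate tensor data while loading
    params.cache_type_k  = GGML_TYPE_Q8_0;  // smaller K cache, illustrative choice
    params.cache_type_v  = GGML_TYPE_F16;   // keep the V cache at the f16 default
    // params is then passed on to common_init_from_params(...) as usual
}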
-/** build image from pixels decoded by other libraries instead of stb_image.h for better performance. The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes */
+// use for accessing underlying data of clip_image_f32_batch
+CLIP_API size_t clip_image_f32_batch_n_images(const struct clip_image_f32_batch * batch); // equivalent to batch->size()
+CLIP_API size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->nx
+CLIP_API size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
+CLIP_API struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
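Taken together, the four accessors are enough to walk a batch without ever seeing its definition. A minimal sketch, assuming batch was filled by the usual preprocessing step:

#include <cstdio>
#include "clip.h"

// Enumerate a batch purely through the public accessors; the
// clip_image_f32_batch layout (size/data members) no longer leaks to callers.
static void dump_batch(const struct clip_image_f32_batch * batch) {
    const size_t n_imgs = clip_image_f32_batch_n_images(batch);
    for (size_t i = 0; i < n_imgs; i++) {
        const size_t nx = clip_image_f32_batch_nx(batch, (int) i);
        const size_t ny = clip_image_f32_batch_ny(batch, (int) i);
        printf("sub-image %zu: %zu x %zu\n", i, nx, ny);
        // struct clip_image_f32 * img = clip_image_f32_get_img(batch, (int) i);
        // ... pass img to clip_image_encode(...), as in the llava.cpp hunk below
    }
}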
+/**
+ * Build image from pixels decoded by other libraries instead of stb_image.h for better performance.
+ * The memory layout is RGBRGBRGB..., input buffer length must be 3*nx*ny bytes
+ */
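This comment documents the pixel-import path for callers that decode images themselves (libjpeg, libpng, a camera buffer). A sketch, assuming the declaration it sits above is clip_build_img_from_pixels(const unsigned char *, int, int, struct clip_image_u8 *) and that clip_image_u8_init() / clip_image_u8_free() allocate and release an image, as in clip.h:

#include "clip.h"

// rgb must hold exactly 3*nx*ny bytes in interleaved RGBRGBRGB... order,
// produced by any decoder instead of stb_image.h.
static struct clip_image_u8 * import_rgb(const unsigned char * rgb, int nx, int ny) {
    struct clip_image_u8 * img = clip_image_u8_init();
    clip_build_img_from_pixels(rgb, nx, ny, img);
    return img; // caller releases it with clip_image_u8_free(img)
}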
            // std::vector<clip_image_f32*> img_res_v; // format VectN x H x W x RGB (N x 336 x 336 x 3), so interleaved RGB - different to the python implementation which is N x 3 x 336 x 336
-           const bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[i], image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
+           const bool encoded = clip_image_encode(ctx_clip, n_threads, img_res, image_embd_v[i]); // image data is in 3x336x336 format and will be converted to 336x336x3 inside
            if (!encoded) {
-               LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) img_res_v.size);
+               LOG_ERR("Unable to encode image - spatial_unpad - subimage %d of %d\n", (int) i+1, (int) n_imgs);
                return false;
            }
        }
        const int64_t t_img_enc_batch_us = ggml_time_us();
-       LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int) img_res_v.size, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
+       LOG_INF("%s: %d segments encoded in %8.2f ms\n", __func__, (int) n_imgs, (t_img_enc_batch_us - t_img_enc_start_us) / 1000.0);
/// @details Initializes a GBNF grammar, see grammars/README.md for details.
/// @param vocab The vocabulary that this grammar will be used with.
/// @param grammar_str The production rules for the grammar, encoded as a string. Returns an empty grammar if empty. Returns NULL if parsing of grammar_str fails.
/// @param grammar_root The name of the start symbol for the grammar.
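Usage is a single call. A sketch constraining output to a trivial grammar, assuming a llama_vocab * vocab is already in hand (the grammar text is illustrative):

// Constrain generation to "yes" or "no"; "root" names the start symbol
// and matches the grammar_root argument.
const char * grammar_str = "root ::= \"yes\" | \"no\"";
struct llama_sampler * smpl = llama_sampler_init_grammar(vocab, grammar_str, "root");
if (smpl == NULL) {
    // NULL means parsing of grammar_str failed
}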
/// @details Lazy grammar sampler, introduced in https://github.com/ggml-org/llama.cpp/pull/9639
/// @param trigger_patterns A list of patterns that will trigger the grammar sampler. Each pattern is matched from the start of the generation output, and the grammar sampler is fed content starting from its first match group.
/// @param trigger_tokens A list of tokens that will trigger the grammar sampler. The grammar sampler is fed content starting from the trigger token (inclusive).
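A hedged sketch of the lazy variant, assuming a llama_sampler_init_grammar_lazy_patterns entry point that takes the vocab, grammar, and root symbol followed by the pattern and token trigger arrays with their counts (verify the exact signature in llama.h); vocab and grammar_str are as in the previous sketch, and the pattern and token id are hypothetical:

// The grammar stays dormant until "<tool_call>" appears; the first match
// group marks where the grammar starts consuming output.
const char * trigger_patterns[] = { "(<tool_call>[\\s\\S]*)" }; // hypothetical pattern
const llama_token trigger_tokens[] = { tool_call_token };       // hypothetical token id
struct llama_sampler * smpl = llama_sampler_init_grammar_lazy_patterns(
        vocab, grammar_str, "root",
        trigger_patterns, 1,
        trigger_tokens, 1);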