Unverified commit c68f367e authored by Daniel Hiltgen, committed by GitHub

Update GGML to b6646 (#12245)

Notable EOLs with this change:
- macOS v12 and v13 are no longer supported (v14+ required)
- AMD gfx900 and gfx906 are no longer supported
parent fdb10946
@@ -789,6 +789,7 @@ const struct ggml_tensor * llama_model_loader::check_tensor_dims(const std::stri
 }
 
 struct ggml_tensor * llama_model_loader::create_tensor(struct ggml_context * ctx, const std::string & name, const std::initializer_list<int64_t> & ne, int flags) {
+    LLAMA_LOG_DEBUG("%s: loading tensor %s\n", __func__, name.c_str());
     const struct ggml_tensor * cur = check_tensor_dims(name, ne, !(flags & TENSOR_NOT_REQUIRED));
 
     if (cur == NULL) {
...
@@ -7,6 +7,7 @@
 #include "llama-memory.h"
 #include "llama-vocab.h"
 
+#include <map>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -28,6 +29,7 @@ enum llm_type {
     LLM_TYPE_80M,
     LLM_TYPE_109M,
     LLM_TYPE_137M,
+    LLM_TYPE_140M,
     LLM_TYPE_160M,
     LLM_TYPE_190M,
     LLM_TYPE_220M,
@@ -36,12 +38,15 @@ enum llm_type {
     LLM_TYPE_270M,
     LLM_TYPE_335M,
     LLM_TYPE_350M,
+    LLM_TYPE_360M,
     LLM_TYPE_410M,
     LLM_TYPE_450M,
     LLM_TYPE_475M,
+    LLM_TYPE_558M,
     LLM_TYPE_700M,
     LLM_TYPE_770M,
     LLM_TYPE_780M,
+    LLM_TYPE_950M,
     LLM_TYPE_0_3B,
     LLM_TYPE_0_5B,
     LLM_TYPE_0_6B,
@@ -54,6 +59,7 @@ enum llm_type {
     LLM_TYPE_1_7B,
     LLM_TYPE_1_8B,
     LLM_TYPE_2B,
+    LLM_TYPE_2_6B,
     LLM_TYPE_2_8B,
     LLM_TYPE_2_9B,
     LLM_TYPE_3B,
@@ -76,9 +82,11 @@ enum llm_type {
     LLM_TYPE_32B,
     LLM_TYPE_34B,
     LLM_TYPE_35B,
+    LLM_TYPE_36B,
     LLM_TYPE_40B,
     LLM_TYPE_65B,
     LLM_TYPE_70B,
+    LLM_TYPE_120B,
     LLM_TYPE_142B,
     LLM_TYPE_236B,
     LLM_TYPE_290B,
@@ -268,6 +276,11 @@ struct llama_layer {
     struct ggml_tensor * ffn_down_shexp = nullptr;
     struct ggml_tensor * ffn_up_shexp = nullptr;
 
+    // ff adjugate experts (chexps)
+    struct ggml_tensor * ffn_gate_chexps = nullptr;
+    struct ggml_tensor * ffn_down_chexps = nullptr;
+    struct ggml_tensor * ffn_up_chexps = nullptr;
+
     // ff bias
     struct ggml_tensor * ffn_gate_b = nullptr;
     struct ggml_tensor * ffn_down_b = nullptr; // b2
@@ -449,10 +462,12 @@ struct llama_model {
     std::string desc() const;
 
-    size_t size() const;
+    size_t size() const; // file size
     size_t n_tensors() const;
     size_t n_devices() const;
 
+    std::map<ggml_backend_buffer_type_t, size_t> memory_breakdown() const;
 
     // total number of parameters in the model
     uint64_t n_elements() const;
...
@@ -725,7 +725,9 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         // attention layers have a non-zero number of kv heads
         int32_t n_attn_layer = model.hparams.n_layer - std::count(n_head_kv_iter, n_head_kv_iter + model.hparams.n_layer, 0);
         if (llama_model_has_encoder(&model)) {
-            n_attn_layer *= 3;
+            // now n_attn_layer is the number of attention layers in the encoder
+            // for each decoder block, there are 2 attention layers
+            n_attn_layer += 2 * model.hparams.dec_n_layer;
         }
         GGML_ASSERT((qs.n_attention_wv == n_attn_layer - pruned_attention_w) && "n_attention_wv is unexpected");
     }
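For context, a minimal sketch of the arithmetic behind the updated check, using hypothetical layer counts (12 encoder layers, 12 decoder blocks): each encoder layer contributes one attention layer and each decoder block contributes two (self-attention and cross-attention), so the old `*= 3` shortcut only matches when encoder and decoder depths are equal.

    #include <cstdint>
    #include <cstdio>

    // Sketch only - the layer counts below are hypothetical, not from a real model.
    int main() {
        const int32_t n_enc_layer = 12; // hypothetical encoder layer count
        const int32_t n_dec_layer = 12; // hypothetical decoder block count

        int32_t n_attn_layer = n_enc_layer;  // one attention layer per encoder layer
        n_attn_layer += 2 * n_dec_layer;     // self-attention + cross-attention per decoder block

        // The previous "n_attn_layer *= 3" only coincides with this when
        // n_enc_layer == n_dec_layer (12 + 2*12 == 3*12 == 36 here).
        std::printf("expected attention layers: %d\n", n_attn_layer);
        return 0;
    }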
@@ -920,7 +922,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         new_type = tensor->type;
         new_data = tensor->data;
         new_size = ggml_nbytes(tensor);
-        LLAMA_LOG_INFO("size = %8.3f MB\n", ggml_nbytes(tensor)/1024.0/1024.0);
+        LLAMA_LOG_INFO("size = %8.3f MiB\n", ggml_nbytes(tensor)/1024.0/1024.0);
     } else {
         const int64_t nelements = ggml_nelements(tensor);
@@ -1037,8 +1039,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
     }
 
     close_ofstream();
 
-    LLAMA_LOG_INFO("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: quant size = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);
 
     if (qs.n_fallback > 0) {
         LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",
...
@@ -434,6 +434,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
                    "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1}| ?[^\\s\\p{L}\\p{N}\\r\\n]+|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
                };
                break;
+            case LLAMA_VOCAB_PRE_TYPE_GROK_2:
+                regex_exprs = {
+                    // original regex from tokenizer.json
+                    // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+                    "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
+                };
+                break;
            default:
                // default regex for BPE tokenization pre-processing
                regex_exprs = {
@@ -1763,7 +1770,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            const size_t n_precompiled_charsmap = gguf_get_arr_data_n(ctx, precompiled_charsmap_keyidx);
            const char * pc = (const char *) gguf_get_arr_data(ctx, precompiled_charsmap_keyidx);
            precompiled_charsmap.assign(pc, pc + n_precompiled_charsmap);
-#ifdef IS_BIG_ENDIAN
+#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
            // correct endiannes of data in precompiled_charsmap binary blob
            uint32_t * xcda_blob_size = (uint32_t *) &precompiled_charsmap[0];
            *xcda_blob_size = __builtin_bswap32(*xcda_blob_size);
@@ -1944,7 +1951,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
                clean_spaces = false;
            } else if (
-                tokenizer_pre == "bailingmoe") {
+                tokenizer_pre == "bailingmoe" ||
+                tokenizer_pre == "llada-moe") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
                clean_spaces = false;
            } else if (
@@ -1963,6 +1971,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                tokenizer_pre == "kimi-k2") {
                pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;
                clean_spaces = false;
+            } else if (
+                tokenizer_pre == "grok-2") {
+                pre_type = LLAMA_VOCAB_PRE_TYPE_GROK_2;
+                clean_spaces = false;
            } else {
                LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
                pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
@@ -2331,7 +2343,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
        // @ngxson : quick hack for gpt-oss, always render these tokens
        for (const auto & t : token_to_id) {
-            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
                id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
            }
        }
@@ -2378,6 +2390,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (has_return && has_call && has_end) {
                special_eog_ids.erase(end_id);
+                id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
            }
        }
@@ -2459,7 +2472,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
        // set attributes by model/tokenizer/architecture name
        if (false
            || _contains_any(tokenizer_pre, {"jina-v2-de", "jina-v2-es", "jina-v2-code"})
-            || _contains_any(general_arch, {"nomic-bert-moe"})
+            || _contains_any(general_arch, {"nomic-bert-moe", "jina-bert-v3"})
            ) {
            if (token_to_id.count("<mask>") == 0) {
                LLAMA_LOG_WARN("%s: Mask token is missing in vocab, please reconvert model!\n", __func__);
...
@@ -47,6 +47,7 @@ enum llama_vocab_pre_type {
    LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36,
    LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37,
    LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38,
+    LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39,
 };
 
 struct LLM_KV;
...
@@ -25,6 +25,18 @@
 // interface implementation
 //
 
+const char * llama_flash_attn_type_name(enum llama_flash_attn_type flash_attn_type) {
+    switch (flash_attn_type) {
+        case LLAMA_FLASH_ATTN_TYPE_AUTO:
+            return "auto";
+        case LLAMA_FLASH_ATTN_TYPE_DISABLED:
+            return "disabled";
+        case LLAMA_FLASH_ATTN_TYPE_ENABLED:
+            return "enabled";
+    }
+    GGML_ABORT("fatal error");
+}
+
 struct llama_sampler_chain_params llama_sampler_chain_default_params() {
     struct llama_sampler_chain_params result = {
         /*.no_perf =*/ true,
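A small usage sketch for the helper added above, assuming it is declared in llama.h as its llama_ prefix suggests:

    #include <cstdio>
    #include "llama.h"

    int main() {
        // prints "auto" - handy when logging how the flash-attention setting was resolved
        std::printf("flash_attn_type: %s\n", llama_flash_attn_type_name(LLAMA_FLASH_ATTN_TYPE_AUTO));
        return 0;
    }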
@@ -47,6 +59,7 @@ bool llama_supports_mlock(void) {
 bool llama_supports_gpu_offload(void) {
     return ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_GPU) != nullptr ||
+           ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_IGPU) != nullptr ||
            llama_supports_rpc();
 }
@@ -71,7 +84,9 @@ void llama_numa_init(enum ggml_numa_strategy numa) {
         GGML_ASSERT(dev && "CPU backend is not loaded");
         auto * reg = ggml_backend_dev_backend_reg(dev);
         auto * numa_init_fn = (decltype(ggml_numa_init) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_numa_init");
-        numa_init_fn(numa);
+        if (numa_init_fn) {
+            numa_init_fn(numa);
+        }
     }
 }
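The added null check follows the usual pattern for optional backend entry points that are resolved by name. An illustrative standalone sketch of that pattern (the lookup function below is hypothetical, not the ggml API):

    #include <cstdio>

    using init_fn_t = void (*)(int);

    // Hypothetical registry lookup standing in for a by-name proc-address query.
    init_fn_t lookup_proc_address(const char * name) {
        (void) name;
        return nullptr; // pretend this backend does not export the symbol
    }

    int main() {
        init_fn_t numa_init_fn = lookup_proc_address("ggml_backend_cpu_numa_init");
        if (numa_init_fn) {
            numa_init_fn(0);            // only call the entry point if it exists
        } else {
            std::printf("optional entry point not available, skipping\n");
        }
        return 0;
    }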
@@ -170,8 +185,13 @@ static struct llama_model * llama_model_load_from_file_impl(
             model->devices.push_back(*dev);
         }
     } else {
+        // default device selection
+        // build list of available devices
+        std::vector<ggml_backend_dev_t> gpus;
+        std::vector<ggml_backend_dev_t> igpus;
         std::vector<ggml_backend_dev_t> rpc_servers;
-        // use all available devices
         for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
             ggml_backend_dev_t dev = ggml_backend_dev_get(i);
             switch (ggml_backend_dev_type(dev)) {
@@ -180,19 +200,51 @@ static struct llama_model * llama_model_load_from_file_impl(
                     // skip CPU backends since they are handled separately
                     break;
-                case GGML_BACKEND_DEVICE_TYPE_GPU:
+                case GGML_BACKEND_DEVICE_TYPE_GPU: {
                     ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
                     if (ggml_backend_reg_name(reg) == std::string("RPC")) {
                         rpc_servers.push_back(dev);
                     } else {
-                        model->devices.push_back(dev);
+                        // check if there is already a GPU with the same device id
+                        ggml_backend_dev_props props;
+                        ggml_backend_dev_get_props(dev, &props);
+                        auto it = std::find_if(gpus.begin(), gpus.end(), [&props](ggml_backend_dev_t d) {
+                            ggml_backend_dev_props d_props;
+                            ggml_backend_dev_get_props(d, &d_props);
+                            if (props.device_id && d_props.device_id) {
+                                return strcmp(props.device_id, d_props.device_id) == 0;
+                            }
+                            return false;
+                        });
+                        if (it != gpus.end()) {
+                            LLAMA_LOG_INFO("%s: skipping device %s (%s) with id %s - already using device %s (%s) with the same id\n",
+                                    __func__,
+                                    ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+                                    props.device_id ? props.device_id : "unknown id",
+                                    ggml_backend_dev_name(*it), ggml_backend_dev_description(*it));
+                        } else {
+                            gpus.push_back(dev);
+                        }
                     }
                     break;
+                }
+                case GGML_BACKEND_DEVICE_TYPE_IGPU:
+                    igpus.push_back(dev);
+                    break;
             }
         }
-        // add RPC servers at the front of the list
-        if (!rpc_servers.empty()) {
-            model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
-        }
+        // add RPC servers at the front of the list to minimize network transfers
+        model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
+
+        // add GPUs
+        model->devices.insert(model->devices.end(), gpus.begin(), gpus.end());
+
+        // add integrated GPUs only if no other devices were found
+        if (model->devices.empty()) {
+            model->devices.insert(model->devices.end(), igpus.begin(), igpus.end());
+        }
     }
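Taken together, the default selection policy is: RPC devices first (to minimize network transfers), then discrete GPUs de-duplicated by device id, and integrated GPUs only when nothing else is available. A standalone sketch of that ordering, using strings as stand-ins for ggml_backend_dev_t handles and hypothetical device names (not the actual llama.cpp implementation):

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
        // hypothetical devices discovered during enumeration
        std::vector<std::string> rpc_servers = {"RPC[host:50052]"};
        std::vector<std::string> gpus        = {"CUDA0", "CUDA1"};
        std::vector<std::string> igpus       = {"Vulkan0 (integrated)"};

        std::vector<std::string> devices;
        // RPC servers go first to minimize network transfers
        devices.insert(devices.begin(), rpc_servers.begin(), rpc_servers.end());
        // then discrete GPUs
        devices.insert(devices.end(), gpus.begin(), gpus.end());
        // integrated GPUs are used only when no other devices were found
        if (devices.empty()) {
            devices.insert(devices.end(), igpus.begin(), igpus.end());
        }

        for (const auto & d : devices) {
            std::printf("using device: %s\n", d.c_str());
        }
        return 0;
    }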
@@ -213,9 +265,12 @@ static struct llama_model * llama_model_load_from_file_impl(
     }
 
     for (auto * dev : model->devices) {
-        size_t free, total; // NOLINT
-        ggml_backend_dev_memory(dev, &free, &total);
-        LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
+        ggml_backend_dev_props props;
+        ggml_backend_dev_get_props(dev, &props);
+        LLAMA_LOG_INFO("%s: using device %s (%s) (%s) - %zu MiB free\n", __func__,
+                ggml_backend_dev_name(dev), ggml_backend_dev_description(dev),
+                props.device_id ? props.device_id : "unknown id",
+                props.memory_free/1024/1024);
     }
 
     const int status = llama_model_load(path_model, splits, *model, params);
...
@@ -4,6 +4,7 @@
 #include <string>
 #include <vector>
 
+// TODO: reimplement this structure in endian-independent way
 struct unicode_cpt_flags {
     enum {
         UNDEFINED = 0x0001,
@@ -15,6 +16,10 @@ struct unicode_cpt_flags {
         SYMBOL = 0x0040, // regex: \p{S}
         CONTROL = 0x0080, // regex: \p{C}
         MASK_CATEGORIES = 0x00FF,
+        WHITESPACE = 0x0100,
+        LOWERCASE = 0x0200,
+        UPPERCASE = 0x0400,
+        NFD = 0x0800,
     };
 
     // codepoint type
@@ -34,11 +39,49 @@ struct unicode_cpt_flags {
     // decode from uint16
     inline unicode_cpt_flags(const uint16_t flags = 0) {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         *reinterpret_cast<uint16_t*>(this) = flags;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        is_undefined   = (flags & UNDEFINED)   ? 1 : 0;
+        is_number      = (flags & NUMBER)      ? 1 : 0;
+        is_letter      = (flags & LETTER)      ? 1 : 0;
+        is_separator   = (flags & SEPARATOR)   ? 1 : 0;
+        is_accent_mark = (flags & ACCENT_MARK) ? 1 : 0;
+        is_punctuation = (flags & PUNCTUATION) ? 1 : 0;
+        is_symbol      = (flags & SYMBOL)      ? 1 : 0;
+        is_control     = (flags & CONTROL)     ? 1 : 0;
+        is_whitespace  = (flags & WHITESPACE)  ? 1 : 0;
+        is_lowercase   = (flags & LOWERCASE)   ? 1 : 0;
+        is_uppercase   = (flags & UPPERCASE)   ? 1 : 0;
+        is_nfd         = (flags & NFD)         ? 1 : 0;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
     }
 
     inline uint16_t as_uint() const {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
         return *reinterpret_cast<const uint16_t*>(this);
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+        uint16_t result =
+              is_undefined   * UNDEFINED
+            + is_number      * NUMBER
+            + is_letter      * LETTER
+            + is_separator   * SEPARATOR
+            + is_accent_mark * ACCENT_MARK
+            + is_punctuation * PUNCTUATION
+            + is_symbol      * SYMBOL
+            + is_control     * CONTROL
+            + is_whitespace  * WHITESPACE
+            + is_lowercase   * LOWERCASE
+            + is_uppercase   * UPPERCASE
+            + is_nfd         * NFD
+            ;
+        return result;
+#else
+#error Unexpected or undefined __BYTE_ORDER__
+#endif
     }
 
     inline uint16_t category_flag() const {
...
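A minimal illustration of the portability technique used above: reinterpreting the bitfield struct as its packed integer is only valid when the bit layout matches the serialized flags (the little-endian branch), otherwise each flag is packed and unpacked explicitly. The sketch below uses a made-up two-flag struct, not the llama.cpp one, and unpacks explicitly on every target:

    #include <cstdint>
    #include <cstdio>

    struct tiny_flags {
        enum {
            IS_NUMBER = 0x0001,
            IS_LETTER = 0x0002,
        };

        uint16_t is_number : 1;
        uint16_t is_letter : 1;

        explicit tiny_flags(uint16_t flags = 0) {
            // explicit unpacking works regardless of byte order
            is_number = (flags & IS_NUMBER) ? 1 : 0;
            is_letter = (flags & IS_LETTER) ? 1 : 0;
        }

        uint16_t as_uint() const {
            // explicit packing mirrors the big-endian branch above
            return static_cast<uint16_t>(is_number * IS_NUMBER + is_letter * IS_LETTER);
        }
    };

    int main() {
        tiny_flags f(tiny_flags::IS_LETTER);
        std::printf("round-trip: 0x%04x\n", (unsigned) f.as_uint()); // 0x0002 on any endianness
        return 0;
    }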
@@ -44,6 +44,7 @@
 #define KEY_WIN_ATTN_PATTERN "clip.vision.n_wa_pattern"
 #define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
 #define KEY_MINICPMV_VERSION "clip.minicpmv_version"
+#define KEY_MINICPMV_QUERY_NUM "clip.minicpmv_query_num"
 
 // audio-specific
 #define KEY_A_NUM_MEL_BINS "clip.audio.num_mel_bins"
@@ -81,6 +82,7 @@
 #define TN_MVLM_PROJ_PEG "mm.model.peg.%d.%s"
 #define TN_IMAGE_NEWLINE "model.image_newline"
 #define TN_MM_INP_NORM "mm.input_norm.weight"
+#define TN_MM_INP_NORM_B "mm.input_norm.bias"
 #define TN_MM_INP_PROJ "mm.input_projection.weight" // gemma3
 #define TN_MM_SOFT_EMB_N "mm.soft_emb_norm.weight" // gemma3
 #define TN_MM_PROJECTOR "mm.model.fc.weight" // idefics3
@@ -132,6 +134,8 @@ enum projector_type {
     PROJECTOR_TYPE_QWEN2A,
     PROJECTOR_TYPE_QWEN25O, // will be replaced by QWEN2A or QWEN25VL depending on clip_ctx
     PROJECTOR_TYPE_VOXTRAL,
+    PROJECTOR_TYPE_LFM2,
+    PROJECTOR_TYPE_KIMIVL,
     PROJECTOR_TYPE_UNKNOWN,
 };
@@ -152,6 +156,8 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
     { PROJECTOR_TYPE_QWEN2A, "qwen2a"},
     { PROJECTOR_TYPE_QWEN25O, "qwen2.5o"},
     { PROJECTOR_TYPE_VOXTRAL, "voxtral"},
+    { PROJECTOR_TYPE_LFM2, "lfm2"},
+    { PROJECTOR_TYPE_KIMIVL, "kimivl"},
 };
 
 static projector_type clip_projector_type_from_string(const std::string & str) {
...
@@ -82,11 +82,6 @@ struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch
  */
 void clip_build_img_from_pixels(const unsigned char * rgb_pixels, int nx, int ny, struct clip_image_u8 * img);
 
-bool clip_image_load_from_file(const char * fname, struct clip_image_u8 * img);
-
-/** interpret bytes as an image file with length bytes_length, and use the result to populate img */
-bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
-
 /** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
 bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
...