if(version!=kArrayBhikshaVersion)UTIL_THROW(FormatLoadException,"This file has sorted array compression version "<<(unsigned)version<<" but the code expects version "<<(unsigned)kArrayBhikshaVersion);
// Human-readable names of the supported model types. The table is indexed by
// the numeric model type (see the kModelNames[...model_type] lookups used
// when building load-error messages), so the order of entries matters.
const char *kModelNames[6] = {
  "probing hash tables",
  "probing hash tables with rest costs",
  "trie",
  "trie with quantization",
  "trie with array-compressed pointers",
  "trie with quantization and array-compressed pointers"
};
namespace{
// Leading part of the magic string, up to (but not including) the space and
// version number that follow it in kMagicBytes.
const char kMagicBeforeVersion[] = "mmap lm http://kheafield.com/code format version";
// Full magic string: the prefix above, a space, the format version number, a
// newline and an explicit NUL (in addition to the literal's implicit
// terminator, so sizeof(kMagicBytes) covers both).
const char kMagicBytes[] = "mmap lm http://kheafield.com/code format version 5\n\0";
// This must be shorter than kMagicBytes and indicates an incomplete binary file (i.e. build failed).
UTIL_THROW(FormatLoadException,"Binary file has version "<<version<<" but this implementation expects version "<<kMagicVersion<<" so you'll have to use the ARPA to rebuild your binary");
}
OldSanityold_sanity=OldSanity();
old_sanity.SetToReference();
UTIL_THROW_IF(!std::memcmp(memory.get(),&old_sanity,sizeof(OldSanity)),FormatLoadException,"Looks like this is an old 32-bit format. The old 32-bit format has been removed so that 64-bit and 32-bit files are exchangeable.");
UTIL_THROW(FormatLoadException,"File looks like it should be loaded with mmap, but the test values don't match. Try rebuilding the binary format LM using the same code revision, compiler, and architecture");
// The model type is cast to unsigned int before streaming so it appears in
// the message as a number.
UTIL_THROW(FormatLoadException, "The binary file claims to be model type " << static_cast<unsigned int>(params.fixed.model_type) << " but this is not implemented for in this inference code.");
UTIL_THROW(FormatLoadException,"The binary file was built for "<<kModelNames[params.fixed.model_type]<<" but the inference code is trying to load "<<kModelNames[model_type]);
}
UTIL_THROW_IF(search_version!=params.fixed.search_version,FormatLoadException,"The binary file has "<<kModelNames[params.fixed.model_type]<<" version "<<params.fixed.search_version<<" but this code expects "<<kModelNames[params.fixed.model_type]<<" version "<<search_version);
UTIL_THROW_IF(file_size!=util::kBadSize&&file_size<total_map,FormatLoadException,"Binary file has size "<<file_size<<" but the headers say it should be at least "<<total_map);
UTIL_THROW_IF(discounts_[i].amount[j]<0.0||discounts_[i].amount[j]>j,BadDiscountException,"ERROR: "<<(i+1)<<"-gram discount out of range for adjusted count "<<j<<": "<<discounts_[i].amount[j]<<". This means modified Kneser-Ney smoothing thinks something is weird about your data. To override this error for e.g. a class-based model, rerun with --discount_fallback\n");
}
}catch(constBadDiscountException&){
switch(config.bad_action){
caseTHROW_UP:
throw;
caseCOMPLAIN:
std::cerr<<"Substituting fallback discounts for order "<<i<<": D1="<<config.fallback.amount[1]<<" D2="<<config.fallback.amount[2]<<" D3+="<<config.fallback.amount[3]<<std::endl;
std::cerr<<"Warning: "<<word<<" appears in the input. All instances of <s>, </s>, and <unk> will be interpreted as whitespace."<<std::endl;
action=SILENT;
return;
caseTHROW_UP:
UTIL_THROW(FormatLoadException,"Special word "<<word<<" is not allowed in the corpus. I plan to support models containing <unk> in the future. Pass --skip_symbols to convert these symbols to whitespace.");
}
}
// Vocab ids are given in a precompiled hash table.