"docs/source/vscode:/vscode.git/clone" did not exist on "f861003c8819cd19737ba0f1cbfbc08f408404db"
Unverified Commit 6ea570dd authored by Wang, Yi; committed by GitHub
Browse files

fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices] (#2148)



* fix microsoft/Phi-3-mini-4k-instruct crash in batch.slots[batch.slot_indices]
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>

* Apply suggestions from code review

---------
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
Co-authored-by: Nicolas Patry <patry.nicolas@protonmail.com>
parent fb98ab27
...@@ -17,7 +17,7 @@ use text_generation_router::{ ...@@ -17,7 +17,7 @@ use text_generation_router::{
server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig, server, HubModelInfo, HubPreprocessorConfig, HubProcessorConfig, HubTokenizerConfig,
}; };
use thiserror::Error; use thiserror::Error;
use tokenizers::{processors::template::TemplateProcessing, Tokenizer}; use tokenizers::{processors::template::TemplateProcessing, Tokenizer, PostProcessor};
use tower_http::cors::AllowOrigin; use tower_http::cors::AllowOrigin;
use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt; use tracing_subscriber::util::SubscriberInitExt;
...@@ -309,7 +309,7 @@ async fn main() -> Result<(), RouterError> { ...@@ -309,7 +309,7 @@ async fn main() -> Result<(), RouterError> {
let mut tokenizer = Tokenizer::from_file(filename).ok(); let mut tokenizer = Tokenizer::from_file(filename).ok();
if let Some(tokenizer) = &mut tokenizer { if let Some(tokenizer) = &mut tokenizer {
if let Some(class) = &tokenizer_config.tokenizer_class { if let Some(class) = &tokenizer_config.tokenizer_class {
if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast") && tokenizer.get_post_processor().is_none() { if (class == "LlamaTokenizer" || class == "LlamaTokenizerFast"){
if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) { if let Ok(post_processor) = create_post_processor(tokenizer, &tokenizer_config) {
tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205"); tracing::info!("Overriding LlamaTokenizer with TemplateProcessing to follow python override defined in https://github.com/huggingface/transformers/blob/4aa17d00690b7f82c95bb2949ea57e22c35b4336/src/transformers/models/llama/tokenization_llama_fast.py#L203-L205");
tokenizer.with_post_processor(post_processor); tokenizer.with_post_processor(post_processor);
...@@ -577,7 +577,7 @@ pub fn create_post_processor( ...@@ -577,7 +577,7 @@ pub fn create_post_processor(
if add_bos_token { if add_bos_token {
if let Some(bos) = bos_token { if let Some(bos) = bos_token {
single.push(format!("{}:1", bos)); pair.push(format!("{}:1", bos));
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment