Unverified commit 023a299c, authored by Albert Cheng and committed by GitHub
Browse files

fix: strip null bytes before tokenization to prevent false positive prompt...


fix: strip null bytes before tokenization to prevent false positive prompt rejection due to Rust/Python tokenizer divergence (#7694)
Signed-off-by: Albert Cheng (Engrg-Hardware 1) <albecheng@login-lyris01.lyris.clusters.nvidia.com>
Co-authored-by: Albert Cheng (Engrg-Hardware 1) <albecheng@login-lyris01.lyris.clusters.nvidia.com>
parent a20b9dde
......@@ -33,6 +33,7 @@ use dynamo_runtime::metrics::frontend_perf::{
DETOKENIZE_TOKEN_COUNT, DETOKENIZE_TOTAL_US, STAGE_DURATION_SECONDS, TEMPLATE_SECONDS,
TOKENIZE_SECONDS,
};
use std::borrow::Cow;
use std::{collections::HashMap, pin::Pin, sync::Arc};
use tracing;
......@@ -621,7 +622,13 @@ impl OpenAIPreprocessor {
tracker: Option<&RequestTracker>,
) -> anyhow::Result<Encoding> {
let encode_start = Instant::now();
let encoding = self.tokenizer.encode(prompt)?;
let prompt = if prompt.contains('\0') {
tracing::debug!("Prompt contains null bytes; stripping to avoid tokenizer divergence");
Cow::Owned(prompt.replace('\0', ""))
} else {
Cow::Borrowed(prompt)
};
let encoding = self.tokenizer.encode(prompt.as_ref())?;
if let Some(t) = tracker {
t.record_tokenize_latency(encode_start.elapsed());
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment