let input1 = "<|im_start|>system\nYou are a helpful assistant that provides clear and detailed responses.<|im_end|><|im_start|>user\nHello there! How are you doing today?<|im_end|>";
// Insert at special token boundaries (re-tokenizes prefixes)
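// Reconstructed sketch: `insert` and its (input, tokenizer, special_tokens)
// signature are assumptions inferred from the surrounding test code; `cache`,
// `tokenizer`, and `special_tokens` are assumed to be set up earlier in this test.
cache.insert(input1, &tokenizer, special_tokens);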
// Search with same prefix but different user query
let input2 = "<|im_start|>system\nYou are a helpful assistant that provides clear and detailed responses.<|im_end|><|im_start|>user\nWhat is 2+2?<|im_end|>";
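// Reconstructed sketch: `find_longest_prefix` is an assumed method name; the
// lookup for input2 should still match the shared system-prompt boundary even
// though the user turn differs.
let result = cache.find_longest_prefix(input2);
assert!(result.is_some());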
// Create multi-turn conversation with multiple special token boundaries (~400 bytes)
let input = "<|im_start|>system\nYou are a helpful AI assistant that provides detailed and accurate responses.<|im_end|><|im_start|>user\nHello there! How are you today? Can you help me understand how tokenization works in language models?<|im_end|><|im_start|>assistant\nI'm doing well, thank you! I'd be happy to explain tokenization. Tokenization is the process of breaking text into smaller units called tokens.<|im_end|>";
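// Reconstructed sketch (assumed `insert` signature): indexing the multi-turn
// conversation should create an entry at each special-token boundary.
cache.insert(input, &tokenizer, special_tokens);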
// Should have multiple entries at special token boundaries
assert!(cache.len() >= 2); // At least 2 boundaries
// Search with partial conversation - should match at a special token boundary
let partial_input = "<|im_start|>system\nYou are a helpful AI assistant that provides detailed and accurate responses.<|im_end|><|im_start|>user\nHello there! How are you today? Can you help me understand how tokenization works in language models?<|im_end|>";
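// Reconstructed sketch (assumed `find_longest_prefix` method name).
let result = cache.find_longest_prefix(partial_input);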
// Should find a match at a special token boundary
assert!(result.is_some());
let (_, offset) = result.unwrap();
assert!(offset > 0);
assert!(offset <= partial_input.len());
}
#[test]
fn test_stats() {
let cache = L1Cache::new(1024 * 1024);
let special_tokens = &["<|im_start|>", "<|im_end|>"];
let tokenizer = MockTokenizer::new();
// ChatML input with special tokens
let input = "<|im_start|>system\nYou are a helpful assistant that provides detailed answers.<|im_end|><|im_start|>user\nHello there! How are you today?<|im_end|>";
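// Reconstructed sketch: `insert`, `find_longest_prefix`, and `stats` are
// assumed method names inferred from the assertions below; a single lookup
// against the indexed input should register exactly one hit and no misses.
cache.insert(input, &tokenizer, special_tokens);
let _ = cache.find_longest_prefix(input);
let stats = cache.stats();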
// Should have at least one hit (the longest special token boundary should match)
assert!(stats.hits >= 1);
assert_eq!(stats.hit_rate, 1.0);
}
#[test]
fn test_clear() {
let cache = L1Cache::new(1024 * 1024);
let special_tokens = &["<|im_start|>", "<|im_end|>"];
let tokenizer = MockTokenizer::new();
// ChatML input with special tokens
let input = "<|im_start|>system\nYou are a helpful assistant that provides clear and detailed responses.<|im_end|><|im_start|>user\nHello there!<|im_end|>";
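// Reconstructed sketch of the rest of this test: `insert` and `clear` are
// assumed method names inferred from the test name and the API used in the
// other tests above.
cache.insert(input, &tokenizer, special_tokens);
assert!(cache.len() > 0);
cache.clear();
assert_eq!(cache.len(), 0);
}

// The conversations below appear to belong to a separate eviction test whose
// header is missing from this excerpt; the function name, the small capacity,
// and the setup are assumptions added so the fragment stays well-formed.
#[test]
fn test_eviction() {
let cache = L1Cache::new(1024); // deliberately small capacity (assumed value)
let special_tokens = &["<|im_start|>", "<|im_end|>"];
let tokenizer = MockTokenizer::new();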
let input1 = "<|im_start|>system\nYou are a helpful assistant specialized in mathematics.<|im_end|><|im_start|>user\nCan you explain calculus to me?<|im_end|><|im_start|>assistant\nCertainly! Calculus is a branch of mathematics that studies continuous change.<|im_end|><|eot_id|>";
let input2 = "<|im_start|>system\nYou are a helpful assistant specialized in physics.<|im_end|><|im_start|>user\nWhat is quantum mechanics?<|im_end|><|im_start|>assistant\nQuantum mechanics is the fundamental theory describing nature at atomic and subatomic scales.<|im_end|><|eot_id|>";
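// Reconstructed sketch (assumed `insert` signature): index the first two
// conversations so the third one below can trigger eviction of the oldest.
cache.insert(input1, &tokenizer, special_tokens);
cache.insert(input2, &tokenizer, special_tokens);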
// Insert third conversation (should trigger eviction of oldest)
let input3 = "<|im_start|>system\nYou are a helpful assistant specialized in chemistry.<|im_end|><|im_start|>user\nExplain the periodic table to me please.<|im_end|><|im_start|>assistant\nThe periodic table is a tabular arrangement of chemical elements organized by atomic number and electron configuration.<|im_end|><|eot_id|>";