Unverified commit fab07d1c, authored by ishandhanani and committed by GitHub
Browse files

fix: reject guided_grammar with excessive nesting depth (#7091)

parent df67ce16
...@@ -19,6 +19,9 @@ use serde::{Deserialize, Serialize}; ...@@ -19,6 +19,9 @@ use serde::{Deserialize, Serialize};
use super::TokenIdType; use super::TokenIdType;
/// Maximum nesting depth allowed in guided_grammar EBNF strings.
///
/// Depth is the number of simultaneously open `(`, `[`, or `{` delimiters
/// seen while scanning the grammar; a grammar whose peak depth is strictly
/// greater than this value is rejected during validation.
const MAX_GRAMMAR_NESTING_DEPTH: usize = 500;
pub mod llm_backend; pub mod llm_backend;
pub mod postprocessor; pub mod postprocessor;
pub mod preprocessor; pub mod preprocessor;
...@@ -422,7 +425,8 @@ impl GuidedDecodingOptions { ...@@ -422,7 +425,8 @@ impl GuidedDecodingOptions {
Ok(Some(instance)) Ok(Some(instance))
} }
/// Validate that only one guided decoding option is set /// Validate that only one guided decoding option is set, and that
/// grammar nesting depth is bounded.
pub fn validate(&self) -> Result<()> { pub fn validate(&self) -> Result<()> {
let count = [ let count = [
self.json.is_some(), self.json.is_some(),
...@@ -436,14 +440,45 @@ impl GuidedDecodingOptions { ...@@ -436,14 +440,45 @@ impl GuidedDecodingOptions {
.count(); .count();
if count > 1 { if count > 1 {
Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Only one of json, regex, choice, or grammar can be set, but multiple are specified: {:?}", "Only one of json, regex, choice, or grammar can be set, but multiple are specified: {:?}",
self self
)) ));
} else { }
Ok(())
if let Some(ref grammar) = self.grammar {
// NOTE: This intentionally scans raw bytes without tracking quoted
// regions. Delimiters inside quoted terminals (e.g. "(") are counted
// but balanced quotes contribute net-zero depth, and the 500 limit is
// generous enough that false positives from unbalanced quoted
// delimiters are not a practical concern.
let mut depth: usize = 0;
let mut max: usize = 0;
for ch in grammar.bytes() {
match ch {
b'(' | b'[' | b'{' => {
depth += 1;
if depth > max {
max = depth;
}
}
b')' | b']' | b'}' => {
depth = depth.saturating_sub(1);
}
_ => {}
}
}
if max > MAX_GRAMMAR_NESTING_DEPTH {
return Err(anyhow::anyhow!(
"guided_grammar exceeds maximum nesting depth of {} (got {})",
MAX_GRAMMAR_NESTING_DEPTH,
max
));
} }
} }
Ok(())
}
} }
impl SamplingOptions { impl SamplingOptions {
...@@ -843,4 +878,19 @@ mod tests { ...@@ -843,4 +878,19 @@ mod tests {
let val = val.unwrap(); let val = val.unwrap();
assert_eq!(val.choice, Some(vec!["A".to_string()])); assert_eq!(val.choice, Some(vec!["A".to_string()]));
} }
#[test]
fn test_guided_grammar_deep_nesting_rejected() {
    // 501 nested parenthesised groups: one level more than the 500 used by
    // the companion "acceptable nesting" test.
    let levels = 501;
    let grammar = format!("{}a{}", "(".repeat(levels), ")".repeat(levels));

    let outcome = GuidedDecodingOptions::validated(None, None, None, Some(grammar), None, None);

    // Validation must fail, and the error text must name the nesting depth.
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(message.contains("nesting depth"));
}
#[test]
fn test_guided_grammar_acceptable_nesting_ok() {
    // 500 nested parenthesised groups wrapped around a single symbol; this
    // grammar is expected to pass validation.
    let levels = 500;
    let grammar = format!("{}a{}", "(".repeat(levels), ")".repeat(levels));

    let outcome = GuidedDecodingOptions::validated(None, None, None, Some(grammar), None, None);

    assert!(outcome.is_ok());
}
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment