"vscode:/vscode.git/clone" did not exist on "9efc3503742fa942865c2e5662147209087d2ef4"
Unverified commit 9a18aa54, authored by LukasBluebaum and committed by GitHub
Browse files

[fix] Relax white space rules in EBNFComposer (#9595)

parent 91f0fd95
...@@ -50,19 +50,19 @@ class EBNFComposer: ...@@ -50,19 +50,19 @@ class EBNFComposer:
CALL_RULE_MAP = { CALL_RULE_MAP = {
"pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"', "pythonic": 'call_{name} ::= "{name}" "(" {arguments_rule} ")"',
"json": 'call_{name} ::= "{{" "\\"name\\"" ":" "\\"{name}\\"" ", " "\\"arguments\\"" ":" {arguments_rule} "}}"', "json": 'call_{name} ::= "{{" ws "\\"name\\"" ws ":" ws "\\"{name}\\"" ws "," ws "\\"arguments\\"" ws ":" ws {arguments_rule} ws "}}"',
"xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"', "xml": 'call_{name} ::= "<function={name}>\\n" {arguments_rule} "\\n</function>"',
} }
ARGUMENTS_RULE_MAP = { ARGUMENTS_RULE_MAP = {
"pythonic": "{arg_rules}", "pythonic": "{arg_rules}",
"json": '"{{" {arg_rules} "}}"', "json": '"{{" ws {arg_rules} ws "}}"',
"xml": "{arg_rules}", "xml": "{arg_rules}",
} }
KEY_VALUE_RULE_MAP = { KEY_VALUE_RULE_MAP = {
"pythonic": '"{key}" "=" {valrule}', "pythonic": '"{key}" "=" {valrule}',
"json": '"\\"{key}\\"" ":" {valrule}', "json": '"\\"{key}\\"" ws ":" ws {valrule}',
"xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"', "xml": '"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
} }
...@@ -165,7 +165,7 @@ class EBNFComposer: ...@@ -165,7 +165,7 @@ class EBNFComposer:
tool_call_separator: Optional[str] = None, tool_call_separator: Optional[str] = None,
call_rule_fmt: Optional[str] = None, call_rule_fmt: Optional[str] = None,
key_value_rule_fmt: Optional[str] = None, key_value_rule_fmt: Optional[str] = None,
key_value_separator: str = ",", key_value_separator: str = 'ws "," ws',
): ):
""" """
Generalized EBNF builder for all detectors. Generalized EBNF builder for all detectors.
...@@ -183,6 +183,10 @@ class EBNFComposer: ...@@ -183,6 +183,10 @@ class EBNFComposer:
key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted, key_value_rule_fmt: Optional custom format string for key-value pairs. It should define how each parameter is formatted,
with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format with placeholders {key} for the parameter name and {valrule} for the value rule. If None, a default format
based on function_format will be used. based on function_format will be used.
key_value_separator: Raw EBNF fragment inserted between key-value pairs.
This string is used verbatim (not auto-quoted). Pass:
- Quoted terminals when you need a literal token (e.g. '","' or '"\\n"').
- Raw/non-terminals when you need grammar tokens (e.g. 'ws "," ws').
""" """
# ================================================================= # =================================================================
# Step 1: Determine the root tool calls rule # Step 1: Determine the root tool calls rule
...@@ -281,9 +285,7 @@ class EBNFComposer: ...@@ -281,9 +285,7 @@ class EBNFComposer:
# Add required properties joined by commas # Add required properties joined by commas
if required: if required:
rule_parts.append( rule_parts.append(
f' "{key_value_separator}" '.join( f" {key_value_separator} ".join(prop_kv_pairs[k] for k in required)
prop_kv_pairs[k] for k in required
)
) )
# Add optional properties with flexible ordering # Add optional properties with flexible ordering
...@@ -298,14 +300,14 @@ class EBNFComposer: ...@@ -298,14 +300,14 @@ class EBNFComposer:
opt_parts.append(prop_kv_pairs[optional[j]]) opt_parts.append(prop_kv_pairs[optional[j]])
else: else:
opt_parts.append( opt_parts.append(
f' ( "{key_value_separator}" {prop_kv_pairs[optional[j]]} )?' f" ( {key_value_separator} {prop_kv_pairs[optional[j]]} )?"
) )
opt_alternatives.append("".join(opt_parts)) opt_alternatives.append("".join(opt_parts))
# Wrap with appropriate comma handling based on whether we have required properties # Wrap with appropriate comma handling based on whether we have required properties
if required: if required:
# Required properties exist, so optional group needs outer comma # Required properties exist, so optional group needs outer comma
rule_parts.append(f' ( "{key_value_separator}" ( ') rule_parts.append(f" ( {key_value_separator} ( ")
rule_parts.append(" | ".join(opt_alternatives)) rule_parts.append(" | ".join(opt_alternatives))
rule_parts.append(" ) )?") rule_parts.append(" ) )?")
else: else:
......
...@@ -160,5 +160,5 @@ class Glm4MoeDetector(BaseFormatDetector): ...@@ -160,5 +160,5 @@ class Glm4MoeDetector(BaseFormatDetector):
function_format="xml", function_format="xml",
call_rule_fmt='"{name}" "\\n" ( {arguments_rule} "\\n" )?', call_rule_fmt='"{name}" "\\n" ( {arguments_rule} "\\n" )?',
key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"', key_value_rule_fmt='"<arg_key>{key}</arg_key>" "\\n" "<arg_value>" {valrule} "</arg_value>"',
key_value_separator="\\n", key_value_separator='"\\n"',
) )
...@@ -358,5 +358,5 @@ class Qwen3CoderDetector(BaseFormatDetector): ...@@ -358,5 +358,5 @@ class Qwen3CoderDetector(BaseFormatDetector):
function_format="xml", function_format="xml",
call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"', call_rule_fmt='"<function={name}>\\n" {arguments_rule} "\\n</function>"',
key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"', key_value_rule_fmt='"<parameter={key}>\\n" {valrule} "\\n</parameter>"',
key_value_separator="\\n", key_value_separator='"\\n"',
) )
...@@ -549,7 +549,7 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -549,7 +549,7 @@ class TestEBNFGeneration(unittest.TestCase):
# Check that the EBNF contains expected patterns # Check that the EBNF contains expected patterns
self.assertIn("<|tool▁calls▁begin|>", ebnf) self.assertIn("<|tool▁calls▁begin|>", ebnf)
self.assertIn("<|tool▁call▁begin|>function<|tool▁sep|>get_weather", ebnf) self.assertIn("<|tool▁call▁begin|>function<|tool▁sep|>get_weather", ebnf)
self.assertIn('\\"location\\"" ":" basic_string ', ebnf) self.assertIn('\\"location\\"" ws ":" ws basic_string ', ebnf)
# Validate that the EBNF can be compiled by GrammarCompiler # Validate that the EBNF can be compiled by GrammarCompiler
try: try:
...@@ -591,8 +591,8 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -591,8 +591,8 @@ class TestEBNFGeneration(unittest.TestCase):
self.assertIsNotNone(ebnf) self.assertIsNotNone(ebnf)
# Check that the EBNF contains expected patterns # Check that the EBNF contains expected patterns
self.assertIn('\\"name\\"" ":" "\\"get_weather\\"', ebnf) self.assertIn('\\"name\\"" ws ":" ws "\\"get_weather\\"', ebnf)
self.assertIn('"\\"arguments\\"" ":"', ebnf) self.assertIn('"\\"arguments\\"" ws ":"', ebnf)
# Validate that the EBNF can be compiled by GrammarCompiler # Validate that the EBNF can be compiled by GrammarCompiler
try: try:
...@@ -609,7 +609,7 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -609,7 +609,7 @@ class TestEBNFGeneration(unittest.TestCase):
# Check that the EBNF contains expected patterns # Check that the EBNF contains expected patterns
self.assertIn('"[TOOL_CALLS] ["', ebnf) self.assertIn('"[TOOL_CALLS] ["', ebnf)
self.assertIn("call_get_weather | call_search", ebnf) self.assertIn("call_get_weather | call_search", ebnf)
self.assertIn('"\\"arguments\\"" ":"', ebnf) self.assertIn('"\\"arguments\\"" ws ":"', ebnf)
# Validate that the EBNF can be compiled by GrammarCompiler # Validate that the EBNF can be compiled by GrammarCompiler
try: try:
...@@ -625,8 +625,8 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -625,8 +625,8 @@ class TestEBNFGeneration(unittest.TestCase):
# Check that the EBNF contains expected patterns # Check that the EBNF contains expected patterns
self.assertIn("<tool_call>", ebnf) self.assertIn("<tool_call>", ebnf)
self.assertIn('\\"name\\"" ":" "\\"get_weather\\"', ebnf) self.assertIn('\\"name\\"" ws ":" ws "\\"get_weather\\"', ebnf)
self.assertIn('"\\"arguments\\"" ":"', ebnf) self.assertIn('"\\"arguments\\"" ws ":"', ebnf)
# Validate that the EBNF can be compiled by GrammarCompiler # Validate that the EBNF can be compiled by GrammarCompiler
try: try:
...@@ -724,13 +724,13 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -724,13 +724,13 @@ class TestEBNFGeneration(unittest.TestCase):
# Pythonic format: location="Paris" ( , ( unit=("celsius" | "fahrenheit") )? # Pythonic format: location="Paris" ( , ( unit=("celsius" | "fahrenheit") )?
self.assertIn('"location" "=" basic_string', ebnf) self.assertIn('"location" "=" basic_string', ebnf)
# The comma should be inside the optional brackets for unit # The comma should be inside the optional brackets for unit
self.assertIn('( "," ( "unit" "=" ', ebnf) self.assertIn('( ws "," ws ( "unit" "=" ', ebnf)
else: else:
# JSON format: "location": "Paris" ( , ( "unit": ("celsius" | "fahrenheit") )? # JSON format: "location": "Paris" ( , ( "unit": ("celsius" | "fahrenheit") )?
self.assertIn('"location\\"" ":" basic_string', ebnf) self.assertIn('"location\\"" ws ":" ws basic_string', ebnf)
# The comma should be part of the optional group # The comma should be part of the optional group
# This pattern ensures no trailing comma when unit is omitted # This pattern ensures no trailing comma when unit is omitted
self.assertIn('( "," ( "\\"unit\\"" ":"', ebnf) self.assertIn('( ws "," ws ( "\\"unit\\"" ws ":"', ebnf)
# Validate that the EBNF can be compiled # Validate that the EBNF can be compiled
try: try:
...@@ -788,7 +788,7 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -788,7 +788,7 @@ class TestEBNFGeneration(unittest.TestCase):
) )
# Check required field # Check required field
self.assertIn('"required_field\\"" ":" basic_string', ebnf) self.assertIn('"required_field\\"" ws ":" ws basic_string', ebnf)
# Check the structure for optional parameters # Check the structure for optional parameters
# The pattern should be: required_field ( "," ( opt1 ... | opt2 ... | opt3 ... ) )? # The pattern should be: required_field ( "," ( opt1 ... | opt2 ... | opt3 ... ) )?
...@@ -797,16 +797,16 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -797,16 +797,16 @@ class TestEBNFGeneration(unittest.TestCase):
# Check that optional parameters are in a group with comma # Check that optional parameters are in a group with comma
if args_rule: # Only check if args_rule was found if args_rule: # Only check if args_rule was found
self.assertIn( self.assertIn(
'( ","', '( ws "," ws (',
args_rule, args_rule,
f"{name} should have comma grouped with optional parameters", f"{name} should have comma grouped with optional parameters",
) )
# Check for the alternation pattern that allows flexible ordering # Check for the alternation pattern that allows flexible ordering
# Should contain patterns like: opt1 ... | opt2 ... | opt3 # Should contain patterns like: opt1 ... | opt2 ... | opt3
self.assertIn('"opt1\\"" ":" basic_number', args_rule) self.assertIn('"opt1\\"" ws ":" ws basic_number', args_rule)
self.assertIn('"opt2\\"" ":" basic_boolean', args_rule) self.assertIn('"opt2\\"" ws ":" ws basic_boolean', args_rule)
self.assertIn('"opt3\\"" ":" basic_string', args_rule) self.assertIn('"opt3\\"" ws ":" ws basic_string', args_rule)
# Check for alternation (|) which allows skipping optional parameters # Check for alternation (|) which allows skipping optional parameters
self.assertIn( self.assertIn(
...@@ -881,9 +881,9 @@ class TestEBNFGeneration(unittest.TestCase): ...@@ -881,9 +881,9 @@ class TestEBNFGeneration(unittest.TestCase):
# This allows flexible ordering where any optional can appear first # This allows flexible ordering where any optional can appear first
# Check the structure # Check the structure
self.assertIn('"opt1\\"" ":" basic_string', args_rule) self.assertIn('"opt1\\"" ws ":" ws basic_string', args_rule)
self.assertIn('"opt2\\"" ":" basic_number', args_rule) self.assertIn('"opt2\\"" ws ":" ws basic_number', args_rule)
self.assertIn('"opt3\\"" ":" basic_boolean', args_rule) self.assertIn('"opt3\\"" ws ":" ws basic_boolean', args_rule)
# The pattern SHOULD have alternation (|) for flexible ordering # The pattern SHOULD have alternation (|) for flexible ordering
self.assertIn( self.assertIn(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment