"...text-generation-inference.git" did not exist on "a88c54bb4ce022ddc03b899b0c8018b6625e3a9e"
Commit 242df70a authored by Devon Rifkin's avatar Devon Rifkin
Browse files

parsers: fix `&`s in qwen3coder parameter values

In <https://github.com/ollama/ollama/issues/12357> we that the model
will output tool calls such as

```
<function=shell>
<parameter=command>
pwd && ls -la
</parameter>
</function>
```

We parse this using the approach of transforming into valid xml and then
using an xml parser. While we do transform the function and parameter
names, we weren't escaping the parameter values (which in this example
are invalid since `pwd && ls -la` contains unescaped ampersands).

This has been fixed by first transforming the tags in the same way, and
then walking the transformed string and escaping the text in between the
tags. This also fixes a case where `<` in the middle of a parameter
value would cause an xml parse failure.

Fixes: #12357
parent dba39b2e
...@@ -393,18 +393,55 @@ func parseValue(raw string, paramType api.PropertyType) any { ...@@ -393,18 +393,55 @@ func parseValue(raw string, paramType api.PropertyType) any {
return raw return raw
} }
var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`) var (
qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)
qwenXMLTagRegex = regexp.MustCompile(`</?(?:function|parameter)(?:\s+name="[^"]*")?>`)
)
// transformToXML transforms a raw qwen tool call with xml-like tags into valid // transformToXML transforms a raw qwen tool call with xml-like tags into valid
// xml so that it can be parsed by any xml parser // xml so that it can be parsed by any xml parser
func transformToXML(raw string) string { func transformToXML(raw string) string {
// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking // take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
// care to properly escape the string that becomes the attribute value // care to properly escape the string that becomes the attribute value
return qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string { transformed := qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
groups := qwenTagRegex.FindStringSubmatch(match) groups := qwenTagRegex.FindStringSubmatch(match)
tag := groups[1] tag := groups[1]
var escapedValue strings.Builder var escapedValue strings.Builder
xml.EscapeText(&escapedValue, []byte(groups[2])) xml.EscapeText(&escapedValue, []byte(groups[2]))
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String()) return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
}) })
// Walk the resulting string, escaping any character data that sits between the
// xml tags we just emitted
var out strings.Builder
lastIdx := 0
for _, loc := range qwenXMLTagRegex.FindAllStringIndex(transformed, -1) {
if loc[0] > lastIdx {
escapeTextNode(&out, transformed[lastIdx:loc[0]])
}
out.WriteString(transformed[loc[0]:loc[1]])
lastIdx = loc[1]
}
if lastIdx < len(transformed) {
escapeTextNode(&out, transformed[lastIdx:])
}
return out.String()
}
// escapeTextNode escapes XML character data without altering other characters
// like newlines or tabs (which is why we don't use xml.EscapeText for this)
func escapeTextNode(sb *strings.Builder, s string) {
for _, r := range s {
switch r {
case '&':
sb.WriteString("&amp;")
case '<':
sb.WriteString("&lt;")
case '>':
sb.WriteString("&gt;")
default:
sb.WriteRune(r)
}
}
} }
...@@ -312,6 +312,41 @@ true ...@@ -312,6 +312,41 @@ true
}, },
}, },
}, },
// regression test for <https://github.com/ollama/ollama/issues/12357>
{
name: "ampersands in parameter values",
tools: []api.Tool{},
rawToolCall: `<function=exec>
<parameter=command>
ls && echo "done"
</parameter>
</function>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "exec",
Arguments: map[string]any{
"command": "ls && echo \"done\"",
},
},
},
},
{
name: "angle brackets in parameter values",
tools: []api.Tool{},
rawToolCall: `<function=exec>
<parameter=command>
ls && echo "a > b and a < b"
</parameter>
</function>`,
wantToolCall: api.ToolCall{
Function: api.ToolCallFunction{
Name: "exec",
Arguments: map[string]any{
"command": "ls && echo \"a > b and a < b\"",
},
},
},
},
} }
for i, step := range steps { for i, step := range steps {
...@@ -798,6 +833,19 @@ celsius ...@@ -798,6 +833,19 @@ celsius
</parameter> </parameter>
</function>`, </function>`,
}, },
{
desc: "ampersands in parameter values",
raw: `<function=get_current_temperature>
<parameter=location>
San Francisco & San Jose
</parameter>
</function>`,
want: `<function name="get_current_temperature">
<parameter name="location">
San Francisco &amp; San Jose
</parameter>
</function>`,
},
} }
for _, tc := range cases { for _, tc := range cases {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment