Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
34efbbd3
Unverified
Commit
34efbbd3
authored
Sep 25, 2025
by
Devon Rifkin
Committed by
GitHub
Sep 25, 2025
Browse files
Merge pull request #12417 from ollama/drifkin/qwen3-coder-unicode
parsers: fix unicode handling for qwen3-coder
parents
5a56ff3c
05ba4ca1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
231 additions
and
4 deletions
+231
-4
model/parsers/qwen3coder.go
model/parsers/qwen3coder.go
+14
-4
model/parsers/qwen3coder_test.go
model/parsers/qwen3coder_test.go
+217
-0
No files found.
model/parsers/qwen3coder.go
View file @
34efbbd3
...
@@ -11,6 +11,7 @@ import (
...
@@ -11,6 +11,7 @@ import (
"strconv"
"strconv"
"strings"
"strings"
"unicode"
"unicode"
"unicode/utf8"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
"github.com/ollama/ollama/logutil"
...
@@ -204,12 +205,21 @@ func overlap(s, delim string) int {
...
@@ -204,12 +205,21 @@ func overlap(s, delim string) int {
}
}
// trailingWhitespaceLen returns the number of bytes at the end of s that
// belong to trailing whitespace, as classified by unicode.IsSpace.
//
// The string is decoded rune by rune from the end rather than byte by byte.
// Inspecting single bytes is wrong for UTF-8: the final byte of a multi-byte
// character can be 0x85 or 0xA0, which unicode.IsSpace treats as whitespace
// (U+0085, U+00A0) when the byte is converted to a rune on its own. That
// would report, for example, the last byte of "✅" as whitespace and let a
// caller split the character in half.
//
// The result is a byte count, so multi-byte whitespace such as U+00A0
// (2 bytes) or U+3000 (3 bytes) contributes its full encoded size. An empty
// string yields 0.
func trailingWhitespaceLen(s string) int {
	remaining := s
	total := 0
	for len(remaining) > 0 {
		r, size := utf8.DecodeLastRuneInString(remaining)
		// if it's an invalid utf8 rune, assume it isn't whitespace
		if r == utf8.RuneError && size == 1 {
			break
		}
		if !unicode.IsSpace(r) {
			break
		}
		total += size
		// Drop the rune just counted and keep scanning toward the front.
		remaining = remaining[:len(remaining)-size]
	}
	return total
}
type
XMLFunctionCall
struct
{
type
XMLFunctionCall
struct
{
...
...
model/parsers/qwen3coder_test.go
View file @
34efbbd3
...
@@ -166,6 +166,137 @@ func TestQwenParserStreaming(t *testing.T) {
...
@@ -166,6 +166,137 @@ func TestQwenParserStreaming(t *testing.T) {
},
},
},
},
},
},
{
desc
:
"unicode content"
,
steps
:
[]
step
{
{
input
:
"你好 🌍<tool_call>test</tool_call>مرحبا"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"你好 🌍"
},
qwenEventRawToolCall
{
raw
:
"test"
},
qwenEventContent
{
content
:
"مرحبا"
},
},
},
},
},
{
desc
:
"arabic text handling"
,
steps
:
[]
step
{
{
input
:
"مرحبا بالعالم"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"مرحبا بالعالم"
}},
},
},
},
{
desc
:
"emoji passthrough"
,
steps
:
[]
step
{
{
input
:
"✅"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"✅"
}},
},
},
},
{
desc
:
"emoji after tool call"
,
steps
:
[]
step
{
{
input
:
"<tool_call>test</tool_call>完成 ✅"
,
wantEvents
:
[]
qwenEvent
{
qwenEventRawToolCall
{
raw
:
"test"
},
qwenEventContent
{
content
:
"完成 ✅"
},
},
},
},
},
{
desc
:
"unicode streaming with whitespace handling"
,
steps
:
[]
step
{
{
input
:
"مرحبا"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"مرحبا"
},
},
},
{
input
:
"
\n
"
,
wantEvents
:
[]
qwenEvent
{},
},
{
input
:
"世界"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"
\n
世界"
},
},
},
},
},
{
desc
:
"non-breaking space withheld across chunks"
,
steps
:
[]
step
{
{
input
:
"Hello
\u00a0
"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"Hello"
},
},
},
{
input
:
"world"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"
\u00a0
world"
},
},
},
},
},
{
desc
:
"ideographic space before partial tool"
,
steps
:
[]
step
{
{
input
:
"Hello
\u3000
<tool"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"Hello"
},
},
},
{
input
:
"_call>abc"
,
wantEvents
:
[]
qwenEvent
{},
},
{
input
:
"</tool_call>def"
,
wantEvents
:
[]
qwenEvent
{
qwenEventRawToolCall
{
raw
:
"abc"
},
qwenEventContent
{
content
:
"def"
},
},
},
},
},
{
desc
:
"ideographic space before partial tool fakeout"
,
steps
:
[]
step
{
{
input
:
"Hello
\u3000
<tool"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"Hello"
},
},
},
{
input
:
"fakeout>abc"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"
\u3000
<toolfakeout>abc"
},
},
},
},
},
{
desc
:
"unicode with partial tool tag"
,
steps
:
[]
step
{
{
input
:
"测试🎯 <to"
,
wantEvents
:
[]
qwenEvent
{
qwenEventContent
{
content
:
"测试🎯"
},
},
},
},
},
}
}
anyOnlies
:=
false
anyOnlies
:=
false
...
@@ -347,6 +478,27 @@ ls && echo "a > b and a < b"
...
@@ -347,6 +478,27 @@ ls && echo "a > b and a < b"
},
},
},
},
},
},
{
name
:
"unicode in function names and parameters"
,
tools
:
[]
api
.
Tool
{},
rawToolCall
:
`<function=获取天气>
<parameter=城市>
北京
</parameter>
<parameter=message>
Hello! 你好! 🌟 مرحبا
</parameter>
</function>`
,
wantToolCall
:
api
.
ToolCall
{
Function
:
api
.
ToolCallFunction
{
Name
:
"获取天气"
,
Arguments
:
map
[
string
]
any
{
"城市"
:
"北京"
,
"message"
:
"Hello! 你好! 🌟 مرحبا"
,
},
},
},
},
}
}
for
i
,
step
:=
range
steps
{
for
i
,
step
:=
range
steps
{
...
@@ -360,6 +512,42 @@ ls && echo "a > b and a < b"
...
@@ -360,6 +512,42 @@ ls && echo "a > b and a < b"
}
}
}
}
func
TestTrailingWhitespaceLenUnicode
(
t
*
testing
.
T
)
{
cases
:=
[]
struct
{
name
string
input
string
want
int
}{
{
name
:
"ascii space"
,
input
:
"Hello "
,
want
:
1
,
},
{
name
:
"non-breaking space"
,
input
:
"Hello
\u00a0
"
,
want
:
2
,
},
{
name
:
"ideographic space"
,
input
:
"Hello
\u3000
"
,
want
:
3
,
},
{
name
:
"multiple runes of whitespace"
,
input
:
"Hi
\u00a0\u3000
"
,
want
:
5
,
},
}
for
_
,
tc
:=
range
cases
{
got
:=
trailingWhitespaceLen
(
tc
.
input
)
if
got
!=
tc
.
want
{
t
.
Errorf
(
"%s: trailingWhitespaceLen(%q) = %d, want %d"
,
tc
.
name
,
tc
.
input
,
got
,
tc
.
want
)
}
}
}
func
TestQwenToolCallValueParsing
(
t
*
testing
.
T
)
{
func
TestQwenToolCallValueParsing
(
t
*
testing
.
T
)
{
cases
:=
[]
struct
{
cases
:=
[]
struct
{
desc
string
desc
string
...
@@ -867,6 +1055,8 @@ func TestTrailingWhitespaceLen(t *testing.T) {
...
@@ -867,6 +1055,8 @@ func TestTrailingWhitespaceLen(t *testing.T) {
{
desc
:
"trailing whitespace with newlines"
,
s
:
"abc
\n
"
,
want
:
2
},
{
desc
:
"trailing whitespace with newlines"
,
s
:
"abc
\n
"
,
want
:
2
},
{
desc
:
"only whitespace"
,
s
:
"
\n
"
,
want
:
4
},
{
desc
:
"only whitespace"
,
s
:
"
\n
"
,
want
:
4
},
{
desc
:
"leading whitespace doesn't count"
,
s
:
"
\n
abc"
,
want
:
0
},
{
desc
:
"leading whitespace doesn't count"
,
s
:
"
\n
abc"
,
want
:
0
},
{
desc
:
"unicode with trailing space"
,
s
:
"测试🎯 "
,
want
:
1
},
{
desc
:
"unicode with trailing tab and newline"
,
s
:
"مرحبا
\t\n
"
,
want
:
2
},
}
}
for
_
,
tc
:=
range
cases
{
for
_
,
tc
:=
range
cases
{
...
@@ -876,3 +1066,30 @@ func TestTrailingWhitespaceLen(t *testing.T) {
...
@@ -876,3 +1066,30 @@ func TestTrailingWhitespaceLen(t *testing.T) {
}
}
}
}
}
}
func
TestOverlapFunction
(
t
*
testing
.
T
)
{
cases
:=
[]
struct
{
desc
string
s
string
delim
string
want
int
}{
{
desc
:
"no overlap"
,
s
:
"hello"
,
delim
:
"<tool"
,
want
:
0
},
{
desc
:
"full overlap"
,
s
:
"hello<tool"
,
delim
:
"<tool>"
,
want
:
5
},
{
desc
:
"partial overlap"
,
s
:
"hello<to"
,
delim
:
"<tool>"
,
want
:
3
},
{
desc
:
"unicode with partial overlap"
,
s
:
"测试🎯<to"
,
delim
:
"<tool>"
,
want
:
3
},
{
desc
:
"unicode string with no overlap"
,
s
:
"مرحبا"
,
delim
:
"<tool>"
,
want
:
0
},
{
desc
:
"unicode at boundary"
,
s
:
"世界<"
,
delim
:
"<tool>"
,
want
:
1
},
{
desc
:
"unicode delimiter single rune"
,
s
:
"hello🔧"
,
delim
:
"🔧工具"
,
want
:
len
(
"🔧"
)},
{
desc
:
"unicode delimiter multiple runes"
,
s
:
"hello🔧工"
,
delim
:
"🔧工具"
,
want
:
len
(
"🔧工"
)},
}
for
_
,
tc
:=
range
cases
{
t
.
Run
(
tc
.
desc
,
func
(
t
*
testing
.
T
)
{
got
:=
overlap
(
tc
.
s
,
tc
.
delim
)
if
got
!=
tc
.
want
{
t
.
Errorf
(
"overlap(%q, %q) = %d, want %d"
,
tc
.
s
,
tc
.
delim
,
got
,
tc
.
want
)
}
})
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment