Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
6de62664
Unverified
Commit
6de62664
authored
Aug 19, 2025
by
Devon Rifkin
Committed by
GitHub
Aug 19, 2025
Browse files
Merge pull request #11973 from ollama/drifkin/bpe
model: fix boundary in bpe
parents
05ccb17c
463a6caa
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
1 deletion
+31
-1
model/bytepairencoding.go
model/bytepairencoding.go
+1
-1
model/bytepairencoding_test.go
model/bytepairencoding_test.go
+30
-0
No files found.
model/bytepairencoding.go
View file @
6de62664
...
@@ -109,7 +109,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
...
@@ -109,7 +109,7 @@ func (bpe BytePairEncoding) Encode(s string, addSpecial bool) ([]int32, error) {
r
=
0x0143
r
=
0x0143
case
r
<=
0x0020
:
case
r
<=
0x0020
:
r
=
r
+
0x0100
r
=
r
+
0x0100
case
r
>=
0x007
e
&&
r
<=
0x00a0
:
case
r
>=
0x007
f
&&
r
<=
0x00a0
:
r
=
r
+
0x00a2
r
=
r
+
0x00a2
}
}
...
...
model/bytepairencoding_test.go
View file @
6de62664
...
@@ -207,6 +207,36 @@ func TestLlama(t *testing.T) {
...
@@ -207,6 +207,36 @@ func TestLlama(t *testing.T) {
}
}
}
}
})
})
t
.
Run
(
"roundtriping 0x00-0xFF"
,
func
(
t
*
testing
.
T
)
{
t
.
Parallel
()
for
b
:=
0x00
;
b
<=
0xFF
;
b
++
{
input
:=
string
(
rune
(
b
))
ids
,
err
:=
tokenizer
.
Encode
(
input
,
false
)
if
err
!=
nil
{
t
.
Errorf
(
"failed to encode rune 0x%02X: %v"
,
b
,
err
)
continue
}
decoded
,
err
:=
tokenizer
.
Decode
(
ids
)
if
err
!=
nil
{
t
.
Errorf
(
"failed to decode rune 0x%02X: %v"
,
b
,
err
)
continue
}
if
b
==
0x00
{
if
len
(
decoded
)
!=
0
{
t
.
Errorf
(
"Decode(Encode(0x00)) should be empty, got %v"
,
ids
)
}
continue
}
if
decoded
!=
input
{
t
.
Errorf
(
"rune 0x%02X failed roundtrip: got %q, want %q"
,
b
,
decoded
,
input
)
}
}
})
}
}
func
BenchmarkBytePairEncoding
(
b
*
testing
.
B
)
{
func
BenchmarkBytePairEncoding
(
b
*
testing
.
B
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment