Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
2d315ba9
Commit
2d315ba9
authored
May 08, 2024
by
Patrick Devine
Committed by
Michael Yang
May 20, 2024
Browse files
add missing file
parent
d355d202
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
72 additions
and
0 deletions
+72
-0
convert/tokenizer.go
convert/tokenizer.go
+72
-0
No files found.
convert/tokenizer.go
0 → 100644
View file @
2d315ba9
package
convert
import
(
"encoding/json"
"io/ioutil"
"os"
)
// Tokenizer is the top-level value decoded from a tokenizer JSON document.
type Tokenizer struct {
	// Version is decoded from the "version" key.
	Version string `json:"version"`
	// AddedTokens is decoded from the "added_tokens" key.
	AddedTokens []Token `json:"added_tokens"`
	// Model is decoded from the "model" object.
	Model TokenizerModel `json:"model"`
}
// TokenizerModel is the "model" object of a tokenizer JSON document.
type TokenizerModel struct {
	// Type is decoded from the "type" key.
	Type string `json:"type"`
	// Vocab is decoded from the "vocab" key; newTokenizer treats each key as
	// a token's content and each value as that token's ID.
	Vocab map[string]int `json:"vocab"`
	// Merges is decoded from the "merges" key.
	Merges []string `json:"merges"`
	// Tokens carries no json tag. newTokenizer replaces it after decoding
	// with a slice indexed by token ID that combines Vocab and the
	// tokenizer's added tokens.
	Tokens []Token
}
// Token is a single tokenizer entry.
type Token struct {
	// ID is decoded from the "id" key.
	ID int `json:"id"`
	// Content is decoded from the "content" key.
	Content string `json:"content"`
	// Special is decoded from the "special" key.
	Special bool `json:"special"`
	// UserDefined carries no json tag. newTokenizer sets it to true on the
	// copies of added tokens it stores in TokenizerModel.Tokens and to false
	// on entries built from the vocabulary.
	UserDefined bool
}
func
(
t
*
Tokenizer
)
getMaxID
()
int
{
var
maxID
int
for
_
,
v
:=
range
t
.
Model
.
Vocab
{
maxID
=
max
(
maxID
,
v
)
}
for
_
,
v
:=
range
t
.
AddedTokens
{
maxID
=
max
(
maxID
,
v
.
ID
)
}
return
maxID
}
func
newTokenizer
(
dirpath
string
)
(
*
Tokenizer
,
error
)
{
f
,
err
:=
os
.
Open
(
dirpath
)
if
err
!=
nil
{
panic
(
err
)
}
defer
f
.
Close
()
data
,
err
:=
ioutil
.
ReadAll
(
f
)
if
err
!=
nil
{
return
nil
,
err
}
var
tdata
Tokenizer
if
err
:=
json
.
Unmarshal
(
data
,
&
tdata
);
err
!=
nil
{
return
nil
,
err
}
maxID
:=
tdata
.
getMaxID
()
tdata
.
Model
.
Tokens
=
make
([]
Token
,
maxID
+
1
)
for
k
,
v
:=
range
tdata
.
Model
.
Vocab
{
tdata
.
Model
.
Tokens
[
v
]
=
Token
{
ID
:
v
,
Content
:
k
,
Special
:
false
,
UserDefined
:
false
}
}
for
_
,
v
:=
range
tdata
.
AddedTokens
{
v
.
UserDefined
=
true
tdata
.
Model
.
Tokens
[
v
.
ID
]
=
v
}
return
&
tdata
,
nil
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment