Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
d26c18e2
Commit
d26c18e2
authored
Apr 23, 2025
by
Michael Yang
Committed by
Michael Yang
Apr 25, 2025
Browse files
fix token type
parent
8d376acc
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
36 additions
and
25 deletions
+36
-25
fs/config.go
fs/config.go
+1
-1
fs/ggml/ggml.go
fs/ggml/ggml.go
+16
-5
model/models/gemma2/model.go
model/models/gemma2/model.go
+1
-1
model/models/gemma3/model.go
model/models/gemma3/model.go
+1
-1
model/models/gemma3/model_text.go
model/models/gemma3/model_text.go
+1
-1
model/models/llama/model.go
model/models/llama/model.go
+1
-1
model/models/mistral3/model_text.go
model/models/mistral3/model_text.go
+1
-1
model/models/mllama/model.go
model/models/mllama/model.go
+1
-1
model/models/mllama/model_text.go
model/models/mllama/model_text.go
+4
-4
model/models/mllama/model_vision.go
model/models/mllama/model_vision.go
+4
-4
model/process_text.go
model/process_text.go
+1
-1
model/process_text_spm_test.go
model/process_text_spm_test.go
+3
-3
model/process_text_test.go
model/process_text_test.go
+1
-1
No files found.
fs/config.go
View file @
d26c18e2
...
@@ -8,6 +8,6 @@ type Config interface {
...
@@ -8,6 +8,6 @@ type Config interface {
Bool
(
string
,
...
bool
)
bool
Bool
(
string
,
...
bool
)
bool
Strings
(
string
,
...
[]
string
)
[]
string
Strings
(
string
,
...
[]
string
)
[]
string
Ui
nts
(
string
,
...
[]
u
int32
)
[]
u
int32
I
nts
(
string
,
...
[]
int32
)
[]
int32
Floats
(
string
,
...
[]
float32
)
[]
float32
Floats
(
string
,
...
[]
float32
)
[]
float32
}
}
fs/ggml/ggml.go
View file @
d26c18e2
...
@@ -108,6 +108,10 @@ func (kv KV) Strings(key string, defaultValue ...[]string) []string {
...
@@ -108,6 +108,10 @@ func (kv KV) Strings(key string, defaultValue ...[]string) []string {
return
keyValue
(
kv
,
key
,
&
array
[
string
]{})
.
values
return
keyValue
(
kv
,
key
,
&
array
[
string
]{})
.
values
}
}
func
(
kv
KV
)
Ints
(
key
string
,
defaultValue
...
[]
int32
)
[]
int32
{
return
keyValue
(
kv
,
key
,
&
array
[
int32
]{})
.
values
}
func
(
kv
KV
)
Uints
(
key
string
,
defaultValue
...
[]
uint32
)
[]
uint32
{
func
(
kv
KV
)
Uints
(
key
string
,
defaultValue
...
[]
uint32
)
[]
uint32
{
return
keyValue
(
kv
,
key
,
&
array
[
uint32
]{})
.
values
return
keyValue
(
kv
,
key
,
&
array
[
uint32
]{})
.
values
}
}
...
@@ -124,11 +128,18 @@ func (kv KV) OllamaEngineRequired() bool {
...
@@ -124,11 +128,18 @@ func (kv KV) OllamaEngineRequired() bool {
}
}
type
valueTypes
interface
{
type
valueTypes
interface
{
string
|
uint32
|
uint64
|
float32
|
bool
|
uint8
|
int8
|
uint16
|
int16
|
*
array
[
string
]
|
*
array
[
uint32
]
|
*
array
[
uint64
]
|
*
array
[
float32
]
|
*
array
[
bool
]
uint32
|
int32
|
uint64
|
int64
|
string
|
float32
|
float64
|
bool
}
type
arrayValueTypes
interface
{
*
array
[
uint8
]
|
*
array
[
int8
]
|
*
array
[
uint16
]
|
*
array
[
int16
]
|
*
array
[
uint32
]
|
*
array
[
int32
]
|
*
array
[
uint64
]
|
*
array
[
int64
]
|
*
array
[
string
]
|
*
array
[
float32
]
|
*
array
[
float64
]
|
*
array
[
bool
]
}
}
func
keyValue
[
T
valueTypes
](
kv
KV
,
key
string
,
defaultValue
...
T
)
T
{
func
keyValue
[
T
valueTypes
|
arrayValueTypes
](
kv
KV
,
key
string
,
defaultValue
...
T
)
T
{
if
!
strings
.
HasPrefix
(
key
,
"tokenizer."
)
&&
!
strings
.
HasPrefix
(
key
,
"general."
)
{
if
!
strings
.
HasPrefix
(
key
,
"tokenizer."
)
&&
!
strings
.
HasPrefix
(
key
,
"general."
)
{
key
=
kv
.
Architecture
()
+
"."
+
key
key
=
kv
.
Architecture
()
+
"."
+
key
}
}
...
@@ -450,9 +461,9 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
...
@@ -450,9 +461,9 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
case
"mllama"
:
case
"mllama"
:
var
visionTokens
,
tiles
uint64
=
1601
,
4
var
visionTokens
,
tiles
uint64
=
1601
,
4
crossAttentionLayers
:=
f
.
KV
()
.
Ui
nts
(
"attention.cross_attention_layers"
)
crossAttentionLayers
:=
f
.
KV
()
.
I
nts
(
"attention.cross_attention_layers"
)
for
i
:=
range
kv
{
for
i
:=
range
kv
{
if
slices
.
Contains
(
crossAttentionLayers
,
u
int32
(
i
))
{
if
slices
.
Contains
(
crossAttentionLayers
,
int32
(
i
))
{
kv
[
i
]
=
headsKV
*
(
embeddingHeadsK
+
embeddingHeadsV
)
*
kv
[
i
]
=
headsKV
*
(
embeddingHeadsK
+
embeddingHeadsV
)
*
4
*
// sizeof(float32)
4
*
// sizeof(float32)
visionTokens
*
visionTokens
*
...
...
model/models/gemma2/model.go
View file @
d26c18e2
...
@@ -42,7 +42,7 @@ func New(c fs.Config) (model.Model, error) {
...
@@ -42,7 +42,7 @@ func New(c fs.Config) (model.Model, error) {
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
},
},
...
...
model/models/gemma3/model.go
View file @
d26c18e2
...
@@ -59,7 +59,7 @@ func New(c fs.Config) (model.Model, error) {
...
@@ -59,7 +59,7 @@ func New(c fs.Config) (model.Model, error) {
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
1
),
EOS
:
int32
(
1
),
...
...
model/models/gemma3/model_text.go
View file @
d26c18e2
...
@@ -49,7 +49,7 @@ func newTextModel(c fs.Config) *TextModel {
...
@@ -49,7 +49,7 @@ func newTextModel(c fs.Config) *TextModel {
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
},
},
...
...
model/models/llama/model.go
View file @
d26c18e2
...
@@ -41,7 +41,7 @@ func New(c fs.Config) (model.Model, error) {
...
@@ -41,7 +41,7 @@ func New(c fs.Config) (model.Model, error) {
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
...
...
model/models/mistral3/model_text.go
View file @
d26c18e2
...
@@ -152,7 +152,7 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
...
@@ -152,7 +152,7 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
,
1
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
,
1
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
...
...
model/models/mllama/model.go
View file @
d26c18e2
...
@@ -43,7 +43,7 @@ func New(c fs.Config) (model.Model, error) {
...
@@ -43,7 +43,7 @@ func New(c fs.Config) (model.Model, error) {
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
&
model
.
Vocabulary
{
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Types
:
c
.
Ui
nts
(
"tokenizer.ggml.token_type"
),
Types
:
c
.
I
nts
(
"tokenizer.ggml.token_type"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
...
...
model/models/mllama/model_text.go
View file @
d26c18e2
...
@@ -177,7 +177,7 @@ type TextDecoder struct {
...
@@ -177,7 +177,7 @@ type TextDecoder struct {
func
(
d
*
TextDecoder
)
Forward
(
ctx
ml
.
Context
,
hiddenState
,
positionIDs
,
outputs
,
mask
,
crossAttentionStates
,
crossAttentionMask
ml
.
Tensor
,
cache
*
kvcache
.
WrapperCache
,
opts
*
TextModelOptions
)
ml
.
Tensor
{
func
(
d
*
TextDecoder
)
Forward
(
ctx
ml
.
Context
,
hiddenState
,
positionIDs
,
outputs
,
mask
,
crossAttentionStates
,
crossAttentionMask
ml
.
Tensor
,
cache
*
kvcache
.
WrapperCache
,
opts
*
TextModelOptions
)
ml
.
Tensor
{
for
i
,
layer
:=
range
d
.
Layers
{
for
i
,
layer
:=
range
d
.
Layers
{
layerType
:=
selfAttentionLayer
layerType
:=
selfAttentionLayer
if
slices
.
Contains
(
opts
.
crossAttentionLayers
,
u
int32
(
i
))
{
if
slices
.
Contains
(
opts
.
crossAttentionLayers
,
int32
(
i
))
{
layerType
=
crossAttentionLayer
layerType
=
crossAttentionLayer
}
}
...
@@ -202,7 +202,7 @@ type TextModelOptions struct {
...
@@ -202,7 +202,7 @@ type TextModelOptions struct {
eps
,
ropeBase
,
ropeScale
float32
eps
,
ropeBase
,
ropeScale
float32
ropeDim
uint32
ropeDim
uint32
crossAttentionLayers
[]
u
int32
crossAttentionLayers
[]
int32
}
}
type
TextModel
struct
{
type
TextModel
struct
{
...
@@ -225,7 +225,7 @@ func newTextModel(c fs.Config) *TextModel {
...
@@ -225,7 +225,7 @@ func newTextModel(c fs.Config) *TextModel {
var
decoderLayers
[]
TextDecoderLayer
var
decoderLayers
[]
TextDecoderLayer
for
i
:=
range
c
.
Uint
(
"block_count"
)
{
for
i
:=
range
c
.
Uint
(
"block_count"
)
{
var
textDecoderLayer
TextDecoderLayer
var
textDecoderLayer
TextDecoderLayer
if
slices
.
Contains
(
c
.
Ui
nts
(
"attention.cross_attention_layers"
),
i
)
{
if
slices
.
Contains
(
c
.
I
nts
(
"attention.cross_attention_layers"
),
i
nt32
(
i
)
)
{
textDecoderLayer
=
&
TextCrossAttentionDecoderLayer
{}
textDecoderLayer
=
&
TextCrossAttentionDecoderLayer
{}
}
else
{
}
else
{
textDecoderLayer
=
&
TextSelfAttentionDecoderLayer
{}
textDecoderLayer
=
&
TextSelfAttentionDecoderLayer
{}
...
@@ -244,7 +244,7 @@ func newTextModel(c fs.Config) *TextModel {
...
@@ -244,7 +244,7 @@ func newTextModel(c fs.Config) *TextModel {
ropeBase
:
c
.
Float
(
"rope.freq_base"
),
ropeBase
:
c
.
Float
(
"rope.freq_base"
),
ropeScale
:
c
.
Float
(
"rope.freq_scale"
,
1
),
ropeScale
:
c
.
Float
(
"rope.freq_scale"
,
1
),
ropeDim
:
c
.
Uint
(
"rope.dimension_count"
),
ropeDim
:
c
.
Uint
(
"rope.dimension_count"
),
crossAttentionLayers
:
c
.
Ui
nts
(
"attention.cross_attention_layers"
),
crossAttentionLayers
:
c
.
I
nts
(
"attention.cross_attention_layers"
),
},
},
}
}
}
}
model/models/mllama/model_vision.go
View file @
d26c18e2
...
@@ -96,10 +96,10 @@ type VisionEncoder struct {
...
@@ -96,10 +96,10 @@ type VisionEncoder struct {
Layers
[]
VisionEncoderLayer
Layers
[]
VisionEncoderLayer
}
}
func
(
e
*
VisionEncoder
)
Forward
(
ctx
ml
.
Context
,
hiddenState
ml
.
Tensor
,
intermediateLayersIndices
[]
u
int32
,
opts
*
VisionModelOptions
)
(
ml
.
Tensor
,
[]
ml
.
Tensor
)
{
func
(
e
*
VisionEncoder
)
Forward
(
ctx
ml
.
Context
,
hiddenState
ml
.
Tensor
,
intermediateLayersIndices
[]
int32
,
opts
*
VisionModelOptions
)
(
ml
.
Tensor
,
[]
ml
.
Tensor
)
{
var
intermediateHiddenStates
[]
ml
.
Tensor
var
intermediateHiddenStates
[]
ml
.
Tensor
for
i
,
layer
:=
range
e
.
Layers
{
for
i
,
layer
:=
range
e
.
Layers
{
if
slices
.
Contains
(
intermediateLayersIndices
,
u
int32
(
i
))
{
if
slices
.
Contains
(
intermediateLayersIndices
,
int32
(
i
))
{
intermediateHiddenStates
=
append
(
intermediateHiddenStates
,
hiddenState
.
Reshape
(
ctx
,
append
([]
int
{
1
},
hiddenState
.
Shape
()
...
)
...
))
intermediateHiddenStates
=
append
(
intermediateHiddenStates
,
hiddenState
.
Reshape
(
ctx
,
append
([]
int
{
1
},
hiddenState
.
Shape
()
...
)
...
))
}
}
...
@@ -154,7 +154,7 @@ type VisionModelOptions struct {
...
@@ -154,7 +154,7 @@ type VisionModelOptions struct {
imageSize
,
patchSize
int
imageSize
,
patchSize
int
eps
float32
eps
float32
intermediateLayersIndices
[]
u
int32
intermediateLayersIndices
[]
int32
}
}
type
VisionModel
struct
{
type
VisionModel
struct
{
...
@@ -229,7 +229,7 @@ func newVisionModel(c fs.Config) *VisionModel {
...
@@ -229,7 +229,7 @@ func newVisionModel(c fs.Config) *VisionModel {
eps
:
c
.
Float
(
"vision.attention.layer_norm_epsilon"
),
eps
:
c
.
Float
(
"vision.attention.layer_norm_epsilon"
),
intermediateLayersIndices
:
c
.
Ui
nts
(
"vision.intermediate_layers_indices"
),
intermediateLayersIndices
:
c
.
I
nts
(
"vision.intermediate_layers_indices"
),
},
},
}
}
}
}
model/process_text.go
View file @
d26c18e2
...
@@ -37,7 +37,7 @@ type TextProcessor interface {
...
@@ -37,7 +37,7 @@ type TextProcessor interface {
type
Vocabulary
struct
{
type
Vocabulary
struct
{
Values
[]
string
Values
[]
string
Types
[]
u
int32
Types
[]
int32
Scores
[]
float32
Scores
[]
float32
Merges
[]
string
Merges
[]
string
...
...
model/process_text_spm_test.go
View file @
d26c18e2
...
@@ -35,9 +35,9 @@ func loadSentencePieceVocab(t *testing.T) SentencePieceModel {
...
@@ -35,9 +35,9 @@ func loadSentencePieceVocab(t *testing.T) SentencePieceModel {
sentencepiece
.
ModelProto_SentencePiece_CONTROL
,
sentencepiece
.
ModelProto_SentencePiece_CONTROL
,
sentencepiece
.
ModelProto_SentencePiece_UNUSED
,
sentencepiece
.
ModelProto_SentencePiece_UNUSED
,
sentencepiece
.
ModelProto_SentencePiece_BYTE
:
sentencepiece
.
ModelProto_SentencePiece_BYTE
:
v
.
Types
=
append
(
v
.
Types
,
u
int32
(
t
))
v
.
Types
=
append
(
v
.
Types
,
int32
(
t
))
default
:
default
:
tt
:=
u
int32
(
sentencepiece
.
ModelProto_SentencePiece_NORMAL
)
tt
:=
int32
(
sentencepiece
.
ModelProto_SentencePiece_NORMAL
)
// todo parse the special tokens file
// todo parse the special tokens file
// - this will roundtrip correctly but the <start_of_turn> and
// - this will roundtrip correctly but the <start_of_turn> and
// <end_of_turn> tokens aren't processed
// <end_of_turn> tokens aren't processed
...
@@ -124,7 +124,7 @@ func TestSentencePieceModelDecodeByteTokens(t *testing.T) {
...
@@ -124,7 +124,7 @@ func TestSentencePieceModelDecodeByteTokens(t *testing.T) {
"<0xC3>"
,
"<0xC3>"
,
"<0xA3>"
,
"<0xA3>"
,
},
},
Types
:
[]
u
int32
{
Types
:
[]
int32
{
TOKEN_TYPE_NORMAL
,
TOKEN_TYPE_NORMAL
,
TOKEN_TYPE_BYTE
,
TOKEN_TYPE_BYTE
,
TOKEN_TYPE_BYTE
,
TOKEN_TYPE_BYTE
,
...
...
model/process_text_test.go
View file @
d26c18e2
...
@@ -28,7 +28,7 @@ func llama(t testing.TB) BytePairEncoding {
...
@@ -28,7 +28,7 @@ func llama(t testing.TB) BytePairEncoding {
t
.
Fatal
(
err
)
t
.
Fatal
(
err
)
}
}
types
:=
make
([]
u
int32
,
len
(
vocab
))
types
:=
make
([]
int32
,
len
(
vocab
))
tokens
:=
make
([]
string
,
len
(
vocab
))
tokens
:=
make
([]
string
,
len
(
vocab
))
for
token
,
id
:=
range
vocab
{
for
token
,
id
:=
range
vocab
{
tokens
[
id
]
=
token
tokens
[
id
]
=
token
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment