Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
526b2ed1
Unverified
Commit
526b2ed1
authored May 12, 2025 by Michael Yang
Committed by GitHub May 12, 2025
Browse files
fix vocabulary (#10679)
parent
a7240c6d
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing 7 changed files with 28 additions and 26 deletions
+28
-26
model/models/gemma2/model.go
model/models/gemma2/model.go
+2
-0
model/models/gemma3/model_text.go
model/models/gemma3/model_text.go
+0
-13
model/models/llama/model.go
model/models/llama/model.go
+3
-0
model/models/llama4/model.go
model/models/llama4/model.go
+3
-0
model/models/mistral3/model.go
model/models/mistral3/model.go
+17
-0
model/models/mistral3/model_text.go
model/models/mistral3/model_text.go
+0
-13
model/models/mllama/model.go
model/models/mllama/model.go
+3
-0
No files found.
model/models/gemma2/model.go
View file @
526b2ed1
...
...
@@ -45,6 +45,8 @@ func New(c fs.Config) (model.Model, error) {
Types
:
c
.
Ints
(
"tokenizer.ggml.token_type"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
// TODO: set EOT to EOS otherwise 0 will stop generation
EOT
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
},
),
Layers
:
make
([]
Layer
,
c
.
Uint
(
"block_count"
)),
...
...
model/models/gemma3/model_text.go
View file @
526b2ed1
...
...
@@ -7,7 +7,6 @@ import (
"github.com/ollama/ollama/kvcache"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/ml/nn"
"github.com/ollama/ollama/model"
"github.com/ollama/ollama/model/input"
)
...
...
@@ -20,9 +19,6 @@ type TextConfig struct {
}
type
TextModel
struct
{
model
.
Base
model
.
SentencePieceModel
TokenEmbedding
*
nn
.
Embedding
`gguf:"token_embd"`
Layers
[]
TextLayer
`gguf:"blk"`
OutputNorm
*
nn
.
RMSNorm
`gguf:"output_norm"`
...
...
@@ -45,15 +41,6 @@ func newTextModel(c fs.Config) *TextModel {
numBlocks
:=
int
(
c
.
Uint
(
"block_count"
))
m
:=
TextModel
{
SentencePieceModel
:
model
.
NewSentencePieceModel
(
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Scores
:
c
.
Floats
(
"tokenizer.ggml.scores"
),
Types
:
c
.
Ints
(
"tokenizer.ggml.token_type"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
)),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
},
),
Layers
:
make
([]
TextLayer
,
numBlocks
),
TextConfig
:
&
TextConfig
{
hiddenSize
:
int
(
c
.
Uint
(
"embedding_length"
)),
...
...
model/models/llama/model.go
View file @
526b2ed1
...
...
@@ -47,6 +47,9 @@ func New(c fs.Config) (model.Model, error) {
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOS
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
// TODO: set EOT to EOS otherwise 0 will stop generation
EOT
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOT
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
},
),
Layers
:
make
([]
Layer
,
c
.
Uint
(
"block_count"
)),
...
...
model/models/llama4/model.go
View file @
526b2ed1
...
...
@@ -45,6 +45,9 @@ func New(c fs.Config) (model.Model, error) {
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOS
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
// TODO: set EOT to EOS otherwise 0 will stop generation
EOT
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOT
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
},
),
ImageProcessor
:
newImageProcessor
(
c
),
...
...
model/models/mistral3/model.go
View file @
526b2ed1
...
...
@@ -16,6 +16,8 @@ import (
type
Model
struct
{
model
.
Base
model
.
BytePairEncoding
*
TextModel
*
VisionModel
`gguf:"v,vision"`
*
MultiModalProjector
`gguf:"mm"`
...
...
@@ -40,6 +42,21 @@ func New(c fs.Config) (model.Model, error) {
VisionModel
:
newVisionModel
(
c
),
ImageProcessor
:
newImageProcessor
(
c
),
MultiModalProjector
:
newMultiModalProjector
(
c
),
BytePairEncoding
:
model
.
NewBytePairEncoding
(
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Types
:
c
.
Ints
(
"tokenizer.ggml.token_type"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
,
1
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
,
2
)),
AddEOS
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
// TODO: set EOT to EOS otherwise 0 will stop generation
EOT
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOT
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
},
),
}
m
.
Cache
=
kvcache
.
NewCausalCache
(
m
.
TextModel
.
Shift
)
...
...
model/models/mistral3/model_text.go
View file @
526b2ed1
...
...
@@ -21,7 +21,6 @@ type TextOptions struct {
type
TextModel
struct
{
model
.
Base
model
.
BytePairEncoding
TokenEmbedding
*
nn
.
Embedding
`gguf:"token_embd"`
Layers
[]
Layer
`gguf:"blk"`
...
...
@@ -148,18 +147,6 @@ func NewTextModel(c fs.Config) (*TextModel, error) {
}
textModel
:=
&
TextModel
{
BytePairEncoding
:
model
.
NewBytePairEncoding
(
c
.
String
(
"tokenizer.ggml.pretokenizer"
,
`[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]*[\p{Ll}\p{Lm}\p{Lo}\p{M}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}]+[\p{Ll}\p{Lm}\p{Lo}\p{M}]*|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n/]*|\s*[\r\n]+|\s+(?!\S)|\s+`
),
&
model
.
Vocabulary
{
Values
:
c
.
Strings
(
"tokenizer.ggml.tokens"
),
Types
:
c
.
Ints
(
"tokenizer.ggml.token_type"
),
Merges
:
c
.
Strings
(
"tokenizer.ggml.merges"
),
BOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.bos_token_id"
,
1
)),
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
,
2
)),
AddEOS
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
},
),
Layers
:
make
([]
Layer
,
c
.
Uint
(
"block_count"
)),
TextOptions
:
&
TextOptions
{
hiddenSize
:
int
(
c
.
Uint
(
"embedding_length"
)),
...
...
model/models/mllama/model.go
View file @
526b2ed1
...
...
@@ -49,6 +49,9 @@ func New(c fs.Config) (model.Model, error) {
AddBOS
:
c
.
Bool
(
"tokenizer.ggml.add_bos_token"
,
true
),
EOS
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOS
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
// TODO: set EOT to EOS otherwise 0 will stop generation
EOT
:
int32
(
c
.
Uint
(
"tokenizer.ggml.eos_token_id"
)),
AddEOT
:
c
.
Bool
(
"tokenizer.ggml.add_eos_token"
,
false
),
},
),
ImageProcessor
:
newImageProcessor
(
c
),
...
...
Write
Preview
Markdown
is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment