Unverified commit ce8ce825, authored Apr 23, 2024 by Patrick Devine, committed by GitHub on Apr 23, 2024.
add mixtral 8x7b model conversion (#3859)
Parent: 4dc4f1be

Showing 3 changed files with 138 additions and 25 deletions (+138, -25):
convert/convert.go      +17, -13
convert/mixtral.go      +96,  -0
convert/safetensors.go  +25, -12
convert/convert.go:

@@ -18,19 +18,23 @@ import (
 )
 
 type Params struct {
-	Architectures    []string `json:"architectures"`
-	VocabSize        int      `json:"vocab_size"`
-	HiddenSize       int      `json:"hidden_size"`       // n_embd
-	HiddenLayers     int      `json:"num_hidden_layers"` // n_layer
-	ContextSize      int      `json:"max_position_embeddings"`
-	IntermediateSize int      `json:"intermediate_size"`
-	AttentionHeads   int      `json:"num_attention_heads"` // n_head
-	KeyValHeads      int      `json:"num_key_value_heads"`
-	NormEPS          float64  `json:"rms_norm_eps"`
-	BoSTokenID       int      `json:"bos_token_id"`
-	EoSTokenID       int      `json:"eos_token_id"`
-	HeadDimension    int      `json:"head_dim"`
-	PaddingTokenID   int      `json:"pad_token_id"`
+	Architectures     []string `json:"architectures"`
+	VocabSize         int      `json:"vocab_size"`
+	HiddenSize        int      `json:"hidden_size"`       // n_embd
+	HiddenLayers      int      `json:"num_hidden_layers"` // n_layer
+	ContextSize       int      `json:"max_position_embeddings"`
+	IntermediateSize  int      `json:"intermediate_size"`
+	AttentionHeads    int      `json:"num_attention_heads"` // n_head
+	KeyValHeads       int      `json:"num_key_value_heads"`
+	NormEPS           float64  `json:"rms_norm_eps"`
+	BoSTokenID        int      `json:"bos_token_id"`
+	EoSTokenID        int      `json:"eos_token_id"`
+	HeadDimension     int      `json:"head_dim"`
+	PaddingTokenID    int      `json:"pad_token_id"`
+	RopeFrequencyBase float64  `json:"rope_theta"`
+	Experts           int      `json:"num_local_experts"`
+	ExpertsUsed       int      `json:"num_experts_per_tok"`
+
 	ByteOrder
 }
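The three fields added here correspond one-to-one to keys in a Hugging Face Mixtral config.json ("rope_theta", "num_local_experts", "num_experts_per_tok"), so the existing JSON decoding picks them up without further changes. A minimal standalone sketch of that decoding, assuming a config.json in the current directory; the readConfig helper is illustrative and not part of this commit:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

// Params mirrors the newly added fields from convert/convert.go (subset shown).
type Params struct {
	Experts           int     `json:"num_local_experts"`
	ExpertsUsed       int     `json:"num_experts_per_tok"`
	RopeFrequencyBase float64 `json:"rope_theta"`
}

// readConfig decodes a Hugging Face config.json into Params using the same
// struct tags as the commit. Hypothetical helper, for illustration only.
func readConfig(path string) (*Params, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var p Params
	if err := json.NewDecoder(f).Decode(&p); err != nil {
		return nil, err
	}
	return &p, nil
}

func main() {
	p, err := readConfig("config.json")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	// Mixtral 8x7B configs typically declare 8 local experts with 2 used per token.
	fmt.Printf("experts=%d used=%d rope_theta=%g\n", p.Experts, p.ExpertsUsed, p.RopeFrequencyBase)
}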
convert/mixtral.go (new file, mode 100644):

package convert

import (
	"os"
	"regexp"

	"github.com/ollama/ollama/llm"
)

type MixtralModel struct {
	ModelData
}

func (m *MixtralModel) GetTensors() error {
	t, err := m.Format.GetTensors(m.Path, m.Params)
	if err != nil {
		return err
	}

	m.Tensors = []llm.Tensor{}

	pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
	re, err := regexp.Compile(pattern)
	if err != nil {
		return err
	}

	for _, l := range t {
		matches := re.FindAllStringSubmatch(l.Name, -1)
		if len(matches) > 0 {
			wt := l.WriterTo.(safetensorWriterTo)
			wt.handler = mistralLayerHandler
			l.WriterTo = wt
		}
		m.Tensors = append(m.Tensors, l)
	}

	return nil
}

func (m *MixtralModel) LoadVocab() error {
	v, err := LoadSentencePieceTokens(m.Path, m.Params)
	if err != nil {
		return err
	}
	m.Vocab = v
	return nil
}

func (m *MixtralModel) WriteGGUF() (string, error) {
	kv := llm.KV{
		"general.architecture":          "llama",
		"general.name":                  m.Name,
		"llama.block_count":             uint32(m.Params.HiddenLayers),
		"llama.context_length":          uint32(m.Params.ContextSize),
		"llama.embedding_length":        uint32(m.Params.HiddenSize),
		"llama.feed_forward_length":     uint32(m.Params.IntermediateSize),
		"llama.attention.head_count":    uint32(m.Params.AttentionHeads),
		"llama.attention.head_count_kv": uint32(m.Params.KeyValHeads),

		"llama.rope.freq_base":                   float32(m.Params.RopeFrequencyBase),
		"llama.attention.layer_norm_rms_epsilon": float32(m.Params.NormEPS),

		"llama.expert_count":      uint32(m.Params.Experts),
		"llama.expert_used_count": uint32(m.Params.ExpertsUsed),

		"llama.vocab_size":           uint32(len(m.Vocab.Tokens)),
		"llama.rope.dimension_count": uint32(m.Params.HiddenSize / m.Params.AttentionHeads),

		"general.file_type":    uint32(1),
		"tokenizer.ggml.model": "llama",

		"tokenizer.ggml.tokens":     m.Vocab.Tokens,
		"tokenizer.ggml.scores":     m.Vocab.Scores,
		"tokenizer.ggml.token_type": m.Vocab.Types,

		"tokenizer.ggml.bos_token_id":     uint32(m.Params.BoSTokenID),
		"tokenizer.ggml.eos_token_id":     uint32(m.Params.EoSTokenID),
		"tokenizer.ggml.unknown_token_id": uint32(0),
		"tokenizer.ggml.add_bos_token":    true,
		"tokenizer.ggml.add_eos_token":    false,
	}

	f, err := os.CreateTemp("", "ollama-gguf")
	if err != nil {
		return "", err
	}
	defer f.Close()

	mod := llm.NewGGUFV3(m.Params.ByteOrder)
	if err := mod.Encode(f, kv, m.Tensors); err != nil {
		return "", err
	}

	return f.Name(), nil
}
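In GetTensors, every per-layer attn_q and attn_k tensor gets its safetensorWriterTo rewired to mistralLayerHandler, a handler defined elsewhere in the convert package and not shown in this diff; the usual reason in llama.cpp-style converters is to permute query/key weights into the rotary-embedding layout GGML expects, though the commit itself does not state that. A standalone sketch (not code from the commit) of which tensor names the pattern selects:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// The same pattern GetTensors compiles to pick out per-layer query/key weights.
	re := regexp.MustCompile(`^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`)

	for _, name := range []string{
		"blk.0.attn_q.weight",     // matched: rewired to mistralLayerHandler
		"blk.12.attn_k.weight",    // matched
		"blk.0.attn_v.weight",     // not matched: written out unchanged
		"blk.0.ffn_gate.3.weight", // not matched (expert FFN weight)
	} {
		fmt.Printf("%-25s matched=%v\n", name, re.MatchString(name))
	}
}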
convert/safetensors.go:

@@ -93,7 +93,6 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 	}
 
 	slices.Sort(keys)
-	slog.Info("converting layers")
 
 	var tensors []llm.Tensor

@@ -105,7 +104,6 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			return nil, 0, err
 		}
-		slog.Debug(fmt.Sprintf("metadata = %#v", data))
 
 		var size uint64
 		var kind uint32
 		switch len(data.Shape) {

@@ -150,11 +148,13 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			padding: 8 + jsonSize,
 		}
 
-		tensors = append(tensors, t)
 		offset += size
+		tensors = append(tensors, t)
 	}
+	slog.Debug(fmt.Sprintf("total tensors for file = %d", len(tensors)))
+	slog.Debug(fmt.Sprintf("offset = %d", offset))
 	return tensors, offset, nil
 }
@@ -185,15 +185,19 @@ func (m *SafetensorFormat) GetLayerName(n string) (string, error) {
 	}
 
 	tMap := map[string]string{
-		"model.layers.(\\d+).input_layernorm.weight":          "blk.$1.attn_norm.weight",
-		"model.layers.(\\d+).mlp.down_proj.weight":            "blk.$1.ffn_down.weight",
-		"model.layers.(\\d+).mlp.gate_proj.weight":            "blk.$1.ffn_gate.weight",
-		"model.layers.(\\d+).mlp.up_proj.weight":              "blk.$1.ffn_up.weight",
-		"model.layers.(\\d+).post_attention_layernorm.weight": "blk.$1.ffn_norm.weight",
-		"model.layers.(\\d+).self_attn.k_proj.weight":         "blk.$1.attn_k.weight",
-		"model.layers.(\\d+).self_attn.o_proj.weight":         "blk.$1.attn_output.weight",
-		"model.layers.(\\d+).self_attn.q_proj.weight":         "blk.$1.attn_q.weight",
-		"model.layers.(\\d+).self_attn.v_proj.weight":         "blk.$1.attn_v.weight",
+		"model.layers.(\\d+).input_layernorm.weight":                    "blk.$1.attn_norm.weight",
+		"model.layers.(\\d+).mlp.down_proj.weight":                      "blk.$1.ffn_down.weight",
+		"model.layers.(\\d+).mlp.gate_proj.weight":                      "blk.$1.ffn_gate.weight",
+		"model.layers.(\\d+).mlp.up_proj.weight":                        "blk.$1.ffn_up.weight",
+		"model.layers.(\\d+).post_attention_layernorm.weight":           "blk.$1.ffn_norm.weight",
+		"model.layers.(\\d+).self_attn.k_proj.weight":                   "blk.$1.attn_k.weight",
+		"model.layers.(\\d+).self_attn.o_proj.weight":                   "blk.$1.attn_output.weight",
+		"model.layers.(\\d+).self_attn.q_proj.weight":                   "blk.$1.attn_q.weight",
+		"model.layers.(\\d+).self_attn.v_proj.weight":                   "blk.$1.attn_v.weight",
+		"model.layers.(\\d+).block_sparse_moe.gate.weight":              "blk.$1.ffn_gate_inp.weight",
+		"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight": "blk.$1.ffn_gate.$2.weight",
+		"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w2.weight": "blk.$1.ffn_down.$2.weight",
+		"model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w3.weight": "blk.$1.ffn_up.$2.weight",
 	}
 
 	v, ok := directMap[n]
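The four new entries handle Mixtral's sparse-MoE tensors: the router ("block_sparse_moe.gate") becomes ffn_gate_inp, and each expert's w1/w2/w3 matrices keep their expert index in the GGUF name. Assuming GetLayerName applies these entries as regexp pattern → replacement pairs (the rest of the function is not shown in this diff), one of them behaves like this; the example is a standalone illustration, not code from the commit:

package main

import (
	"fmt"
	"regexp"
)

func main() {
	// One of the new tMap entries, compiled as-is: "\\d" in the Go string
	// literal is the regexp \d, and the unescaped dots match '.' anyway.
	re := regexp.MustCompile("model.layers.(\\d+).block_sparse_moe.experts.(\\d+).w1.weight")

	name := "model.layers.7.block_sparse_moe.experts.3.w1.weight"
	fmt.Println(re.ReplaceAllString(name, "blk.$1.ffn_gate.$2.weight"))
	// Prints: blk.7.ffn_gate.3.weight
}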
@@ -286,6 +290,15 @@ func (m *SafetensorFormat) GetModelArch(name, dirPath string, params *Params) (M
 				Format: m,
 			},
 		}, nil
+	case "MixtralForCausalLM":
+		return &MixtralModel{
+			ModelData{
+				Name:   name,
+				Path:   dirPath,
+				Params: params,
+				Format: m,
+			},
+		}, nil
 	case "GemmaForCausalLM":
 		return &GemmaModel{
 			ModelData{
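With this case in place, a checkpoint whose config.json lists "MixtralForCausalLM" under "architectures" is routed to the new MixtralModel. A hypothetical sketch of how the returned converter would then be driven end to end; the ModelConverter interface name and the convertOne helper are assumptions for illustration, since the diff truncates GetModelArch's actual return type:

package convert

// ModelConverter is an assumed name for the interface that MixtralModel now
// satisfies via the three methods in convert/mixtral.go.
type ModelConverter interface {
	GetTensors() error
	LoadVocab() error
	WriteGGUF() (string, error)
}

// convertOne is a hypothetical driver, not code from this commit: it runs the
// conversion steps in order and returns the temporary GGUF file path that
// WriteGGUF produced.
func convertOne(m ModelConverter) (string, error) {
	if err := m.GetTensors(); err != nil {
		return "", err
	}
	if err := m.LoadVocab(); err != nil {
		return "", err
	}
	return m.WriteGGUF()
}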