"official/projects/assemblenet/README.md" did not exist on "c7644458781324e8c182efe816687b348d011830"
convert_mixtral.go 1.52 KB
Newer Older
Michael Yang's avatar
Michael Yang committed
1
2
3
4
5
package convert

import (
	"fmt"

	"github.com/ollama/ollama/fs/ggml"
)

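// mixtralModel wraps llamaModel and adds the mixture-of-experts
// fields read from the Mixtral config.json.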
type mixtralModel struct {
	llamaModel
	NumLocalExperts    uint32 `json:"num_local_experts"`
	NumExpertsPerToken uint32 `json:"num_experts_per_tok"`
}

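// KV extends the base llama metadata with the expert counts when
// they are present in the source config.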
func (p *mixtralModel) KV(t *Tokenizer) ggml.KV {
	kv := p.llamaModel.KV(t)

	if p.NumLocalExperts > 0 {
		kv["llama.expert_count"] = p.NumLocalExperts
	}

	if p.NumExpertsPerToken > 0 {
		kv["llama.expert_used_count"] = p.NumExpertsPerToken
	}

	return kv
}

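// Tensors merges each layer's per-expert w1/w2/w3 tensors into the
// combined ffn_gate_exps/ffn_up_exps/ffn_down_exps tensors, then
// passes the remaining tensors through the base llama conversion.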
func (p *mixtralModel) Tensors(ts []Tensor) []*ggml.Tensor {
	merges := make([]merge, 0, p.NumHiddenLayers*6)
	for i := range p.NumHiddenLayers {
		merges = append(merges, merge{
			fmt.Sprintf("blk.%d.*.w1.weight", i),
			fmt.Sprintf("blk.%d.ffn_gate_exps.weight", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w1.bias", i),
			fmt.Sprintf("blk.%d.ffn_gate_exps.bias", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w2.weight", i),
			fmt.Sprintf("blk.%d.ffn_up_exps.weight", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w2.bias", i),
			fmt.Sprintf("blk.%d.ffn_up_exps.bias", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w3.weight", i),
			fmt.Sprintf("blk.%d.ffn_down_exps.weight", i),
		}, merge{
			fmt.Sprintf("blk.%d.*.w3.bias", i),
			fmt.Sprintf("blk.%d.ffn_down_exps.bias", i),
		})
	}

	out, ts := mergeTensors(ts, merges...)
	return append(out, p.llamaModel.Tensors(ts)...)
}

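// Replacements adds the Mixtral block_sparse_moe tensor name
// substitutions on top of the base llama replacements.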
func (p *mixtralModel) Replacements() []string {
	return append(
		p.llamaModel.Replacements(),
		"model.layers", "blk",
		"block_sparse_moe.gate", "ffn_gate_inp",
		"block_sparse_moe.experts.", ".",
	)
}