convert_phi3.go 3.73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
package convert

import (
	"cmp"
	"encoding/binary"
	"io"
	"math"
	"strings"
	"sync"

	"github.com/ollama/ollama/llm"
)

// phi3 holds the phi3 model configuration, populated through its json tags on
// top of the embedded Parameters.
//
// Several settings are accepted under two JSON keys (num_hidden_layers /
// n_layers, hidden_size / n_embd, num_attention_heads / n_head,
// num_key_value_heads / n_head_kv); KV resolves each pair with cmp.Or, i.e.
// it takes the first non-zero value. RopeScaling carries the scaling type
// plus the long and short factor lists that Tensors emits as extra tensors.
//
// NOTE(review): NPositions is declared but not read by any code visible in
// this file — confirm whether it is meant to back MaxPositionEmbeddings.
type phi3 struct {
	Parameters
	NumHiddenLayers   uint32  `json:"num_hidden_layers"`
	NLayers           uint32  `json:"n_layers"`
	HiddenSize        uint32  `json:"hidden_size"`
	NEmbd             uint32  `json:"n_embd"`
	IntermediateSize  uint32  `json:"intermediate_size"`
	NumAttentionHeads uint32  `json:"num_attention_heads"`
	NHead             uint32  `json:"n_head"`
	NumKeyValueHeads  uint32  `json:"num_key_value_heads"`
	NHeadKV           uint32  `json:"n_head_kv"`
	RopeTheta         float32 `json:"rope_theta"`
	RopeScaling       struct {
		Type        string     `json:"type"`
		LongFactor  ropeFactor `json:"long_factor"`
		ShortFactor ropeFactor `json:"short_factor"`
	} `json:"rope_scaling"`
	RMSNormEPS                    float32 `json:"rms_norm_eps"`
	NPositions                    uint32  `json:"n_positions"`
	MaxPositionEmbeddings         uint32  `json:"max_position_embeddings"`
	OriginalMaxPositionEmbeddings uint32  `json:"original_max_position_embeddings"`
	SlidingWindow                 uint32  `json:"sliding_window"`
}

// Compile-time check that *phi3 satisfies the Converter interface.
var _ Converter = (*phi3)(nil)

// KV builds the key/value metadata for a phi3 model.
//
// It starts from the value returned by the embedded Parameters.KV(t) and adds
// the "general.*" and "phi3.*" entries derived from the decoded config. Where
// the config carries two spellings of the same setting (for example
// hidden_size / n_embd), the first non-zero one is used for every key that
// depends on it.
//
// "phi3.rope.dimension_count" is embedding length divided by attention head
// count; it is left unset when the head count is zero instead of dividing by
// zero.
//
// KV panics if rope_scaling.type is anything other than "", "su", "longrope"
// or "yarn".
func (p *phi3) KV(t *Tokenizer) llm.KV {
	// Resolve the aliased settings once so that all keys derived from them
	// agree with each other.
	embeddingLength := cmp.Or(p.HiddenSize, p.NEmbd)
	headCount := cmp.Or(p.NumAttentionHeads, p.NHead)

	kv := p.Parameters.KV(t)
	kv["general.architecture"] = "phi3"
	kv["general.name"] = "phi3"
	kv["phi3.context_length"] = p.MaxPositionEmbeddings
	kv["phi3.embedding_length"] = embeddingLength
	kv["phi3.feed_forward_length"] = p.IntermediateSize
	kv["phi3.block_count"] = cmp.Or(p.NumHiddenLayers, p.NLayers)
	kv["phi3.attention.head_count"] = headCount
	kv["phi3.attention.head_count_kv"] = cmp.Or(p.NumKeyValueHeads, p.NHeadKV)
	kv["phi3.attention.layer_norm_rms_epsilon"] = p.RMSNormEPS
	if headCount > 0 {
		// Use the resolved embedding length: a config that only provides
		// n_embd must not end up with a rope dimension count of 0.
		kv["phi3.rope.dimension_count"] = embeddingLength / headCount
	}
	kv["phi3.rope.freq_base"] = p.RopeTheta
	kv["phi3.rope.scaling.original_context_length"] = p.OriginalMaxPositionEmbeddings
	kv["phi3.attention.sliding_window"] = p.SlidingWindow

	// Ratio of the extended context to the original one. This is a float
	// division, so a zero original length yields Inf/NaN rather than a panic;
	// the value is only consumed by the scaled cases below.
	scale := float64(p.MaxPositionEmbeddings) / float64(p.OriginalMaxPositionEmbeddings)

	switch p.RopeScaling.Type {
	case "":
		// no scaling
	case "su", "longrope":
		// sqrt(1 + ln(scale) / ln(original context length)), never below 1.
		kv["phi3.rope.scaling.attn_factor"] = float32(max(math.Sqrt(1+math.Log(scale)/math.Log(float64(p.OriginalMaxPositionEmbeddings))), 1.0))
	case "yarn":
		// 0.1*ln(scale) + 1, never below 1.
		kv["phi3.rope.scaling.attn_factor"] = float32(max(0.1*math.Log(scale)+1.0, 1.0))
	default:
		panic("unknown rope scaling type")
	}

	return kv
}

func (p *phi3) Tensors(ts []Tensor) []llm.Tensor {
	var addRopeFactors sync.Once

	out := make([]llm.Tensor, 0, len(ts)+2)
	for _, t := range ts {
Michael Yang's avatar
Michael Yang committed
77
		if strings.HasPrefix(t.Name(), "blk.0.") {
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
			addRopeFactors.Do(func() {
				out = append(out, llm.Tensor{
					Name:     "rope_factors_long.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.LongFactor))},
					WriterTo: p.RopeScaling.LongFactor,
				}, llm.Tensor{
					Name:     "rope_factors_short.weight",
					Kind:     0,
					Shape:    []uint64{uint64(len(p.RopeScaling.ShortFactor))},
					WriterTo: p.RopeScaling.ShortFactor,
				})
			})
		}

		out = append(out, llm.Tensor{
Michael Yang's avatar
Michael Yang committed
94
			Name:     t.Name(),
95
96
97
98
99
100
101
102
103
			Kind:     t.Kind(),
			Shape:    t.Shape(),
			WriterTo: t,
		})
	}

	return out
}

Michael Yang's avatar
Michael Yang committed
104
105
func (p *phi3) Replacements() []string {
	return []string{
106
107
108
109
110
111
112
113
114
115
		"lm_head", "output",
		"model.embed_tokens", "token_embd",
		"model.norm", "output_norm",
		"model.layers", "blk",
		"input_layernorm", "attn_norm",
		"self_attn.qkv_proj", "attn_qkv",
		"self_attn.o_proj", "attn_output",
		"mlp.down_proj", "ffn_down",
		"mlp.gate_up_proj", "ffn_up",
		"post_attention_layernorm", "ffn_norm",
Michael Yang's avatar
Michael Yang committed
116
	}
117
118
119
120
121
122
123
124
}

// ropeFactor is a list of rope scaling factors that can serialize itself as
// raw little-endian float32 values.
type ropeFactor []float32

// WriteTo writes every element of r to w, in order, as a 4-byte little-endian
// IEEE 754 float32, using a single Write call.
//
// It returns the number of bytes w reported as written together with any
// error from w, as the io.WriterTo contract requires.
func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	buf := make([]byte, 4*len(r))
	for i, f := range r {
		binary.LittleEndian.PutUint32(buf[4*i:], math.Float32bits(f))
	}

	n, err := w.Write(buf)
	return int64(n), err
}