package llm

import (
	"encoding/binary"
	"errors"
	"io"
	"slices"
)

// containerGGLA is the container type for GGLA files. It records the format
// version that Decode reads from the start of the stream.
type containerGGLA struct {
	// version is the GGLA format version; Decode accepts only version 1.
	version uint32
}

func (c *containerGGLA) Name() string {
Michael Yang's avatar
Michael Yang committed
15
16
17
	return "ggla"
}

func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
		return nil, err
	}
Michael Yang's avatar
Michael Yang committed
22
23
24
25
26
27
28

	switch c.version {
	case 1:
	default:
		return nil, errors.New("invalid version")
	}

29
	model := newGGLA(c)
Michael Yang's avatar
Michael Yang committed
30
	err := model.decode(rs)
Michael Yang's avatar
Michael Yang committed
31
32
33
	return model, err
}

type ggla struct {
	*containerGGLA
Michael Yang's avatar
Michael Yang committed
36
37
38
39
40

	kv      KV
	tensors []Tensor
}

func newGGLA(container *containerGGLA) *ggla {
	return &ggla{
		containerGGLA: container,
Michael Yang's avatar
Michael Yang committed
44
45
46
47
		kv:            make(KV),
	}
}

func (m *ggla) decode(rs io.ReadSeeker) error {
Michael Yang's avatar
Michael Yang committed
49
	var r uint32
Michael Yang's avatar
Michael Yang committed
50
	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
Michael Yang's avatar
Michael Yang committed
51
52
53
54
55
		return err
	}
	m.kv["r"] = r

	var alpha uint32
Michael Yang's avatar
Michael Yang committed
56
	if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
Michael Yang's avatar
Michael Yang committed
57
58
59
60
61
62
		return err
	}
	m.kv["alpha"] = alpha

	for {
		var dims uint32
Michael Yang's avatar
Michael Yang committed
63
		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
Michael Yang's avatar
Michael Yang committed
64
65
66
67
			return err
		}

		var namesize uint32
Michael Yang's avatar
Michael Yang committed
68
		if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
Michael Yang's avatar
Michael Yang committed
69
70
71
72
			return err
		}

		var t Tensor
Michael Yang's avatar
Michael Yang committed
73
		if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
Michael Yang's avatar
Michael Yang committed
74
75
76
77
78
79
			return err
		}

		t.Shape = make([]uint64, dims)
		for i := 0; uint32(i) < dims; i++ {
			var shape32 uint32
Michael Yang's avatar
Michael Yang committed
80
			if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
Michael Yang's avatar
Michael Yang committed
81
82
83
84
85
86
87
88
89
90
91
				return err
			}

			t.Shape[i] = uint64(shape32)
		}

		// ggla tensor shape is reversed
		// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
		slices.Reverse(t.Shape)

		name := make([]byte, namesize)
Michael Yang's avatar
Michael Yang committed
92
		if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
Michael Yang's avatar
Michael Yang committed
93
94
95
96
97
			return err
		}

		t.Name = string(name)

Michael Yang's avatar
Michael Yang committed
98
99
		offset, err := rs.Seek(0, io.SeekCurrent)
		if err != nil {
Michael Yang's avatar
Michael Yang committed
100
101
102
			return err
		}

Michael Yang's avatar
Michael Yang committed
103
104
105
106
107
108
109
110
111
112
		if _, err := rs.Seek((offset+31)&-32, io.SeekStart); err != nil {
			return err
		}

		offset, err = rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

		t.Offset = uint64(offset)
Michael Yang's avatar
Michael Yang committed
113

114
		if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
Michael Yang's avatar
Michael Yang committed
115
116
117
118
119
120
121
			return err
		}

		m.tensors = append(m.tensors, t)
	}
}

func (m *ggla) KV() KV {
Michael Yang's avatar
Michael Yang committed
123
124
125
	return m.kv
}

func (m *ggla) Tensor() []Tensor {
Michael Yang's avatar
Michael Yang committed
127
128
129
	return m.tensors
}

func (*ggla) ModelFamily() string {
Michael Yang's avatar
Michael Yang committed
131
132
133
	return "ggla"
}

func (*ggla) ModelType() string {
Michael Yang's avatar
Michael Yang committed
135
136
137
	panic("not implemented")
}

func (*ggla) FileType() string {
Michael Yang's avatar
Michael Yang committed
139
140
141
	panic("not implemented")
}

func (*ggla) NumLayers() uint32 {
Michael Yang's avatar
Michael Yang committed
143
144
145
	panic("not implemented")
}

func (*ggla) NumGQA() uint32 {
Michael Yang's avatar
Michael Yang committed
147
148
149
	panic("not implemented")
}

func (*ggla) NumEmbed() uint32 {
Michael Yang's avatar
Michael Yang committed
151
152
153
	panic("not implemented")
}

func (*ggla) NumHead() uint32 {
Michael Yang's avatar
Michael Yang committed
155
156
157
	panic("not implemented")
}

func (*ggla) NumHeadKv() uint32 {
Michael Yang's avatar
Michael Yang committed
159
160
161
	panic("not implemented")
}

func (*ggla) NumCtx() uint32 {
Michael Yang's avatar
Michael Yang committed
163
164
	panic("not implemented")
}