ggla.go 2.67 KB
Newer Older
mashun1's avatar
v1  
mashun1 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
package llm

import (
	"encoding/binary"
	"errors"
	"io"
	"slices"
)

// containerGGLA is the container for the "ggla" format (see Name). It keeps
// the format version that Decode reads from the start of the stream.
type containerGGLA struct {
	// version is the little-endian uint32 read by Decode; Decode rejects any
	// value other than 1.
	version uint32
}

// Name reports the identifier of this container format, which is always
// "ggla".
func (c *containerGGLA) Name() string {
	const name = "ggla"
	return name
}

// Decode reads the little-endian uint32 format version from rs into
// c.version, rejects anything other than version 1 with an "invalid version"
// error, and then parses the rest of the stream into a new ggla model.
//
// The model is returned together with whatever error decode reports, so when
// decoding the body fails the caller receives both a non-nil error and the
// model populated up to the point of failure.
func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
		return nil, err
	}

	// Version 1 is the only layout this decoder understands.
	if c.version != 1 {
		return nil, errors.New("invalid version")
	}

	m := newGGLA(c)
	return m, m.decode(rs)
}

type ggla struct {
	*containerGGLA

	kv      KV
	tensors []*Tensor
xuxzh1's avatar
init  
xuxzh1 committed
39
40

	tensorOffset uint64
mashun1's avatar
v1  
mashun1 committed
41
42
43
44
45
46
47
48
49
50
51
52
53
54
}

// newGGLA returns a ggla bound to container with an empty, writable KV map
// and no tensors.
func newGGLA(container *containerGGLA) *ggla {
	llm := &ggla{kv: make(KV)}
	llm.containerGGLA = container
	return llm
}

// KV returns the header key/value map that decode fills in. The map itself is
// returned, not a copy.
func (llm *ggla) KV() KV {
	header := llm.kv
	return header
}

func (llm *ggla) Tensors() Tensors {
xuxzh1's avatar
init  
xuxzh1 committed
55
56
57
58
	return Tensors{
		Items:  llm.tensors,
		Offset: llm.tensorOffset,
	}
mashun1's avatar
v1  
mashun1 committed
59
60
}

xuxzh1's avatar
init  
xuxzh1 committed
61
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
mashun1's avatar
v1  
mashun1 committed
62
63
64
65
66
67
68
69
70
71
72
73
	var r uint32
	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
		return err
	}
	llm.kv["r"] = r

	var alpha uint32
	if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
		return err
	}
	llm.kv["alpha"] = alpha

xuxzh1's avatar
init  
xuxzh1 committed
74
75
76
77
78
79
80
	offset, err := rs.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	llm.tensorOffset = uint64(offset)

mashun1's avatar
v1  
mashun1 committed
81
82
83
	for {
		var dims uint32
		if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
xuxzh1's avatar
init  
xuxzh1 committed
84
85
86
			if errors.Is(err, io.EOF) {
				return nil
			}
mashun1's avatar
v1  
mashun1 committed
87
88
89
			return err
		}

xuxzh1's avatar
init  
xuxzh1 committed
90
91
92
93
94
95
		defer func() {
			if errors.Is(retErr, io.EOF) {
				retErr = io.ErrUnexpectedEOF
			}
		}()

mashun1's avatar
v1  
mashun1 committed
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
		var namesize uint32
		if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
			return err
		}

		var t Tensor
		if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
			return err
		}

		t.Shape = make([]uint64, dims)
		for i := 0; uint32(i) < dims; i++ {
			var shape32 uint32
			if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
				return err
			}

			t.Shape[i] = uint64(shape32)
		}

		// ggla tensor shape is reversed
		// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
		slices.Reverse(t.Shape)

		name := make([]byte, namesize)
		if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
			return err
		}

		t.Name = string(name)

		offset, err := rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

xuxzh1's avatar
init  
xuxzh1 committed
132
		if _, err := rs.Seek((offset+31)&-32-offset, io.SeekCurrent); err != nil {
mashun1's avatar
v1  
mashun1 committed
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
			return err
		}

		offset, err = rs.Seek(0, io.SeekCurrent)
		if err != nil {
			return err
		}

		t.Offset = uint64(offset)

		if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
			return err
		}

		llm.tensors = append(llm.tensors, &t)
	}
}