"research/gan/cifar/data_provider_test.py" did not exist on "0cc986287ca655ac0c675870a22ea90fdb3c4ece"
ggml.go 3.66 KB
Newer Older
1
2
3
4
5
6
package llm

import (
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"path"
	"sync"
)

// ModelFamily is the string name of a model family, as reported by a decoded
// model's ModelFamily method.
type ModelFamily string

Bruce MacDonald's avatar
Bruce MacDonald committed
13
14
// ModelFamilyUnknown is the ModelFamily value "unknown".
// NOTE(review): presumably reported when the family cannot be determined —
// confirm against callers.
const ModelFamilyUnknown ModelFamily = "unknown"

15
16
17
18
19
20
// ModelType is a numeric code identifying a model's size class; its String
// method maps the known codes to labels such as "7B".
// NOTE(review): the codes look like per-model layer counts — confirm.
type ModelType uint32

const (
	ModelType3B  ModelType = 26
	ModelType7B  ModelType = 32
	ModelType13B ModelType = 40
Michael Yang's avatar
Michael Yang committed
21
	ModelType34B ModelType = 48
22
23
24
25
	ModelType30B ModelType = 60
	ModelType65B ModelType = 80
)

Michael Yang's avatar
Michael Yang committed
26
27
28
29
30
31
32
33
func (mt ModelType) String() string {
	switch mt {
	case ModelType3B:
		return "3B"
	case ModelType7B:
		return "7B"
	case ModelType13B:
		return "13B"
Michael Yang's avatar
Michael Yang committed
34
35
	case ModelType34B:
		return "34B"
Michael Yang's avatar
Michael Yang committed
36
37
38
39
40
41
42
43
	case ModelType30B:
		return "30B"
	case ModelType65B:
		return "65B"
	default:
		return "Unknown"
	}
}
44

Michael Yang's avatar
Michael Yang committed
45
46
47
// FileType is the value returned by a decoded model's FileType method. The
// only behaviour required of an implementation is a String method.
// NOTE(review): presumably String yields a human-readable name — confirm
// against the implementations.
type FileType interface {
	String() string
}
48
49
50
51

type GGML struct {
	magic uint32
	container
Michael Yang's avatar
Michael Yang committed
52
53
	model
}
54

Michael Yang's avatar
Michael Yang committed
55
56
57
58
type model interface {
	ModelFamily() ModelFamily
	ModelType() ModelType
	FileType() FileType
59
60
61
62
}

type container interface {
	Name() string
Bruce MacDonald's avatar
Bruce MacDonald committed
63
	Decode(io.Reader) (model, error)
64
65
}

Bruce MacDonald's avatar
Bruce MacDonald committed
66
// containerGGML is the container DecodeGGML selects for files starting with
// FILE_MAGIC_GGML (unversioned ggml).
type containerGGML struct{}
67
68
69
70
71

// Name returns the container's format name, "ggml".
func (c *containerGGML) Name() string {
	return "ggml"
}

Bruce MacDonald's avatar
Bruce MacDonald committed
72
73
// Decode reads nothing from r and always returns a nil model and a nil error.
//
// NOTE(review): because no error is reported, a GGML built from this
// container carries a nil embedded model — confirm callers guard against
// calling model methods on it.
func (c *containerGGML) Decode(r io.Reader) (model, error) {
	return nil, nil
}

// containerGGMF is the container DecodeGGML selects for files starting with
// FILE_MAGIC_GGMF.
type containerGGMF struct {
	// version is the format version stored by Decode once it has been validated.
	version uint32
}

// Name returns the container's format name, "ggmf".
func (c *containerGGMF) Name() string {
	return "ggmf"
}

Bruce MacDonald's avatar
Bruce MacDonald committed
84
func (c *containerGGMF) Decode(r io.Reader) (model, error) {
85
86
87
88
89
90
	var version uint32
	binary.Read(r, binary.LittleEndian, &version)

	switch version {
	case 1:
	default:
Bruce MacDonald's avatar
Bruce MacDonald committed
91
		return nil, errors.New("invalid version")
92
93
94
	}

	c.version = version
Bruce MacDonald's avatar
Bruce MacDonald committed
95
	return nil, nil
96
97
98
99
100
101
102
103
104
105
}

// containerGGJT is the container DecodeGGML selects for files starting with
// FILE_MAGIC_GGJT.
type containerGGJT struct {
	// version is the format version stored by Decode once it has been validated.
	version uint32
}

// Name returns the container's format name, "ggjt".
func (c *containerGGJT) Name() string {
	return "ggjt"
}

Bruce MacDonald's avatar
Bruce MacDonald committed
106
func (c *containerGGJT) Decode(r io.Reader) (model, error) {
107
108
109
110
111
112
	var version uint32
	binary.Read(r, binary.LittleEndian, &version)

	switch version {
	case 1, 2, 3:
	default:
Bruce MacDonald's avatar
Bruce MacDonald committed
113
		return nil, errors.New("invalid version")
114
115
116
	}

	c.version = version
Bruce MacDonald's avatar
Bruce MacDonald committed
117
118
119
120
121

	// different model types may have different layouts for hyperparameters
	var llama llamaModel
	binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
	return &llama, nil
122
123
124
125
126
127
128
129
130
131
}

// containerLORA is the container DecodeGGML selects for files starting with
// FILE_MAGIC_GGLA (LoRA adapter).
type containerLORA struct {
	// version is the format version stored by Decode once it has been validated.
	version uint32
}

// Name returns the container's format name, "ggla".
func (c *containerLORA) Name() string {
	return "ggla"
}

Bruce MacDonald's avatar
Bruce MacDonald committed
132
func (c *containerLORA) Decode(r io.Reader) (model, error) {
133
134
135
136
137
138
	var version uint32
	binary.Read(r, binary.LittleEndian, &version)

	switch version {
	case 1:
	default:
Bruce MacDonald's avatar
Bruce MacDonald committed
139
		return nil, errors.New("invalid version")
140
141
142
	}

	c.version = version
Bruce MacDonald's avatar
Bruce MacDonald committed
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
	return nil, nil
}

// ggmlGPU and ggmlCPU are the slash-separated relative directories that
// ggmlRunner hands to chooseRunner, GPU build first and CPU build second.
// NOTE(review): built with path.Join rather than filepath.Join, so they always
// use forward slashes — confirm that is what chooseRunner expects.
var (
	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
)

// ggmlInit guards the one-time assignment of ggmlRunnerPath, which holds the
// result of chooseRunner and is set by the first call to ggmlRunner.
var (
	ggmlInit       sync.Once
	ggmlRunnerPath string
)

// ggmlRunner returns a ModelRunner whose Path is the value chooseRunner
// produced for the ggmlGPU and ggmlCPU directories. chooseRunner is invoked
// only on the first call; later calls reuse the stored path.
func ggmlRunner() ModelRunner {
	ggmlInit.Do(func() {
		ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
	})
	return ModelRunner{Path: ggmlRunnerPath}
}

// File magic numbers. DecodeGGML reads the first four bytes of a file as a
// little-endian uint32 and compares the result against these values to pick
// a container.
const (
	// Magic constant for `ggml` files (unversioned).
	FILE_MAGIC_GGML = 0x67676d6c
	// Magic constant for `ggml` files (versioned, ggmf).
	FILE_MAGIC_GGMF = 0x67676d66
	// Magic constant for `ggml` files (versioned, ggjt).
	FILE_MAGIC_GGJT = 0x67676a74
	// Magic constant for `ggla` files (LoRA adapter).
	FILE_MAGIC_GGLA = 0x67676C61
	// Magic constant for `gguf` files (versioned, gguf).
	FILE_MAGIC_GGUF = 0x46554747
)

Bruce MacDonald's avatar
Bruce MacDonald committed
176
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
177
178
179
180
181
182
183
184
185
186
187
188
	var ggml GGML
	binary.Read(r, binary.LittleEndian, &ggml.magic)

	switch ggml.magic {
	case FILE_MAGIC_GGML:
		ggml.container = &containerGGML{}
	case FILE_MAGIC_GGMF:
		ggml.container = &containerGGMF{}
	case FILE_MAGIC_GGJT:
		ggml.container = &containerGGJT{}
	case FILE_MAGIC_GGLA:
		ggml.container = &containerLORA{}
Bruce MacDonald's avatar
Bruce MacDonald committed
189
190
	case FILE_MAGIC_GGUF:
		ggml.container = &containerGGUF{}
191
192
193
194
	default:
		return nil, errors.New("invalid file magic")
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
195
196
	model, err := ggml.Decode(r)
	if err != nil {
197
198
199
		return nil, err
	}

Bruce MacDonald's avatar
Bruce MacDonald committed
200
	ggml.model = model
201
202
203
204

	// final model type
	return &ggml, nil
}