"vscode:/vscode.git/clone" did not exist on "4bfe166246439bc40f5a530030659f4b6c86af39"
llama.go 5.39 KB
Newer Older
1
2
3
4
5
package llm

import (
	"bytes"
	"context"
	_ "embed"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"os"
	"os/exec"
	"path/filepath"
	"sync"
	"time"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/format"
)

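// jsonGrammar is a GBNF grammar that restricts generation to valid JSON,
// used when a request asks for JSON-formatted output.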
const jsonGrammar = `
root   ::= object
value  ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
            string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array  ::=
  "[" ws (
            value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`

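// llamaModel describes a model with the llama architecture.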
type llamaModel struct {
	hyperparameters llamaHyperparameters
}

func (llm *llamaModel) ModelFamily() string {
	return "llama"
}

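// llamaModelType maps a llama model's layer count to its conventional
// parameter-size label (e.g. 32 layers corresponds to the 7B variant).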
func llamaModelType(numLayer uint32) string {
	switch numLayer {
	case 26:
		return "3B"
	case 32:
		return "7B"
	case 40:
		return "13B"
	case 48:
		return "34B"
	case 60:
		return "30B"
	case 80:
		return "65B"
	default:
		return "unknown"
	}
}

func (llm *llamaModel) ModelType() string {
	return llamaModelType(llm.hyperparameters.NumLayer)
}

func (llm *llamaModel) FileType() string {
	return fileType(llm.hyperparameters.FileType)
}

func (llm *llamaModel) NumLayers() int64 {
	return int64(llm.hyperparameters.NumLayer)
}

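// llamaHyperparameters holds the decoded hyperparameters that describe a
// llama model's shape and quantization.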
type llamaHyperparameters struct {
	// NumVocab is the size of the model's vocabulary.
	NumVocab uint32

	// NumEmbd is the size of the model's embedding layer.
	NumEmbd uint32
	NumMult uint32
	NumHead uint32

	// NumLayer is the number of layers in the model.
	NumLayer uint32
	NumRot   uint32

	// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
	FileType uint32
}

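// Running tracks a llama runner subprocess: the port it serves on, its
// command handle, and the channels used to observe its exit and errors.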
type Running struct {
	Port          int
	Cmd           *exec.Cmd
	Cancel        context.CancelFunc
	exitOnce      sync.Once
	exitCh        chan error // channel to receive the exit status of the subprocess
	*StatusWriter            // captures error messages from the llama runner process
}

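// ImageData is an image supplied alongside a prompt, referenced by its ID.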
type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}

var (
	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
	payloadMissing   = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
)

// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
	ErrCh      chan error
	LastErrMsg string
}

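// NewStatusWriter returns a StatusWriter whose error channel is buffered so a
// single error can be reported without blocking. It is typically wired to the
// runner's stderr (e.g. cmd.Stderr = w) so failures surface on ErrCh.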
func NewStatusWriter() *StatusWriter {
	return &StatusWriter{
		ErrCh: make(chan error, 1),
	}
}

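// Write scans runner output for "error:" and "CUDA error" markers, records
// the most recent message, reports it on ErrCh, and forwards the output to
// stderr.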
func (w *StatusWriter) Write(b []byte) (int, error) {
	var errMsg string
	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
		errMsg = string(bytes.TrimSpace(after))
	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
		errMsg = string(bytes.TrimSpace(after))
	}

	if errMsg != "" {
		w.LastErrMsg = errMsg
		w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
	}

	return os.Stderr.Write(b)
}

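// prediction mirrors the JSON body of a streamed completion response from the
// llama runner, including generation timings.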
type prediction struct {
	Content string `json:"content"`
	Model   string `json:"model"`
	Prompt  string `json:"prompt"`
	Stop    bool   `json:"stop"`

	Timings struct {
		PredictedN  int     `json:"predicted_n"`
		PredictedMS float64 `json:"predicted_ms"`
		PromptN     int     `json:"prompt_n"`
		PromptMS    float64 `json:"prompt_ms"`
	}
}

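// Buffer and retry limits used when communicating with the llama runner.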
const maxBufferSize = 512 * format.KiloByte
const maxRetries = 3
const retryDelay = 1 * time.Second

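// PredictOpts are the caller-supplied inputs for a single prediction request.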
type PredictOpts struct {
	Prompt string
	Format string
	Images []api.ImageData
}

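// PredictResult is one chunk of a streamed prediction, with evaluation counts
// and durations reported once generation is done.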
type PredictResult struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}

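// JSON request and response types for the runner's tokenize, detokenize, and
// embedding endpoints.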
type TokenizeRequest struct {
	Content string `json:"content"`
}

type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}

type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

type DetokenizeResponse struct {
	Content string `json:"content"`
}

type EmbeddingRequest struct {
	Content string `json:"content"`
}

type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

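// extractDynamicLibs copies the embedded dynamic libraries matching glob into
// workDir, skipping any that are already present, and returns the destination
// paths.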
func extractDynamicLibs(workDir, glob string) ([]string, error) {
	files, err := fs.Glob(libEmbed, glob)
	if err != nil || len(files) == 0 {
		return nil, payloadMissing
	}
	libs := make([]string, len(files))

	for i, file := range files {
		srcFile, err := libEmbed.Open(file)
		if err != nil {
			return nil, fmt.Errorf("read payload %s: %v", file, err)
		}
		defer srcFile.Close()
		if err := os.MkdirAll(workDir, 0o755); err != nil {
			return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
		}

		destFile := filepath.Join(workDir, filepath.Base(file))
		libs[i] = destFile

		_, err = os.Stat(destFile)
		switch {
		case errors.Is(err, os.ErrNotExist):
			destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
			if err != nil {
				return nil, fmt.Errorf("write payload %s: %v", file, err)
			}
			defer destFile.Close()
			if _, err := io.Copy(destFile, srcFile); err != nil {
				return nil, fmt.Errorf("copy payload %s: %v", file, err)
			}
		case err != nil:
			return nil, fmt.Errorf("stat payload %s: %v", file, err)
		}
	}
	return libs, nil
}