llama.go 3.14 KB
Newer Older
1
2
3
4
5
package llm

import (
	"bytes"
	"context"
6
	_ "embed"
7
8
9
10
11
12
13
	"errors"
	"fmt"
	"os"
	"os/exec"
	"time"

	"github.com/jmorganca/ollama/api"
Michael Yang's avatar
Michael Yang committed
14
	"github.com/jmorganca/ollama/format"
15
16
)

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// jsonGrammar is a grammar, written in a BNF-style notation, that describes
// JSON text. Its root rule is `object`, so the top-level value must be a JSON
// object; arrays, strings, numbers and the true/false/null literals are only
// accepted as nested values. Optional whitespace (`ws`) is consumed after each
// token rather than before it.
//
// NOTE(review): presumably handed to the llama runner to constrain generation
// when JSON output is requested; the use site is not visible in this chunk.
const jsonGrammar = `
root   ::= object
value  ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
            string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array  ::=
  "[" ws (
            value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`

45
type Running struct {
46
47
48
	Port          int
	Cmd           *exec.Cmd
	Cancel        context.CancelFunc
49
	*StatusWriter // captures error messages from the llama runner process
50
51
}

Patrick Devine's avatar
Patrick Devine committed
52
53
54
55
56
// ImageData pairs raw image bytes with an integer identifier. Data is
// serialized under the JSON key "data" (encoding/json encodes a []byte as a
// base64 string) and ID under "id".
type ImageData struct {
	Data []byte `json:"data"`
	ID   int    `json:"id"`
}

57
// Sentinel errors for this package.
//
// errNvidiaSMI reports that the nvidia-smi command failed, errAvailableVRAM
// reports insufficient VRAM with a fall back to CPU, and payloadMissing
// reports that the build does not include the expected dynamic library
// payloads. All three are fixed messages, so they are built with errors.New.
var (
	errNvidiaSMI     = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
	errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
	payloadMissing   = errors.New("expected dynamic library payloads not included in this build of ollama")
)
62

63
64
// StatusWriter is a writer that captures error messages from the llama runner
// process. Everything written to it is passed through to os.Stderr; chunks
// that contain an error marker are additionally recorded.
type StatusWriter struct {
	// ErrCh receives one error per detected message while it has free
	// capacity; messages that arrive while it is full are not queued.
	ErrCh chan error

	// LastErrMsg is the text of the most recently detected error message.
	LastErrMsg string
}

// NewStatusWriter returns a StatusWriter whose ErrCh is buffered to hold a
// single pending error.
func NewStatusWriter() *StatusWriter {
	return &StatusWriter{
		ErrCh: make(chan error, 1),
	}
}

// Write scans b for the marker "error:" and, failing that, "CUDA error". When
// a marker is found, the trimmed text following it is stored in LastErrMsg
// and offered on ErrCh wrapped as "llama runner: <msg>". b is then written to
// os.Stderr unchanged, and the result of that write is returned.
func (w *StatusWriter) Write(b []byte) (int, error) {
	var errMsg string
	if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
		errMsg = string(bytes.TrimSpace(after))
	} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
		errMsg = string(bytes.TrimSpace(after))
	}

	if errMsg != "" {
		w.LastErrMsg = errMsg

		// Offer the error without blocking: ErrCh has capacity 1, so a plain
		// send would stall this writer as soon as a second message arrived
		// with no receiver. The newest text stays available in LastErrMsg.
		select {
		case w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg):
		default:
		}
	}

	return os.Stderr.Write(b)
}

Michael Yang's avatar
Michael Yang committed
91
// prediction is the JSON shape decoded for a piece of generated output:
// the text fields Content, Model and Prompt, a Stop flag, and a nested
// Timings object with token counts and millisecond durations.
//
// Timings carries no struct tag; encoding/json matches object keys to field
// names case-insensitively when decoding, so a "timings" key is accepted.
//
// NOTE(review): presumably one streamed chunk from the llama runner's
// completion endpoint; the decoding site is not visible in this chunk.
type prediction struct {
	Content string `json:"content"`
	Model   string `json:"model"`
	Prompt  string `json:"prompt"`
	Stop    bool   `json:"stop"`

	Timings struct {
		PredictedN  int     `json:"predicted_n"`
		PredictedMS float64 `json:"predicted_ms"`
		PromptN     int     `json:"prompt_n"`
		PromptMS    float64 `json:"prompt_ms"`
	}
}

Michael Yang's avatar
Michael Yang committed
105
const maxBufferSize = 512 * format.KiloByte
106
107
const maxRetries = 3
const retryDelay = 1 * time.Second
108

Bruce MacDonald's avatar
Bruce MacDonald committed
109
type PredictOpts struct {
110
111
112
113
	Prompt  string
	Format  string
	Images  []api.ImageData
	Options api.Options
Bruce MacDonald's avatar
Bruce MacDonald committed
114
}
115

Bruce MacDonald's avatar
Bruce MacDonald committed
116
117
118
119
120
121
122
123
// PredictResult holds a piece of prediction output: the generated Content, a
// Done flag, and count/duration pairs for prompt evaluation
// (PromptEvalCount, PromptEvalDuration) and for evaluation (EvalCount,
// EvalDuration). Durations are time.Duration values.
//
// NOTE(review): the counts are presumably token counts and the metrics are
// presumably only populated once Done is true; confirm against the producer.
type PredictResult struct {
	Content            string
	Done               bool
	PromptEvalCount    int
	PromptEvalDuration time.Duration
	EvalCount          int
	EvalDuration       time.Duration
}
124

125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
// TokenizeRequest carries a single Content string, serialized under the JSON
// key "content".
//
// NOTE(review): presumably the body of a tokenize call to the runner.
type TokenizeRequest struct {
	Content string `json:"content"`
}

// TokenizeResponse carries a list of integer Tokens, serialized under the
// JSON key "tokens".
//
// NOTE(review): presumably the reply to a TokenizeRequest.
type TokenizeResponse struct {
	Tokens []int `json:"tokens"`
}

// DetokenizeRequest carries a list of integer Tokens, serialized under the
// JSON key "tokens".
//
// NOTE(review): presumably the body of a detokenize call to the runner.
type DetokenizeRequest struct {
	Tokens []int `json:"tokens"`
}

// DetokenizeResponse carries a single Content string, serialized under the
// JSON key "content".
//
// NOTE(review): presumably the reply to a DetokenizeRequest.
type DetokenizeResponse struct {
	Content string `json:"content"`
}

// EmbeddingRequest carries a single Content string, serialized under the JSON
// key "content".
//
// NOTE(review): presumably the text to embed, sent to the runner.
type EmbeddingRequest struct {
	Content string `json:"content"`
}

// EmbeddingResponse carries an Embedding vector of float64 values, serialized
// under the JSON key "embedding".
//
// NOTE(review): presumably the reply to an EmbeddingRequest.
type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}