routes.go 30.5 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
12
13
	"net"
	"net/http"
14
	"net/netip"
15
	"os"
16
	"os/signal"
Michael Yang's avatar
Michael Yang committed
17
	"path/filepath"
18
	"slices"
Michael Yang's avatar
Michael Yang committed
19
	"strings"
20
	"syscall"
21
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Michael Yang's avatar
Michael Yang committed
23
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24
25
	"github.com/gin-gonic/gin"

26
	"github.com/ollama/ollama/api"
27
	"github.com/ollama/ollama/envconfig"
28
29
30
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
31
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
32
	"github.com/ollama/ollama/template"
33
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
34
	"github.com/ollama/ollama/types/model"
35
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36
37
)

Michael Yang's avatar
Michael Yang committed
38
39
// mode is the gin run mode applied at package initialization; init resets it
// to gin.DebugMode when it holds anything other than a mode gin defines.
var mode = gin.DebugMode

40
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
41
42
	addr  net.Addr
	sched *Scheduler
43
44
}

Michael Yang's avatar
Michael Yang committed
45
46
47
48
49
50
51
52
53
54
55
56
// init validates mode against the run modes gin defines, falling back to
// gin.DebugMode for any other value, and then applies it with gin.SetMode.
func init() {
	switch mode {
	case gin.DebugMode, gin.ReleaseMode, gin.TestMode:
		// already a mode gin understands; keep it
	default:
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
Michael Yang committed
57
58
// errRequired is wrapped with the name of a missing request field to build
// messages such as "model is required" (see scheduleRunner).
var errRequired = errors.New("is required")

59
60
61
62
63
64
65
66
67
68
69
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
70
71
}

Michael Yang's avatar
Michael Yang committed
72
73
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (*runnerRef, error) {
	if name == "" {
Michael Yang's avatar
Michael Yang committed
74
		return nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
75
76
	}

Michael Yang's avatar
Michael Yang committed
77
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
78
	if err != nil {
Michael Yang's avatar
Michael Yang committed
79
		return nil, err
80
81
	}

Michael Yang's avatar
Michael Yang committed
82
83
	if err := model.CheckCapabilities(caps...); err != nil {
		return nil, fmt.Errorf("%s %w", name, err)
84
85
	}

Michael Yang's avatar
Michael Yang committed
86
	opts, err := modelOptions(model, requestOpts)
87
	if err != nil {
Michael Yang's avatar
Michael Yang committed
88
		return nil, err
89
90
	}

Michael Yang's avatar
Michael Yang committed
91
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
92
93
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
94
95
96
	case runner = <-runnerCh:
	case err = <-errCh:
		return nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
97
98
	}

Michael Yang's avatar
Michael Yang committed
99
100
101
102
103
104
105
106
107
108
	return runner, nil
}

func (s *Server) GenerateHandler(c *gin.Context) {
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
109
110
111
		return
	}

Michael Yang's avatar
Michael Yang committed
112
113
114
115
116
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
117
118
119
		return
	}

Michael Yang's avatar
Michael Yang committed
120
121
122
123
124
125
	caps := []Capability{CapabilityCompletion}
	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
126
127
128
129
130
131
132
133
134
135
136
		handleScheduleError(c, req.Model, err)
		return
	}

	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
137
138
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
139

Michael Yang's avatar
Michael Yang committed
140
141
142
143
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
144

Michael Yang's avatar
Michael Yang committed
145
146
147
148
149
150
151
	prompt := req.Prompt
	if !req.Raw {
		var msgs []api.Message
		if req.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
		} else if r.model.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: r.model.System})
152
153
		}

Michael Yang's avatar
Michael Yang committed
154
155
		for _, i := range images {
			msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
Michael Yang's avatar
Michael Yang committed
156
157
		}

Michael Yang's avatar
Michael Yang committed
158
		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
Michael Yang's avatar
Michael Yang committed
159

Michael Yang's avatar
Michael Yang committed
160
161
162
163
164
165
166
167
168
169
		tmpl := r.model.Template
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var b bytes.Buffer
Bruce MacDonald's avatar
Bruce MacDonald committed
170
		if req.Context != nil {
Michael Yang's avatar
Michael Yang committed
171
			s, err := r.llama.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
172
173
174
175
176
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Michael Yang's avatar
Michael Yang committed
177
			b.WriteString(s)
178
179
		}

Michael Yang's avatar
Michael Yang committed
180
181
182
183
		if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
184

Michael Yang's avatar
Michael Yang committed
185
		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
186
187
	}

Michael Yang's avatar
Michael Yang committed
188
	slog.Debug("generate request", "prompt", prompt, "images", images)
189

Bruce MacDonald's avatar
Bruce MacDonald committed
190
191
192
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
193
194
195
196
197
198
199
		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
			Options: *r.Options,
		}, func(r llm.CompletionResponse) {
			ch <- api.GenerateResponse{
200
201
202
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Response:   r.Content,
Michael Yang's avatar
Michael Yang committed
203
				Done:       r.Done,
204
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
205
206
207
208
209
210
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
211
			}
Michael Yang's avatar
Michael Yang committed
212
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
213
214
215
216
217
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
218
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
219
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
220
221
		for rr := range ch {
			switch t := rr.(type) {
222
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
223
224
				sb.WriteString(t.Response)
				r = t
225
			case gin.H:
Michael Yang's avatar
Michael Yang committed
226
227
228
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
229
				}
Michael Yang's avatar
Michael Yang committed
230
231
232

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
233
			default:
Michael Yang's avatar
Michael Yang committed
234
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
235
236
237
				return
			}
		}
238

Michael Yang's avatar
Michael Yang committed
239
240
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
241
242
243
244
245
246
		return
	}

	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
247
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
248
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
249
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
250
251
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
252
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
253
254
255
256
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
257
	r, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
258
	if err != nil {
Michael Yang's avatar
Michael Yang committed
259
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
260
261
262
		return
	}

263
264
265
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
266
267
268
		return
	}

Michael Yang's avatar
Michael Yang committed
269
	embedding, err := r.llama.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
270
	if err != nil {
271
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
272
273
274
275
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

Michael Yang's avatar
Michael Yang committed
276
	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: embedding})
Bruce MacDonald's avatar
Bruce MacDonald committed
277
278
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
279
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
280
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
281
282
283
284
285
286
287
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
288
289
290
		return
	}

291
292
293
294
295
296
297
298
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
299
300
301
		return
	}

302
303
304
	ch := make(chan any)
	go func() {
		defer close(ch)
305
306
		fn := func(r api.ProgressResponse) {
			ch <- r
307
		}
308

Michael Yang's avatar
Michael Yang committed
309
		regOpts := &registryOptions{
310
311
312
			Insecure: req.Insecure,
		}

313
314
315
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

316
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
317
			ch <- gin.H{"error": err.Error()}
318
319
320
		}
	}()

321
322
323
324
325
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

326
327
328
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
329
func (s *Server) PushModelHandler(c *gin.Context) {
330
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
331
332
333
334
335
336
337
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
338
339
		return
	}
Michael Yang's avatar
Michael Yang committed
340

Michael Yang's avatar
Michael Yang committed
341
342
343
344
345
346
347
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
348
349
350
		return
	}

351
352
353
	ch := make(chan any)
	go func() {
		defer close(ch)
354
355
		fn := func(r api.ProgressResponse) {
			ch <- r
356
		}
357

Michael Yang's avatar
Michael Yang committed
358
		regOpts := &registryOptions{
359
360
361
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
362
363
364
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
365
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
366
			ch <- gin.H{"error": err.Error()}
367
368
369
		}
	}()

370
371
372
373
374
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

375
376
377
	streamResponse(c, ch)
}

378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
// checkNameExists reports an error when an existing manifest has a file path
// that matches name's ignoring case while not being exactly the same name.
// An exact match is not an error. Any failure to list manifests is returned
// as is.
func checkNameExists(name model.Name) error {
	existing, err := Manifests()
	if err != nil {
		return err
	}

	for candidate := range existing {
		if !strings.EqualFold(candidate.Filepath(), name.Filepath()) {
			continue
		}

		if candidate != name {
			return errors.New("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
393
func (s *Server) CreateModelHandler(c *gin.Context) {
394
395
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
396
397
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
398
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
399
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
400
		return
401
402
	}

403
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
404
	if !name.IsValid() {
405
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
406
407
408
		return
	}

409
410
411
412
413
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

414
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
415
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
416
417
		return
	}
Michael Yang's avatar
Michael Yang committed
418

419
420
421
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
422
423
424
425
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
426
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
427

428
		sr = f
Michael Yang's avatar
Michael Yang committed
429
	}
Michael Yang's avatar
Michael Yang committed
430

431
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
432
433
434
435
436
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
437
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
438
439
	go func() {
		defer close(ch)
440
441
		fn := func(resp api.ProgressResponse) {
			ch <- resp
442
443
		}

444
445
446
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

447
448
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
449
			ch <- gin.H{"error": err.Error()}
450
		}
Michael Yang's avatar
Michael Yang committed
451
	}()
Michael Yang's avatar
Michael Yang committed
452

453
	if r.Stream != nil && !*r.Stream {
454
455
456
457
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
458
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
459
460
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
461
func (s *Server) DeleteModelHandler(c *gin.Context) {
462
463
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
464
465
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
466
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
467
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
468
469
470
		return
	}

471
472
473
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
474
475
		return
	}
Michael Yang's avatar
Michael Yang committed
476

477
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
478
479
480
481
482
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

483
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
484
485
486
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
487
488
489
490
491

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
492
493
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
494
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
495
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
496
497
498
499
500
501
502
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
503
504
505
		return
	}

Michael Yang's avatar
Michael Yang committed
506
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
507
		// noop
Michael Yang's avatar
Michael Yang committed
508
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
509
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
510
	} else {
511
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
512
513
514
		return
	}

515
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
516
	if err != nil {
517
518
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
519
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
520
521
522
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
523
524
525
526
527
528
529
530
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

531
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
532
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
533
534
535
536
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
537
	modelDetails := api.ModelDetails{
538
539
540
541
542
543
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
544
545
	}

546
	if req.System != "" {
547
		m.System = req.System
548
549
550
	}

	if req.Template != "" {
Michael Yang's avatar
Michael Yang committed
551
552
553
554
		m.Template, err = template.Parse(req.Template)
		if err != nil {
			return nil, err
		}
555
556
	}

Michael Yang's avatar
Michael Yang committed
557
558
559
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
560
561
	}

562
563
564
565
566
567
568
569
570
571
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
572
	resp := &api.ShowResponse{
573
574
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
575
		Template:   m.Template.String(),
576
577
578
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
579
580
581
582
	}

	var params []string
	cs := 30
583
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
584
585
586
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
587
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
588
			}
Patrick Devine's avatar
Patrick Devine committed
589
590
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
591
592
593
594
		}
	}
	resp.Parameters = strings.Join(params, "\n")

595
596
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
597
			m.Options[k] = v
598
599
600
		}
	}

601
	var sb strings.Builder
602
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
603
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
604
605
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
606
	resp.Modelfile = sb.String()
607

608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
624
625
626
	return resp, nil
}

627
// getKVData loads the key/value data of the model file identified by digest.
// In verbose mode llm.LoadModel is called with a maximum array size of -1 and
// its data is returned untouched. Otherwise it is called with 0, and any
// []any value longer than five elements is replaced with an empty slice.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	arrayLimit := 0
	if verbose {
		arrayLimit = -1
	}

	loaded, err := llm.LoadModel(digest, arrayLimit)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	for key, value := range kv {
		if list, isList := value.([]any); isList && len(list) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
650
func (s *Server) ListModelsHandler(c *gin.Context) {
651
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
652
653
654
655
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
656

657
	models := []api.ListModelResponse{}
658
659
660
661
662
663
664
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
665

666
667
668
669
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
670
		}
Michael Yang's avatar
Michael Yang committed
671

672
		// tag should never be masked
673
		models = append(models, api.ListModelResponse{
674
675
676
677
678
679
680
681
682
683
684
685
686
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
687
688
	}

689
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
690
691
692
693
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
694
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
695
696
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
697
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
698
699
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
700
701
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
702
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
703
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
704
705
706
		return
	}

Michael Yang's avatar
Michael Yang committed
707
708
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
709
710
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
711
712
	}

Michael Yang's avatar
Michael Yang committed
713
714
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
715
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
716
717
		return
	}
Michael Yang's avatar
Michael Yang committed
718

719
720
721
722
723
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
724
725
726
727
728
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
729
730
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
731
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
732
733
734
735
736
737
738
739
740
741
742
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
743
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
744
745
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
746
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
747
748
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
749
750
751
752
753
754
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
755
756
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
757
758
759
760
761
762
763
764
765
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

784
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
785
786
787
788
789
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

790
791
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
792
793
794
		return
	}

Michael Yang's avatar
Michael Yang committed
795
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
796
797
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
// isLocalIP reports whether ip is assigned to one of this machine's network
// interfaces.
//
// Addresses are compared by their textual form. Interfaces whose addresses
// cannot be listed, and addresses that do not parse as CIDR, are skipped. If
// the interface list itself cannot be obtained the result is false.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	want := ip.String()
	for _, iface := range ifaces {
		ifaceAddrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, ifaceAddr := range ifaceAddrs {
			candidate, _, err := net.ParseCIDR(ifaceAddr.String())
			if err != nil {
				continue
			}

			if candidate.String() == want {
				return true
			}
		}
	}

	return false
}

819
// allowedHost reports whether host names this machine or a local-only
// domain.
//
// A host is allowed when it is empty, is "localhost", equals the name
// returned by os.Hostname, or ends in one of the local TLDs ".localhost",
// ".local" or ".internal". DNS names are case-insensitive, so every
// comparison is done on the lower-cased form; otherwise "LOCALHOST" or
// "Foo.LOCAL" would be rejected.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	if hostname, err := os.Hostname(); err == nil && host == strings.ToLower(hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
843

Jeffrey Morgan's avatar
Jeffrey Morgan committed
844
845
846
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
847
848
849
850
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
851
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
852
853
854
855
856
857
858
859
860
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
861
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
862
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
863
864
865
866
867
				c.Next()
				return
			}
		}

868
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
869
			if c.Request.Method == http.MethodOptions {
870
871
872
873
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

874
875
876
877
878
879
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
880
}
881

882
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
883
884
	config := cors.DefaultConfig()
	config.AllowWildcard = true
885
	config.AllowBrowserExtensions = true
886
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
887
888
889
890
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
891
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
892

Bruce MacDonald's avatar
Bruce MacDonald committed
893
	r := gin.Default()
894
895
	r.Use(
		cors.New(config),
896
		allowedHostsMiddleware(s.addr),
897
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
898

Daniel Hiltgen's avatar
Daniel Hiltgen committed
899
900
901
902
903
904
905
906
907
908
909
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
910
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
911

912
	// Compatibility endpoints
913
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
914
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
915
916
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
917

Michael Yang's avatar
Michael Yang committed
918
919
920
921
922
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
923
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
924
925
926
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
927
928
	}

929
930
931
932
	return r
}

func Serve(ln net.Listener) error {
Michael Yang's avatar
Michael Yang committed
933
	level := slog.LevelInfo
934
	if envconfig.Debug {
Michael Yang's avatar
Michael Yang committed
935
		level = slog.LevelDebug
936
	}
Michael Yang's avatar
Michael Yang committed
937

938
	slog.Info("server config", "env", envconfig.Values())
Michael Yang's avatar
Michael Yang committed
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

954
955
956
957
958
959
960
961
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

962
	if !envconfig.NoPrune {
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
978
	ctx, done := context.WithCancel(context.Background())
Daniel Hiltgen's avatar
Daniel Hiltgen committed
979
980
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
981
	s := &Server{addr: ln.Addr(), sched: sched}
982
983

	http.Handle("/", s.GenerateRoutes())
984

985
	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
986
	srvr := &http.Server{
987
988
989
990
991
992
993
994
995
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
Jeffrey Morgan's avatar
Jeffrey Morgan committed
996
997
	}

998
999
	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
1000
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
1001
1002
	go func() {
		<-signals
1003
		srvr.Close()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1004
		schedDone()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1005
		sched.unloadAllRunners()
1006
		gpu.Cleanup()
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1007
		done()
1008
1009
	}()

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1010
	if err := llm.Init(); err != nil {
1011
1012
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1013

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1014
	s.sched.Run(schedCtx)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1015
1016
1017

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
Daniel Hiltgen's avatar
Daniel Hiltgen committed
1018
1019
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()
1020

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1021
1022
1023
1024
1025
1026
1027
	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
1028
	return nil
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1029
}
Michael Yang's avatar
Michael Yang committed
1030

1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
// waitForStream drains ch and replies with a single JSON document describing
// the final outcome.
//
// Progress updates are discarded until one reports the status "success",
// which is returned with 200. An error message (a gin.H), a value of any
// other type, or the channel closing before success each end the wait with a
// 500 response.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for item := range ch {
		switch v := item.(type) {
		case api.ProgressResponse:
			if v.Status != "success" {
				// Intermediate progress: keep waiting.
				continue
			}

			c.JSON(http.StatusOK, v)
			return
		case gin.H:
			text, isString := v["error"].(string)
			if !isString {
				text = "unexpected error format in progress response"
			}

			c.JSON(http.StatusInternalServerError, gin.H{"error": text})
			return
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
			return
		}
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1056
func streamResponse(c *gin.Context, ch chan any) {
1057
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1058
1059
1060
1061
1062
1063
1064
1065
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1066
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1067
1068
1069
			return false
		}

1070
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1071
1072
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1073
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1074
1075
1076
1077
1078
1079
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1080

1081
func (s *Server) ProcessHandler(c *gin.Context) {
1082
	models := []api.ProcessModelResponse{}
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1094
		mr := api.ProcessModelResponse{
1095
1096
1097
1098
1099
1100
1101
1102
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1103
1104
1105
1106
1107
1108
1109
1110
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1111
1112
1113
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1114
1115
1116
1117
1118
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1119
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1120
1121
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1122
func (s *Server) ChatHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1123
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1124
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1125
1126
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1127
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1128
1129
1130
1131
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1132
1133
1134
1135
	caps := []Capability{CapabilityCompletion}
	r, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1136
		return
Michael Yang's avatar
Michael Yang committed
1137
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1138
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1139
1140
		return
	}
Michael Yang's avatar
Michael Yang committed
1141

Michael Yang's avatar
Michael Yang committed
1142
1143
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1144
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1145
1146
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1147
1148
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1149
		})
1150
1151
1152
		return
	}

Michael Yang's avatar
Michael Yang committed
1153
	prompt, images, err := chatPrompt(c.Request.Context(), r.model, r.llama.Tokenize, r.Options, req.Messages)
Michael Yang's avatar
Michael Yang committed
1154
1155
1156
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1157
1158
	}

Michael Yang's avatar
Michael Yang committed
1159
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1160

Bruce MacDonald's avatar
Bruce MacDonald committed
1161
1162
1163
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1164
1165
1166
1167
1168
1169
1170
		if err := r.llama.Completion(c.Request.Context(), llm.CompletionRequest{
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
			Options: *r.Options,
		}, func(r llm.CompletionResponse) {
			ch <- api.ChatResponse{
1171
1172
1173
1174
1175
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1176
1177
1178
1179
1180
1181
1182
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
Michael Yang's avatar
Michael Yang committed
1183
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1184
1185
1186
1187
1188
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
1189
		var r api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1190
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1191
1192
		for rr := range ch {
			switch t := rr.(type) {
1193
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1194
1195
				sb.WriteString(t.Message.Content)
				r = t
1196
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1197
1198
1199
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1200
				}
Michael Yang's avatar
Michael Yang committed
1201
1202
1203

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1204
			default:
Michael Yang's avatar
Michael Yang committed
1205
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1206
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1207
1208
			}
		}
1209

Michael Yang's avatar
Michael Yang committed
1210
1211
		r.Message.Content = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
1212
1213
1214
1215
1216
		return
	}

	streamResponse(c, ch)
}
1217

Michael Yang's avatar
Michael Yang committed
1218
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1219
	switch {
Michael Yang's avatar
Michael Yang committed
1220
1221
	case errors.Is(err, errRequired):
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1222
	case errors.Is(err, context.Canceled):
1223
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1224
	case errors.Is(err, ErrMaxQueue):
1225
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1226
1227
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1228
1229
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1230
1231
	}
}