You need to sign in or sign up before continuing.
routes.go 30.8 KB
Newer Older
Jeffrey Morgan's avatar
Jeffrey Morgan committed
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
	"bytes"
Michael Yang's avatar
Michael Yang committed
5
	"cmp"
6
	"context"
Michael Yang's avatar
Michael Yang committed
7
	"encoding/json"
8
	"errors"
9
	"fmt"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
10
	"io"
11
	"log/slog"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
12
13
	"net"
	"net/http"
14
	"net/netip"
15
	"os"
16
	"os/signal"
Michael Yang's avatar
Michael Yang committed
17
	"path/filepath"
18
	"slices"
Michael Yang's avatar
Michael Yang committed
19
	"strings"
20
	"syscall"
21
	"time"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
22

Michael Yang's avatar
Michael Yang committed
23
	"github.com/gin-contrib/cors"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
24
25
	"github.com/gin-gonic/gin"

26
	"github.com/ollama/ollama/api"
27
	"github.com/ollama/ollama/envconfig"
28
29
30
	"github.com/ollama/ollama/gpu"
	"github.com/ollama/ollama/llm"
	"github.com/ollama/ollama/openai"
31
	"github.com/ollama/ollama/parser"
Michael Yang's avatar
Michael Yang committed
32
	"github.com/ollama/ollama/template"
33
	"github.com/ollama/ollama/types/errtypes"
Michael Yang's avatar
Michael Yang committed
34
	"github.com/ollama/ollama/types/model"
35
	"github.com/ollama/ollama/version"
Jeffrey Morgan's avatar
Jeffrey Morgan committed
36
37
)

Michael Yang's avatar
Michael Yang committed
38
39
// mode is the gin run mode applied by init; any value other than gin's
// debug, release, or test mode is reset to gin.DebugMode there.
var mode string = gin.DebugMode

40
type Server struct {
Daniel Hiltgen's avatar
Daniel Hiltgen committed
41
42
	addr  net.Addr
	sched *Scheduler
43
44
}

Michael Yang's avatar
Michael Yang committed
45
46
47
48
49
50
51
52
53
54
55
56
// init normalizes the package-level gin mode and applies it. Anything other
// than the debug, release, or test mode falls back to debug mode.
func init() {
	recognized := mode == gin.DebugMode || mode == gin.ReleaseMode || mode == gin.TestMode
	if !recognized {
		mode = gin.DebugMode
	}

	gin.SetMode(mode)
}

Michael Yang's avatar
Michael Yang committed
57
58
// errRequired is wrapped behind the name of a missing request field; for
// example scheduleRunner formats it as "model is required".
var errRequired = errors.New("is required")

59
60
61
62
63
64
65
66
67
68
69
func modelOptions(model *Model, requestOpts map[string]interface{}) (api.Options, error) {
	opts := api.DefaultOptions()
	if err := opts.FromMap(model.Options); err != nil {
		return api.Options{}, err
	}

	if err := opts.FromMap(requestOpts); err != nil {
		return api.Options{}, err
	}

	return opts, nil
Bruce MacDonald's avatar
Bruce MacDonald committed
70
71
}

Michael Yang's avatar
Michael Yang committed
72
73
74
// scheduleRunner schedules a runner after validating inputs such as capabilities and model options.
// It returns the allocated runner, model instance, and consolidated options if successful and error otherwise.
func (s *Server) scheduleRunner(ctx context.Context, name string, caps []Capability, requestOpts map[string]any, keepAlive *api.Duration) (llm.LlamaServer, *Model, *api.Options, error) {
Michael Yang's avatar
Michael Yang committed
75
	if name == "" {
Michael Yang's avatar
Michael Yang committed
76
		return nil, nil, nil, fmt.Errorf("model %w", errRequired)
Bruce MacDonald's avatar
Bruce MacDonald committed
77
78
	}

Michael Yang's avatar
Michael Yang committed
79
	model, err := GetModel(name)
Bruce MacDonald's avatar
Bruce MacDonald committed
80
	if err != nil {
Michael Yang's avatar
Michael Yang committed
81
		return nil, nil, nil, err
82
83
	}

Michael Yang's avatar
Michael Yang committed
84
	if err := model.CheckCapabilities(caps...); err != nil {
Michael Yang's avatar
Michael Yang committed
85
		return nil, nil, nil, fmt.Errorf("%s %w", name, err)
86
87
	}

Michael Yang's avatar
Michael Yang committed
88
	opts, err := modelOptions(model, requestOpts)
89
	if err != nil {
Michael Yang's avatar
Michael Yang committed
90
		return nil, nil, nil, err
91
92
	}

Michael Yang's avatar
Michael Yang committed
93
	runnerCh, errCh := s.sched.GetRunner(ctx, model, opts, keepAlive)
Daniel Hiltgen's avatar
Daniel Hiltgen committed
94
95
	var runner *runnerRef
	select {
Michael Yang's avatar
Michael Yang committed
96
97
	case runner = <-runnerCh:
	case err = <-errCh:
Michael Yang's avatar
Michael Yang committed
98
		return nil, nil, nil, err
Bruce MacDonald's avatar
Bruce MacDonald committed
99
100
	}

Michael Yang's avatar
Michael Yang committed
101
	return runner.llama, model, &opts, nil
Michael Yang's avatar
Michael Yang committed
102
103
104
105
106
107
108
109
110
}

func (s *Server) GenerateHandler(c *gin.Context) {
	var req api.GenerateRequest
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	} else if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Bruce MacDonald's avatar
Bruce MacDonald committed
111
112
113
		return
	}

Michael Yang's avatar
Michael Yang committed
114
115
116
117
118
	if req.Format != "" && req.Format != "json" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be empty or \"json\""})
		return
	} else if req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0) {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
Michael Yang's avatar
Michael Yang committed
119
120
121
		return
	}

Michael Yang's avatar
Michael Yang committed
122
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
Michael Yang committed
123
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
124
125
126
127
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
		return
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
128
129
130
131
132
133
134
135
136
137
138
		handleScheduleError(c, req.Model, err)
		return
	}

	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.GenerateResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Done:       true,
			DoneReason: "load",
		})
Michael Yang's avatar
Michael Yang committed
139
140
		return
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
141

Michael Yang's avatar
Michael Yang committed
142
143
144
145
	images := make([]llm.ImageData, len(req.Images))
	for i := range req.Images {
		images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
	}
Bruce MacDonald's avatar
Bruce MacDonald committed
146

Michael Yang's avatar
Michael Yang committed
147
148
149
150
151
	prompt := req.Prompt
	if !req.Raw {
		var msgs []api.Message
		if req.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
Michael Yang's avatar
Michael Yang committed
152
153
		} else if m.System != "" {
			msgs = append(msgs, api.Message{Role: "system", Content: m.System})
154
155
		}

Michael Yang's avatar
Michael Yang committed
156
157
		for _, i := range images {
			msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
Michael Yang's avatar
Michael Yang committed
158
159
		}

Michael Yang's avatar
Michael Yang committed
160
		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
Michael Yang's avatar
Michael Yang committed
161

Michael Yang's avatar
Michael Yang committed
162
		tmpl := m.Template
Michael Yang's avatar
Michael Yang committed
163
164
165
166
167
168
169
170
171
		if req.Template != "" {
			tmpl, err = template.Parse(req.Template)
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}
		}

		var b bytes.Buffer
Bruce MacDonald's avatar
Bruce MacDonald committed
172
		if req.Context != nil {
Michael Yang's avatar
Michael Yang committed
173
			s, err := r.Detokenize(c.Request.Context(), req.Context)
Bruce MacDonald's avatar
Bruce MacDonald committed
174
175
176
177
178
			if err != nil {
				c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
				return
			}

Michael Yang's avatar
Michael Yang committed
179
			b.WriteString(s)
180
181
		}

Michael Yang's avatar
Michael Yang committed
182
183
184
185
		if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil {
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}
186

Michael Yang's avatar
Michael Yang committed
187
		prompt = b.String()
Bruce MacDonald's avatar
Bruce MacDonald committed
188
189
	}

Michael Yang's avatar
Michael Yang committed
190
	slog.Debug("generate request", "prompt", prompt, "images", images)
191

Bruce MacDonald's avatar
Bruce MacDonald committed
192
193
194
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
195
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
196
197
198
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
199
			Options: opts,
Michael Yang's avatar
Michael Yang committed
200
201
		}, func(r llm.CompletionResponse) {
			ch <- api.GenerateResponse{
202
203
204
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Response:   r.Content,
Michael Yang's avatar
Michael Yang committed
205
				Done:       r.Done,
206
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
207
208
209
210
211
212
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
Bruce MacDonald's avatar
Bruce MacDonald committed
213
			}
Michael Yang's avatar
Michael Yang committed
214
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
215
216
217
218
219
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
220
		var r api.GenerateResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
221
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
222
223
		for rr := range ch {
			switch t := rr.(type) {
224
			case api.GenerateResponse:
Michael Yang's avatar
Michael Yang committed
225
226
				sb.WriteString(t.Response)
				r = t
227
			case gin.H:
Michael Yang's avatar
Michael Yang committed
228
229
230
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
231
				}
Michael Yang's avatar
Michael Yang committed
232
233
234

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
235
			default:
Michael Yang's avatar
Michael Yang committed
236
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
Bruce MacDonald's avatar
Bruce MacDonald committed
237
238
239
				return
			}
		}
240

Michael Yang's avatar
Michael Yang committed
241
242
		r.Response = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
243
244
245
246
247
248
		return
	}

	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
249
func (s *Server) EmbeddingsHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
250
	var req api.EmbeddingRequest
Michael Yang's avatar
Michael Yang committed
251
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
252
253
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
254
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
255
256
257
258
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
259
	r, _, _, err := s.scheduleRunner(c.Request.Context(), req.Model, []Capability{}, req.Options, req.KeepAlive)
Bruce MacDonald's avatar
Bruce MacDonald committed
260
	if err != nil {
Michael Yang's avatar
Michael Yang committed
261
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
262
263
264
		return
	}

265
266
267
	// an empty request loads the model
	if req.Prompt == "" {
		c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: []float64{}})
Bruce MacDonald's avatar
Bruce MacDonald committed
268
269
270
		return
	}

Michael Yang's avatar
Michael Yang committed
271
	embedding, err := r.Embedding(c.Request.Context(), req.Prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
272
	if err != nil {
273
		slog.Info(fmt.Sprintf("embedding generation failed: %v", err))
Bruce MacDonald's avatar
Bruce MacDonald committed
274
275
276
277
		c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to generate embedding"})
		return
	}

Michael Yang's avatar
Michael Yang committed
278
	c.JSON(http.StatusOK, api.EmbeddingResponse{Embedding: embedding})
Bruce MacDonald's avatar
Bruce MacDonald committed
279
280
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
281
func (s *Server) PullModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
282
	var req api.PullRequest
Michael Yang's avatar
Michael Yang committed
283
284
285
286
287
288
289
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
290
291
292
		return
	}

293
294
295
296
297
298
299
300
	name := model.ParseName(cmp.Or(req.Model, req.Name))
	if !name.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "invalid model name"})
		return
	}

	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
301
302
303
		return
	}

304
305
306
	ch := make(chan any)
	go func() {
		defer close(ch)
307
308
		fn := func(r api.ProgressResponse) {
			ch <- r
309
		}
310

Michael Yang's avatar
Michael Yang committed
311
		regOpts := &registryOptions{
312
313
314
			Insecure: req.Insecure,
		}

315
316
317
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

318
		if err := PullModel(ctx, name.DisplayShortest(), regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
319
			ch <- gin.H{"error": err.Error()}
320
321
322
		}
	}()

323
324
325
326
327
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

328
329
330
	streamResponse(c, ch)
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
331
func (s *Server) PushModelHandler(c *gin.Context) {
332
	var req api.PushRequest
Michael Yang's avatar
Michael Yang committed
333
334
335
336
337
338
339
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
340
341
		return
	}
Michael Yang's avatar
Michael Yang committed
342

Michael Yang's avatar
Michael Yang committed
343
344
345
346
347
348
349
	var model string
	if req.Model != "" {
		model = req.Model
	} else if req.Name != "" {
		model = req.Name
	} else {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
350
351
352
		return
	}

353
354
355
	ch := make(chan any)
	go func() {
		defer close(ch)
356
357
		fn := func(r api.ProgressResponse) {
			ch <- r
358
		}
359

Michael Yang's avatar
Michael Yang committed
360
		regOpts := &registryOptions{
361
362
363
			Insecure: req.Insecure,
		}

Michael Yang's avatar
Michael Yang committed
364
365
366
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

Michael Yang's avatar
Michael Yang committed
367
		if err := PushModel(ctx, model, regOpts, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
368
			ch <- gin.H{"error": err.Error()}
369
370
371
		}
	}()

372
373
374
375
376
	if req.Stream != nil && !*req.Stream {
		waitForStream(c, ch)
		return
	}

377
378
379
	streamResponse(c, ch)
}

380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
// checkNameExists reports an error when an existing manifest name has the
// same file path as name under case-insensitive comparison but is not equal
// to name itself. An exact match is not treated as a conflict.
func checkNameExists(name model.Name) error {
	existing, err := Manifests()
	if err != nil {
		return err
	}

	wanted := name.Filepath()
	for candidate := range existing {
		sameExceptCase := strings.EqualFold(candidate.Filepath(), wanted)
		if sameExceptCase && candidate != name {
			return errors.New("a model with that name already exists")
		}
	}

	return nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
395
func (s *Server) CreateModelHandler(c *gin.Context) {
396
397
	var r api.CreateRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
398
399
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
400
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
401
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
402
		return
403
404
	}

405
	name := model.ParseName(cmp.Or(r.Model, r.Name))
Michael Yang's avatar
Michael Yang committed
406
	if !name.IsValid() {
407
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": errtypes.InvalidModelNameErrMsg})
408
409
410
		return
	}

411
412
413
414
415
	if err := checkNameExists(name); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

416
	if r.Path == "" && r.Modelfile == "" {
Michael Yang's avatar
Michael Yang committed
417
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
Michael Yang's avatar
Michael Yang committed
418
419
		return
	}
Michael Yang's avatar
Michael Yang committed
420

421
422
423
	var sr io.Reader = strings.NewReader(r.Modelfile)
	if r.Path != "" && r.Modelfile == "" {
		f, err := os.Open(r.Path)
Michael Yang's avatar
Michael Yang committed
424
425
426
427
		if err != nil {
			c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("error reading modelfile: %s", err)})
			return
		}
Michael Yang's avatar
Michael Yang committed
428
		defer f.Close()
Michael Yang's avatar
Michael Yang committed
429

430
		sr = f
Michael Yang's avatar
Michael Yang committed
431
	}
Michael Yang's avatar
Michael Yang committed
432

433
	f, err := parser.ParseFile(sr)
Michael Yang's avatar
Michael Yang committed
434
435
436
437
438
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
439
	ch := make(chan any)
Michael Yang's avatar
Michael Yang committed
440
441
	go func() {
		defer close(ch)
442
443
		fn := func(resp api.ProgressResponse) {
			ch <- resp
444
445
		}

446
447
448
		ctx, cancel := context.WithCancel(c.Request.Context())
		defer cancel()

449
450
		quantization := cmp.Or(r.Quantize, r.Quantization)
		if err := CreateModel(ctx, name, filepath.Dir(r.Path), strings.ToUpper(quantization), f, fn); err != nil {
Michael Yang's avatar
Michael Yang committed
451
			ch <- gin.H{"error": err.Error()}
452
		}
Michael Yang's avatar
Michael Yang committed
453
	}()
Michael Yang's avatar
Michael Yang committed
454

455
	if r.Stream != nil && !*r.Stream {
456
457
458
459
		waitForStream(c, ch)
		return
	}

Michael Yang's avatar
Michael Yang committed
460
	streamResponse(c, ch)
Bruce MacDonald's avatar
Bruce MacDonald committed
461
462
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
463
func (s *Server) DeleteModelHandler(c *gin.Context) {
464
465
	var r api.DeleteRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
466
467
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
468
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
469
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
470
471
472
		return
	}

473
474
475
	n := model.ParseName(cmp.Or(r.Model, r.Name))
	if !n.IsValid() {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("name %q is invalid", cmp.Or(r.Model, r.Name))})
476
477
		return
	}
Michael Yang's avatar
Michael Yang committed
478

479
	m, err := ParseNamedManifest(n)
Michael Yang's avatar
Michael Yang committed
480
481
482
483
484
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

485
	if err := m.Remove(); err != nil {
Michael Yang's avatar
Michael Yang committed
486
487
488
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
489
490
491
492
493

	if err := m.RemoveLayers(); err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
494
495
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
496
func (s *Server) ShowModelHandler(c *gin.Context) {
Patrick Devine's avatar
Patrick Devine committed
497
	var req api.ShowRequest
Michael Yang's avatar
Michael Yang committed
498
499
500
501
502
503
504
	err := c.ShouldBindJSON(&req)
	switch {
	case errors.Is(err, io.EOF):
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
	case err != nil:
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
505
506
507
		return
	}

Michael Yang's avatar
Michael Yang committed
508
	if req.Model != "" {
Michael Yang's avatar
Michael Yang committed
509
		// noop
Michael Yang's avatar
Michael Yang committed
510
	} else if req.Name != "" {
Michael Yang's avatar
Michael Yang committed
511
		req.Model = req.Name
Michael Yang's avatar
Michael Yang committed
512
	} else {
513
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
514
515
516
		return
	}

517
	resp, err := GetModelInfo(req)
Patrick Devine's avatar
Patrick Devine committed
518
	if err != nil {
519
520
		switch {
		case os.IsNotExist(err):
Michael Yang's avatar
Michael Yang committed
521
			c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Model)})
522
523
524
		case err.Error() == "invalid model name":
			c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		default:
Patrick Devine's avatar
Patrick Devine committed
525
526
527
528
529
530
531
532
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		}
		return
	}

	c.JSON(http.StatusOK, resp)
}

533
func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
534
	m, err := GetModel(req.Model)
Patrick Devine's avatar
Patrick Devine committed
535
536
537
538
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
539
	modelDetails := api.ModelDetails{
540
541
542
543
544
545
		ParentModel:       m.ParentModel,
		Format:            m.Config.ModelFormat,
		Family:            m.Config.ModelFamily,
		Families:          m.Config.ModelFamilies,
		ParameterSize:     m.Config.ModelType,
		QuantizationLevel: m.Config.FileType,
Patrick Devine's avatar
Patrick Devine committed
546
547
	}

548
	if req.System != "" {
549
		m.System = req.System
550
551
552
	}

	if req.Template != "" {
Michael Yang's avatar
Michael Yang committed
553
554
555
556
		m.Template, err = template.Parse(req.Template)
		if err != nil {
			return nil, err
		}
557
558
	}

Michael Yang's avatar
Michael Yang committed
559
560
561
	msgs := make([]api.Message, len(m.Messages))
	for i, msg := range m.Messages {
		msgs[i] = api.Message{Role: msg.Role, Content: msg.Content}
562
563
	}

564
565
566
567
568
569
570
571
572
573
	n := model.ParseName(req.Model)
	if !n.IsValid() {
		return nil, fmt.Errorf("invalid model name")
	}

	manifest, err := ParseNamedManifest(n)
	if err != nil {
		return nil, err
	}

Patrick Devine's avatar
Patrick Devine committed
574
	resp := &api.ShowResponse{
575
576
		License:    strings.Join(m.License, "\n"),
		System:     m.System,
Michael Yang's avatar
Michael Yang committed
577
		Template:   m.Template.String(),
578
579
580
		Details:    modelDetails,
		Messages:   msgs,
		ModifiedAt: manifest.fi.ModTime(),
Patrick Devine's avatar
Patrick Devine committed
581
582
583
584
	}

	var params []string
	cs := 30
585
	for k, v := range m.Options {
Patrick Devine's avatar
Patrick Devine committed
586
587
588
		switch val := v.(type) {
		case []interface{}:
			for _, nv := range val {
Patrick Devine's avatar
Patrick Devine committed
589
				params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
Patrick Devine's avatar
Patrick Devine committed
590
			}
Patrick Devine's avatar
Patrick Devine committed
591
592
		default:
			params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
Patrick Devine's avatar
Patrick Devine committed
593
594
595
596
		}
	}
	resp.Parameters = strings.Join(params, "\n")

597
598
	for k, v := range req.Options {
		if _, ok := req.Options[k]; ok {
599
			m.Options[k] = v
600
601
602
		}
	}

603
	var sb strings.Builder
604
	fmt.Fprintln(&sb, "# Modelfile generated by \"ollama show\"")
605
	fmt.Fprintln(&sb, "# To build a new Modelfile based on this, replace FROM with:")
606
607
	fmt.Fprintf(&sb, "# FROM %s\n\n", m.ShortName)
	fmt.Fprint(&sb, m.String())
608
	resp.Modelfile = sb.String()
609

610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
	kvData, err := getKVData(m.ModelPath, req.Verbose)
	if err != nil {
		return nil, err
	}
	delete(kvData, "general.name")
	delete(kvData, "tokenizer.chat_template")
	resp.ModelInfo = kvData

	if len(m.ProjectorPaths) > 0 {
		projectorData, err := getKVData(m.ProjectorPaths[0], req.Verbose)
		if err != nil {
			return nil, err
		}
		resp.ProjectorInfo = projectorData
	}

Patrick Devine's avatar
Patrick Devine committed
626
627
628
	return resp, nil
}

629
// getKVData loads the key/value metadata of the model file at digest.
// The array-size limit passed to llm.LoadModel is -1 in verbose mode and 0
// otherwise. Outside verbose mode every []any value with more than five
// elements is replaced by an empty slice before the map is returned.
func getKVData(digest string, verbose bool) (llm.KV, error) {
	arrayLimit := 0
	if verbose {
		arrayLimit = -1
	}

	loaded, err := llm.LoadModel(digest, arrayLimit)
	if err != nil {
		return nil, err
	}

	kv := loaded.KV()
	if verbose {
		return kv, nil
	}

	for key, value := range kv {
		if list, isList := value.([]any); isList && len(list) > 5 {
			kv[key] = []any{}
		}
	}

	return kv, nil
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
652
func (s *Server) ListModelsHandler(c *gin.Context) {
653
	ms, err := Manifests()
Patrick Devine's avatar
Patrick Devine committed
654
655
656
657
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}
Michael Yang's avatar
Michael Yang committed
658

659
	models := []api.ListModelResponse{}
660
661
662
663
664
665
666
	for n, m := range ms {
		f, err := m.Config.Open()
		if err != nil {
			slog.Warn("bad manifest filepath", "name", n, "error", err)
			continue
		}
		defer f.Close()
667

668
669
670
671
		var cf ConfigV2
		if err := json.NewDecoder(f).Decode(&cf); err != nil {
			slog.Warn("bad manifest config", "name", n, "error", err)
			continue
Patrick Devine's avatar
Patrick Devine committed
672
		}
Michael Yang's avatar
Michael Yang committed
673

674
		// tag should never be masked
675
		models = append(models, api.ListModelResponse{
676
677
678
679
680
681
682
683
684
685
686
687
688
			Model:      n.DisplayShortest(),
			Name:       n.DisplayShortest(),
			Size:       m.Size(),
			Digest:     m.digest,
			ModifiedAt: m.fi.ModTime(),
			Details: api.ModelDetails{
				Format:            cf.ModelFormat,
				Family:            cf.ModelFamily,
				Families:          cf.ModelFamilies,
				ParameterSize:     cf.ModelType,
				QuantizationLevel: cf.FileType,
			},
		})
Patrick Devine's avatar
Patrick Devine committed
689
690
	}

691
	slices.SortStableFunc(models, func(i, j api.ListModelResponse) int {
692
693
694
695
		// most recently modified first
		return cmp.Compare(j.ModifiedAt.Unix(), i.ModifiedAt.Unix())
	})

Michael Yang's avatar
Michael Yang committed
696
	c.JSON(http.StatusOK, api.ListResponse{Models: models})
Patrick Devine's avatar
Patrick Devine committed
697
698
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
699
func (s *Server) CopyModelHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
700
701
	var r api.CopyRequest
	if err := c.ShouldBindJSON(&r); errors.Is(err, io.EOF) {
Michael Yang's avatar
Michael Yang committed
702
703
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
704
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
705
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Patrick Devine's avatar
Patrick Devine committed
706
707
708
		return
	}

Michael Yang's avatar
Michael Yang committed
709
710
	src := model.ParseName(r.Source)
	if !src.IsValid() {
Michael Yang's avatar
Michael Yang committed
711
712
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("source %q is invalid", r.Source)})
		return
713
714
	}

Michael Yang's avatar
Michael Yang committed
715
716
	dst := model.ParseName(r.Destination)
	if !dst.IsValid() {
717
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("destination %q is invalid", r.Destination)})
Patrick Devine's avatar
Patrick Devine committed
718
719
		return
	}
Michael Yang's avatar
Michael Yang committed
720

721
722
723
724
725
	if err := checkNameExists(dst); err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
726
727
728
729
730
	if err := CopyModel(src, dst); errors.Is(err, os.ErrNotExist) {
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found", r.Source)})
	} else if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
	}
Patrick Devine's avatar
Patrick Devine committed
731
732
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
733
func (s *Server) HeadBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
734
735
736
737
738
739
740
741
742
743
744
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	if _, err := os.Stat(path); err != nil {
		c.AbortWithStatusJSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("blob %q not found", c.Param("digest"))})
		return
	}

Michael Yang's avatar
Michael Yang committed
745
	c.Status(http.StatusOK)
Michael Yang's avatar
Michael Yang committed
746
747
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
748
func (s *Server) CreateBlobHandler(c *gin.Context) {
Michael Yang's avatar
Michael Yang committed
749
750
	if ib, ok := intermediateBlobs[c.Param("digest")]; ok {
		p, err := GetBlobsPath(ib)
751
752
753
754
755
756
		if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		if _, err := os.Stat(p); errors.Is(err, os.ErrNotExist) {
Michael Yang's avatar
Michael Yang committed
757
758
			slog.Info("evicting intermediate blob which no longer exists", "digest", ib)
			delete(intermediateBlobs, c.Param("digest"))
759
760
761
762
763
764
765
766
767
		} else if err != nil {
			c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		} else {
			c.Status(http.StatusOK)
			return
		}
	}

768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
	path, err := GetBlobsPath(c.Param("digest"))
	if err != nil {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

	_, err = os.Stat(path)
	switch {
	case errors.Is(err, os.ErrNotExist):
		// noop
	case err != nil:
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	default:
		c.Status(http.StatusOK)
		return
	}

786
	layer, err := NewLayer(c.Request.Body, "")
Michael Yang's avatar
Michael Yang committed
787
788
789
790
791
	if err != nil {
		c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
	}

792
793
	if layer.Digest != c.Param("digest") {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("digest mismatch, expected %q, got %q", c.Param("digest"), layer.Digest)})
Michael Yang's avatar
Michael Yang committed
794
795
796
		return
	}

Michael Yang's avatar
Michael Yang committed
797
	c.Status(http.StatusCreated)
Michael Yang's avatar
Michael Yang committed
798
799
}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
// isLocalIP reports whether ip is one of the addresses assigned to a network
// interface on this machine. Addresses are compared by their string form.
// Failure to enumerate interfaces, or to read one interface's addresses, is
// treated as "no match" rather than as an error.
func isLocalIP(ip netip.Addr) bool {
	ifaces, err := net.Interfaces()
	if err != nil {
		return false
	}

	want := ip.String()
	for _, iface := range ifaces {
		addrs, err := iface.Addrs()
		if err != nil {
			continue
		}

		for _, a := range addrs {
			// Interface addresses are parsed as CIDR ("ip/mask"); anything
			// that does not parse that way is skipped.
			candidate, _, err := net.ParseCIDR(a.String())
			if err != nil {
				continue
			}

			if candidate.String() == want {
				return true
			}
		}
	}

	return false
}

821
// allowedHost reports whether host (a host name without port) is considered
// local: it is empty, is "localhost", equals this machine's hostname, or ends
// in one of the local-only suffixes ".localhost", ".local" or ".internal".
//
// DNS names are case-insensitive, so every comparison ignores case;
// "LOCALHOST" and "Printer.LOCAL" are accepted just like their lower-case
// spellings.
func allowedHost(host string) bool {
	host = strings.ToLower(host)

	if host == "" || host == "localhost" {
		return true
	}

	// A failure to look up the hostname just means this check cannot match.
	if hostname, err := os.Hostname(); err == nil && strings.EqualFold(host, hostname) {
		return true
	}

	tlds := []string{
		"localhost",
		"local",
		"internal",
	}

	// check if the host is a local TLD
	for _, tld := range tlds {
		if strings.HasSuffix(host, "."+tld) {
			return true
		}
	}

	return false
}
845

Jeffrey Morgan's avatar
Jeffrey Morgan committed
846
847
848
func allowedHostsMiddleware(addr net.Addr) gin.HandlerFunc {
	return func(c *gin.Context) {
		if addr == nil {
849
850
851
852
			c.Next()
			return
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
853
		if addr, err := netip.ParseAddrPort(addr.String()); err == nil && !addr.Addr().IsLoopback() {
854
855
856
857
858
859
860
861
862
			c.Next()
			return
		}

		host, _, err := net.SplitHostPort(c.Request.Host)
		if err != nil {
			host = c.Request.Host
		}

Jeffrey Morgan's avatar
Jeffrey Morgan committed
863
		if addr, err := netip.ParseAddr(host); err == nil {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
864
			if addr.IsLoopback() || addr.IsPrivate() || addr.IsUnspecified() || isLocalIP(addr) {
Jeffrey Morgan's avatar
Jeffrey Morgan committed
865
866
867
868
869
				c.Next()
				return
			}
		}

870
		if allowedHost(host) {
Michael Yang's avatar
lint  
Michael Yang committed
871
			if c.Request.Method == http.MethodOptions {
872
873
874
875
				c.AbortWithStatus(http.StatusNoContent)
				return
			}

876
877
878
879
880
881
			c.Next()
			return
		}

		c.AbortWithStatus(http.StatusForbidden)
	}
882
}
883

884
func (s *Server) GenerateRoutes() http.Handler {
Michael Yang's avatar
Michael Yang committed
885
886
	config := cors.DefaultConfig()
	config.AllowWildcard = true
887
	config.AllowBrowserExtensions = true
888
	config.AllowHeaders = []string{"Authorization", "Content-Type", "User-Agent", "Accept", "X-Requested-With"}
royjhan's avatar
royjhan committed
889
890
891
892
	openAIProperties := []string{"lang", "package-version", "os", "arch", "runtime", "runtime-version", "async"}
	for _, prop := range openAIProperties {
		config.AllowHeaders = append(config.AllowHeaders, "x-stainless-"+prop)
	}
893
	config.AllowOrigins = envconfig.AllowOrigins
Michael Yang's avatar
Michael Yang committed
894

Bruce MacDonald's avatar
Bruce MacDonald committed
895
	r := gin.Default()
896
897
	r.Use(
		cors.New(config),
898
		allowedHostsMiddleware(s.addr),
899
	)
Bruce MacDonald's avatar
Bruce MacDonald committed
900

Daniel Hiltgen's avatar
Daniel Hiltgen committed
901
902
903
904
905
906
907
908
909
910
911
	r.POST("/api/pull", s.PullModelHandler)
	r.POST("/api/generate", s.GenerateHandler)
	r.POST("/api/chat", s.ChatHandler)
	r.POST("/api/embeddings", s.EmbeddingsHandler)
	r.POST("/api/create", s.CreateModelHandler)
	r.POST("/api/push", s.PushModelHandler)
	r.POST("/api/copy", s.CopyModelHandler)
	r.DELETE("/api/delete", s.DeleteModelHandler)
	r.POST("/api/show", s.ShowModelHandler)
	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
912
	r.GET("/api/ps", s.ProcessHandler)
Jeffrey Morgan's avatar
Jeffrey Morgan committed
913

914
	// Compatibility endpoints
915
	r.POST("/v1/chat/completions", openai.ChatMiddleware(), s.ChatHandler)
916
	r.POST("/v1/completions", openai.CompletionsMiddleware(), s.GenerateHandler)
917
918
	r.GET("/v1/models", openai.ListMiddleware(), s.ListModelsHandler)
	r.GET("/v1/models/:model", openai.RetrieveMiddleware(), s.ShowModelHandler)
919

Michael Yang's avatar
Michael Yang committed
920
921
922
923
924
	for _, method := range []string{http.MethodGet, http.MethodHead} {
		r.Handle(method, "/", func(c *gin.Context) {
			c.String(http.StatusOK, "Ollama is running")
		})

Daniel Hiltgen's avatar
Daniel Hiltgen committed
925
		r.Handle(method, "/api/tags", s.ListModelsHandler)
Michael Yang's avatar
Michael Yang committed
926
927
928
		r.Handle(method, "/api/version", func(c *gin.Context) {
			c.JSON(http.StatusOK, gin.H{"version": version.Version})
		})
Michael Yang's avatar
Michael Yang committed
929
930
	}

931
932
933
934
	return r
}

// Serve runs the HTTP server on ln until it fails or the process receives
// SIGINT/SIGTERM.
//
// Before accepting connections it installs the default slog handler, runs
// fixBlobs over the blobs directory, optionally prunes unused layers and
// manifests (unless envconfig.NoPrune is set), creates the scheduler,
// registers the routes on http.DefaultServeMux and initializes the llm
// library.
//
// It returns nil after a signal-triggered shutdown has finished its cleanup,
// and a non-nil error if any startup step fails or the HTTP server stops for
// a reason other than being closed.
func Serve(ln net.Listener) error {
	level := slog.LevelInfo
	if envconfig.Debug {
		level = slog.LevelDebug
	}

	// NOTE: this line is logged before the handler below is installed, so it
	// goes through whatever default logger is active at this point.
	slog.Info("server config", "env", envconfig.Values())
	handler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
		Level:     level,
		AddSource: true,
		ReplaceAttr: func(_ []string, attr slog.Attr) slog.Attr {
			// Shorten the source location to the file's base name.
			if attr.Key == slog.SourceKey {
				source := attr.Value.Any().(*slog.Source)
				source.File = filepath.Base(source.File)
			}

			return attr
		},
	})

	slog.SetDefault(slog.New(handler))

	// An empty digest yields the blobs directory itself.
	blobsDir, err := GetBlobsPath("")
	if err != nil {
		return err
	}
	if err := fixBlobs(blobsDir); err != nil {
		return err
	}

	if !envconfig.NoPrune {
		// clean up unused layers and manifests
		if err := PruneLayers(); err != nil {
			return err
		}

		manifestsPath, err := GetManifestPath()
		if err != nil {
			return err
		}

		if err := PruneDirectory(manifestsPath); err != nil {
			return err
		}
	}

	// ctx is cancelled (via done) only at the very end of the shutdown
	// goroutine below; schedCtx is a child that is cancelled earlier so the
	// scheduler stops before runners are unloaded.
	ctx, done := context.WithCancel(context.Background())
	schedCtx, schedDone := context.WithCancel(ctx)
	sched := InitScheduler(schedCtx)
	s := &Server{addr: ln.Addr(), sched: sched}

	// Routes are registered on http.DefaultServeMux (see the Handler comment
	// below).
	http.Handle("/", s.GenerateRoutes())

	slog.Info(fmt.Sprintf("Listening on %s (version %s)", ln.Addr(), version.Version))
	srvr := &http.Server{
		// Use http.DefaultServeMux so we get net/http/pprof for
		// free.
		//
		// TODO(bmizerany): Decide if we want to make this
		// configurable so it is not exposed by default, or allow
		// users to bind it to a different port. This was a quick
		// and easy way to get pprof, but it may not be the best
		// way.
		Handler: nil,
	}

	// listen for a ctrl+c and stop any loaded llm
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		<-signals
		// Order matters: stop accepting requests, stop the scheduler, unload
		// runners, release GPU resources, and only then signal completion.
		srvr.Close()
		schedDone()
		sched.unloadAllRunners()
		gpu.Cleanup()
		done()
	}()

	if err := llm.Init(); err != nil {
		return fmt.Errorf("unable to initialize llm library %w", err)
	}

	// NOTE(review): Run presumably starts the scheduler in the background and
	// returns, since serving begins right after — confirm.
	s.sched.Run(schedCtx)

	// At startup we retrieve GPU information so we can get log messages before loading a model
	// This will log warnings to the log in case we have problems with detected GPUs
	gpus := gpu.GetGPUInfo()
	gpus.LogDetails()

	err = srvr.Serve(ln)
	// If server is closed from the signal handler, wait for the ctx to be done
	// otherwise error out quickly
	if !errors.Is(err, http.ErrServerClosed) {
		return err
	}
	<-ctx.Done()
	return nil
}
Michael Yang's avatar
Michael Yang committed
1032

1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
// waitForStream consumes progress messages from ch and writes exactly one
// JSON response to c:
//   - 200 with the api.ProgressResponse whose Status is "success",
//   - 500 with the message of the first gin.H received (its "error" string,
//     or a generic message when that key is not a string),
//   - 500 when a value of any other type arrives or ch is closed before a
//     terminal message.
//
// api.ProgressResponse values with any other Status are skipped.
func waitForStream(c *gin.Context, ch chan interface{}) {
	c.Header("Content-Type", "application/json")

	for msg := range ch {
		switch v := msg.(type) {
		case api.ProgressResponse:
			if v.Status != "success" {
				// Intermediate progress update; keep waiting.
				continue
			}
			c.JSON(http.StatusOK, v)
		case gin.H:
			text, ok := v["error"].(string)
			if !ok {
				text = "unexpected error format in progress response"
			}
			c.JSON(http.StatusInternalServerError, gin.H{"error": text})
		default:
			c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected progress response"})
		}

		// Every path that reaches here has written the final response.
		return
	}

	c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected end of progress response"})
}

Michael Yang's avatar
Michael Yang committed
1058
func streamResponse(c *gin.Context, ch chan any) {
1059
	c.Header("Content-Type", "application/x-ndjson")
Michael Yang's avatar
Michael Yang committed
1060
1061
1062
1063
1064
1065
1066
1067
	c.Stream(func(w io.Writer) bool {
		val, ok := <-ch
		if !ok {
			return false
		}

		bts, err := json.Marshal(val)
		if err != nil {
1068
			slog.Info(fmt.Sprintf("streamResponse: json.Marshal failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1069
1070
1071
			return false
		}

1072
		// Delineate chunks with new-line delimiter
Michael Yang's avatar
Michael Yang committed
1073
1074
		bts = append(bts, '\n')
		if _, err := w.Write(bts); err != nil {
1075
			slog.Info(fmt.Sprintf("streamResponse: w.Write failed with %s", err))
Michael Yang's avatar
Michael Yang committed
1076
1077
1078
1079
1080
1081
			return false
		}

		return true
	})
}
Bruce MacDonald's avatar
Bruce MacDonald committed
1082

1083
func (s *Server) ProcessHandler(c *gin.Context) {
1084
	models := []api.ProcessModelResponse{}
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095

	for _, v := range s.sched.loaded {
		model := v.model
		modelDetails := api.ModelDetails{
			Format:            model.Config.ModelFormat,
			Family:            model.Config.ModelFamily,
			Families:          model.Config.ModelFamilies,
			ParameterSize:     model.Config.ModelType,
			QuantizationLevel: model.Config.FileType,
		}

1096
		mr := api.ProcessModelResponse{
1097
1098
1099
1100
1101
1102
1103
1104
			Model:     model.ShortName,
			Name:      model.ShortName,
			Size:      int64(v.estimatedTotal),
			SizeVRAM:  int64(v.estimatedVRAM),
			Digest:    model.Digest,
			Details:   modelDetails,
			ExpiresAt: v.expiresAt,
		}
1105
1106
1107
1108
1109
1110
1111
1112
		// The scheduler waits to set expiresAt, so if a model is loading it's
		// possible that it will be set to the unix epoch. For those cases, just
		// calculate the time w/ the sessionDuration instead.
		var epoch time.Time
		if v.expiresAt == epoch {
			mr.ExpiresAt = time.Now().Add(v.sessionDuration)
		}

1113
1114
1115
		models = append(models, mr)
	}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1116
1117
1118
1119
1120
	slices.SortStableFunc(models, func(i, j api.ProcessModelResponse) int {
		// longest duration remaining listed first
		return cmp.Compare(j.ExpiresAt.Unix(), i.ExpiresAt.Unix())
	})

1121
	c.JSON(http.StatusOK, api.ProcessResponse{Models: models})
1122
1123
}

Daniel Hiltgen's avatar
Daniel Hiltgen committed
1124
func (s *Server) ChatHandler(c *gin.Context) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1125
	var req api.ChatRequest
Michael Yang's avatar
Michael Yang committed
1126
	if err := c.ShouldBindJSON(&req); errors.Is(err, io.EOF) {
Bruce MacDonald's avatar
Bruce MacDonald committed
1127
1128
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing request body"})
		return
Michael Yang's avatar
Michael Yang committed
1129
	} else if err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1130
1131
1132
1133
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
		return
	}

Michael Yang's avatar
Michael Yang committed
1134
	caps := []Capability{CapabilityCompletion}
Michael Yang's avatar
Michael Yang committed
1135
	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
Michael Yang's avatar
Michael Yang committed
1136
1137
	if errors.Is(err, errCapabilityCompletion) {
		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support chat", req.Model)})
Bruce MacDonald's avatar
Bruce MacDonald committed
1138
		return
Michael Yang's avatar
Michael Yang committed
1139
	} else if err != nil {
Michael Yang's avatar
Michael Yang committed
1140
		handleScheduleError(c, req.Model, err)
Bruce MacDonald's avatar
Bruce MacDonald committed
1141
1142
		return
	}
Michael Yang's avatar
Michael Yang committed
1143

Michael Yang's avatar
Michael Yang committed
1144
1145
	if len(req.Messages) == 0 {
		c.JSON(http.StatusOK, api.ChatResponse{
1146
			Model:      req.Model,
Michael Yang's avatar
Michael Yang committed
1147
1148
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant"},
1149
1150
			Done:       true,
			DoneReason: "load",
Michael Yang's avatar
Michael Yang committed
1151
		})
1152
1153
1154
		return
	}

Michael Yang's avatar
Michael Yang committed
1155
	prompt, images, err := chatPrompt(c.Request.Context(), m, r.Tokenize, opts, req.Messages)
Michael Yang's avatar
Michael Yang committed
1156
1157
1158
	if err != nil {
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
		return
1159
1160
	}

Michael Yang's avatar
Michael Yang committed
1161
	slog.Debug("chat request", "images", len(images), "prompt", prompt)
1162

Bruce MacDonald's avatar
Bruce MacDonald committed
1163
1164
1165
	ch := make(chan any)
	go func() {
		defer close(ch)
Michael Yang's avatar
Michael Yang committed
1166
		if err := r.Completion(c.Request.Context(), llm.CompletionRequest{
Michael Yang's avatar
Michael Yang committed
1167
1168
1169
			Prompt:  prompt,
			Images:  images,
			Format:  req.Format,
Michael Yang's avatar
Michael Yang committed
1170
			Options: opts,
Michael Yang's avatar
Michael Yang committed
1171
1172
		}, func(r llm.CompletionResponse) {
			ch <- api.ChatResponse{
1173
1174
1175
1176
1177
				Model:      req.Model,
				CreatedAt:  time.Now().UTC(),
				Message:    api.Message{Role: "assistant", Content: r.Content},
				Done:       r.Done,
				DoneReason: r.DoneReason,
Bruce MacDonald's avatar
Bruce MacDonald committed
1178
1179
1180
1181
1182
1183
1184
				Metrics: api.Metrics{
					PromptEvalCount:    r.PromptEvalCount,
					PromptEvalDuration: r.PromptEvalDuration,
					EvalCount:          r.EvalCount,
					EvalDuration:       r.EvalDuration,
				},
			}
Michael Yang's avatar
Michael Yang committed
1185
		}); err != nil {
Bruce MacDonald's avatar
Bruce MacDonald committed
1186
1187
1188
1189
1190
			ch <- gin.H{"error": err.Error()}
		}
	}()

	if req.Stream != nil && !*req.Stream {
Michael Yang's avatar
Michael Yang committed
1191
		var r api.ChatResponse
Bruce MacDonald's avatar
Bruce MacDonald committed
1192
		var sb strings.Builder
Michael Yang's avatar
Michael Yang committed
1193
1194
		for rr := range ch {
			switch t := rr.(type) {
1195
			case api.ChatResponse:
Michael Yang's avatar
Michael Yang committed
1196
1197
				sb.WriteString(t.Message.Content)
				r = t
1198
			case gin.H:
Michael Yang's avatar
Michael Yang committed
1199
1200
1201
				msg, ok := t["error"].(string)
				if !ok {
					msg = "unexpected error format in response"
1202
				}
Michael Yang's avatar
Michael Yang committed
1203
1204
1205

				c.JSON(http.StatusInternalServerError, gin.H{"error": msg})
				return
1206
			default:
Michael Yang's avatar
Michael Yang committed
1207
				c.JSON(http.StatusInternalServerError, gin.H{"error": "unexpected response"})
1208
				return
Bruce MacDonald's avatar
Bruce MacDonald committed
1209
1210
			}
		}
1211

Michael Yang's avatar
Michael Yang committed
1212
1213
		r.Message.Content = sb.String()
		c.JSON(http.StatusOK, r)
Bruce MacDonald's avatar
Bruce MacDonald committed
1214
1215
1216
1217
1218
		return
	}

	streamResponse(c, ch)
}
1219

Michael Yang's avatar
Michael Yang committed
1220
func handleScheduleError(c *gin.Context, name string, err error) {
Michael Yang's avatar
Michael Yang committed
1221
	switch {
Michael Yang's avatar
Michael Yang committed
1222
1223
	case errors.Is(err, errRequired):
		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1224
	case errors.Is(err, context.Canceled):
1225
		c.JSON(499, gin.H{"error": "request canceled"})
Michael Yang's avatar
Michael Yang committed
1226
	case errors.Is(err, ErrMaxQueue):
1227
		c.JSON(http.StatusServiceUnavailable, gin.H{"error": err.Error()})
Michael Yang's avatar
Michael Yang committed
1228
1229
	case errors.Is(err, os.ErrNotExist):
		c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model %q not found, try pulling it first", name)})
Michael Yang's avatar
Michael Yang committed
1230
1231
	default:
		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
1232
1233
	}
}