prompt.go 3.1 KB
Newer Older
1
2
3
package server

import (
Michael Yang's avatar
Michael Yang committed
4
5
	"bytes"
	"context"
6
7
	"errors"
	"fmt"
8
	"log/slog"
9
	"slices"
10
	"strings"
11

12
	"github.com/ollama/ollama/api"
Michael Yang's avatar
Michael Yang committed
13
	"github.com/ollama/ollama/llm"
Michael Yang's avatar
Michael Yang committed
14
	"github.com/ollama/ollama/template"
15
16
)

Michael Yang's avatar
Michael Yang committed
17
18
19
20
21
// tokenizeFunc tokenizes a string under the given context and returns the resulting tokens as ints.
// chatPrompt calls it on each rendered candidate prompt and uses the length of the returned slice
// as that prompt's token count.
type tokenizeFunc func(context.Context, string) ([]int, error)

// chatPrompt accepts a list of messages and returns the prompt and images that should be used for the next chat turn.
// chatPrompt truncates any messages that exceed the context window of the model, making sure to always include 1) the
// latest message and 2) system messages
Devon Rifkin's avatar
Devon Rifkin committed
22
func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.Options, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (prompt string, images []llm.ImageData, _ error) {
Michael Yang's avatar
Michael Yang committed
23
	var system []api.Message
24

25
	// TODO: Ideally we would compute this from the projector metadata but some pieces are implementation dependent
26
27
	// Clip images are represented as 768 tokens, each an embedding
	imageNumTokens := 768
28

Michael Yang's avatar
Michael Yang committed
29
	n := len(msgs) - 1
Michael Yang's avatar
Michael Yang committed
30
	// in reverse, find all messages that fit into context window
31
32
33
34
35
36
	for i := n; i >= 0; i-- {
		// always include the last message
		if i == n {
			continue
		}

Michael Yang's avatar
Michael Yang committed
37
38
39
40
41
42
43
		system = make([]api.Message, 0)
		for j := range i {
			if msgs[j].Role == "system" {
				system = append(system, msgs[j])
			}
		}

44
		thinkVal := false
Devon Rifkin's avatar
Devon Rifkin committed
45
		thinkLevel := ""
46
		if think != nil {
Devon Rifkin's avatar
Devon Rifkin committed
47
48
			thinkVal = think.AsBool()
			thinkLevel = think.AsString()
49
		}
Michael Yang's avatar
Michael Yang committed
50
		var b bytes.Buffer
Devon Rifkin's avatar
Devon Rifkin committed
51
		if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[i:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil {
Michael Yang's avatar
Michael Yang committed
52
			return "", nil, err
53
54
		}

Michael Yang's avatar
Michael Yang committed
55
		s, err := tokenize(ctx, b.String())
56
		if err != nil {
Michael Yang's avatar
Michael Yang committed
57
			return "", nil, err
58
59
		}

60
		ctxLen := len(s)
Michael Yang's avatar
Michael Yang committed
61
		if m.ProjectorPaths != nil {
Michael Yang's avatar
Michael Yang committed
62
			for _, m := range msgs[i:] {
63
				ctxLen += imageNumTokens * len(m.Images)
Michael Yang's avatar
Michael Yang committed
64
			}
65
66
		}

67
		if ctxLen > opts.NumCtx {
Michael Yang's avatar
Michael Yang committed
68
			slog.Debug("truncating input messages which exceed context length", "truncated", len(msgs[i:]))
69
			break
Michael Yang's avatar
Michael Yang committed
70
71
		} else {
			n = i
72
		}
Michael Yang's avatar
Michael Yang committed
73
	}
74

75
76
	currMsgIdx := n

77
	for cnt, msg := range msgs[currMsgIdx:] {
78
79
80
81
82
		if slices.Contains(m.Config.ModelFamilies, "mllama") && len(msg.Images) > 1 {
			return "", nil, errors.New("this model only supports one image while more than one image requested")
		}

		var prefix string
83
84
85
		prompt := msg.Content

		for _, i := range msg.Images {
86
87
88
			imgData := llm.ImageData{
				ID:   len(images),
				Data: i,
89
			}
90

91
92
93
94
95
			imgTag := fmt.Sprintf("[img-%d]", imgData.ID)
			if !strings.Contains(prompt, "[img]") {
				prefix += imgTag
			} else {
				prompt = strings.Replace(prompt, "[img]", imgTag, 1)
96
			}
97
98

			images = append(images, imgData)
99
		}
100
		msgs[currMsgIdx+cnt].Content = prefix + prompt
101
102
	}

Michael Yang's avatar
Michael Yang committed
103
	// truncate any messages that do not fit into the context window
Michael Yang's avatar
Michael Yang committed
104
	var b bytes.Buffer
105
	thinkVal := false
Devon Rifkin's avatar
Devon Rifkin committed
106
	thinkLevel := ""
107
	if think != nil {
Devon Rifkin's avatar
Devon Rifkin committed
108
109
		thinkVal = think.AsBool()
		thinkLevel = think.AsString()
110
	}
Devon Rifkin's avatar
Devon Rifkin committed
111
	if err := m.Template.Execute(&b, template.Values{Messages: append(system, msgs[currMsgIdx:]...), Tools: tools, Think: thinkVal, ThinkLevel: thinkLevel, IsThinkSet: think != nil}); err != nil {
Michael Yang's avatar
Michael Yang committed
112
		return "", nil, err
113
114
	}

115
116
	return b.String(), images, nil
}