add suffix support to generate endpoint

this change is triggered by the presence of "suffix", particularly useful for code completion tasks

add suffix support to generate endpoint
this change is triggered by the presence of "suffix", particularly useful for code completion tasks
d290e875 · Michael Yang · 987dbab0 · d290e875 · d290e875 · d290e875
Commit d290e875 authored Jun 20, 2024 by Michael Yang
6 changed files
--- a/api/types.go
+++ b/api/types.go
@@ -47,6 +47,9 @@ type GenerateRequest struct {
 	// Prompt is the textual prompt to send to the model.
 	Prompt string `json:"prompt"`
+	// Suffix is the text that comes after the inserted text.
+	Suffix string `json:"suffix"`
 	// System overrides the model's default system message/prompt.
 	System string `json:"system"`

--- a/server/images.go
+++ b/server/images.go
@@ -34,13 +34,19 @@ import (
 	"github.com/ollama/ollama/version"
 )
-var errCapabilityCompletion = errors.New("completion")
+var (
+	errCapabilities         = errors.New("does not support")
+	errCapabilityCompletion = errors.New("completion")
+	errCapabilityTools      = errors.New("tools")
+	errCapabilityInsert     = errors.New("insert")
+)
 type Capability string
 const (
 	CapabilityCompletion = Capability("completion")
 	CapabilityTools      = Capability("tools")
+	CapabilityInsert     = Capability("insert")
 )
 type registryOptions struct {
@@ -93,7 +99,12 @@ func (m *Model) CheckCapabilities(caps ...Capability) error {
 			}
 		case CapabilityTools:
 			if !slices.Contains(m.Template.Vars(), "tools") {
-				errs = append(errs, errors.New("tools"))
+				errs = append(errs, errCapabilityTools)
+			}
+		case CapabilityInsert:
+			vars := m.Template.Vars()
+			if !slices.Contains(vars, "suffix") {
+				errs = append(errs, errCapabilityInsert)
 			}
 		default:
 			slog.Error("unknown capability", "capability", cap)
@@ -102,7 +113,7 @@ func (m *Model) CheckCapabilities(caps ...Capability) error {
 	}
 	if err := errors.Join(errs...); err != nil {
-		return fmt.Errorf("does not support %w", errors.Join(errs...))
+		return fmt.Errorf("%w %w", errCapabilities, errors.Join(errs...))
 	}
 	return nil

--- a/server/routes.go
+++ b/server/routes.go
@@ -122,6 +122,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	}
 	caps := []Capability{CapabilityCompletion}
+	if req.Suffix != "" {
+		caps = append(caps, CapabilityInsert)
+	}
 	r, m, opts, err := s.scheduleRunner(c.Request.Context(), req.Model, caps, req.Options, req.KeepAlive)
 	if errors.Is(err, errCapabilityCompletion) {
 		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("%q does not support generate", req.Model)})
@@ -150,19 +154,6 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	prompt := req.Prompt
 	if !req.Raw {
-		var msgs []api.Message
-		if req.System != "" {
-			msgs = append(msgs, api.Message{Role: "system", Content: req.System})
-		} else if m.System != "" {
-			msgs = append(msgs, api.Message{Role: "system", Content: m.System})
-		}
-		for _, i := range images {
-			msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
-		}
-		msgs = append(msgs, api.Message{Role: "user", Content: req.Prompt})
 		tmpl := m.Template
 		if req.Template != "" {
 			tmpl, err = template.Parse(req.Template)
@@ -183,7 +174,26 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			b.WriteString(s)
 		}
-		if err := tmpl.Execute(&b, template.Values{Messages: msgs}); err != nil {
+		var values template.Values
+		if req.Suffix != "" {
+			values.Prompt = prompt
+			values.Suffix = req.Suffix
+		} else {
+			var msgs []api.Message
+			if req.System != "" {
+				msgs = append(msgs, api.Message{Role: "system", Content: req.System})
+			} else if m.System != "" {
+				msgs = append(msgs, api.Message{Role: "system", Content: m.System})
+			}
+			for _, i := range images {
+				msgs = append(msgs, api.Message{Role: "user", Content: fmt.Sprintf("[img-%d]", i.ID)})
+			}
+			values.Messages = append(msgs, api.Message{Role: "user", Content: req.Prompt})
+		}
+		if err := tmpl.Execute(&b, values); err != nil {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 			return
 		}
@@ -1394,7 +1404,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 func handleScheduleError(c *gin.Context, name string, err error) {
 	switch {
-	case errors.Is(err, errRequired):
+	case errors.Is(err, errCapabilities), errors.Is(err, errRequired):
 		c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
 	case errors.Is(err, context.Canceled):
 		c.JSON(499, gin.H{"error": "request canceled"})

--- a/server/routes_generate_test.go
+++ b/server/routes_generate_test.go
@@ -73,6 +73,8 @@ func TestGenerateChat(t *testing.T) {
 			getCpuFn:      gpu.GetCPUInfo,
 			reschedDelay:  250 * time.Millisecond,
 			loadFn: func(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel int) {
+				// add 10ms delay to simulate loading
+				time.Sleep(10 * time.Millisecond)
 				req.successCh <- &runnerRef{
 					llama: &mock,
 				}
@@ -83,7 +85,7 @@ func TestGenerateChat(t *testing.T) {
 	go s.sched.Run(context.TODO())
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-		Name: "test",
+		Model: "test",
 		Modelfile: fmt.Sprintf(`FROM %s
 		TEMPLATE """
 {{- if .System }}System: {{ .System }} {{ end }}
@@ -141,9 +143,9 @@ func TestGenerateChat(t *testing.T) {
 		}
 	})
-	t.Run("missing capabilities", func(t *testing.T) {
+	t.Run("missing capabilities chat", func(t *testing.T) {
 		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-			Name: "bert",
+			Model: "bert",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
 				"bert.pooling_type":    uint32(0),
@@ -243,7 +245,7 @@ func TestGenerateChat(t *testing.T) {
 		}
 		if actual.TotalDuration == 0 {
-			t.Errorf("expected load duration > 0, got 0")
+			t.Errorf("expected total duration > 0, got 0")
 		}
 	}
@@ -379,7 +381,7 @@ func TestGenerate(t *testing.T) {
 	go s.sched.Run(context.TODO())
 	w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-		Name: "test",
+		Model: "test",
 		Modelfile: fmt.Sprintf(`FROM %s
 		TEMPLATE """
 {{- if .System }}System: {{ .System }} {{ end }}
@@ -437,9 +439,9 @@ func TestGenerate(t *testing.T) {
 		}
 	})
-	t.Run("missing capabilities", func(t *testing.T) {
+	t.Run("missing capabilities generate", func(t *testing.T) {
 		w := createRequest(t, s.CreateModelHandler, api.CreateRequest{
-			Name: "bert",
+			Model: "bert",
 			Modelfile: fmt.Sprintf("FROM %s", createBinFile(t, llm.KV{
 				"general.architecture": "bert",
 				"bert.pooling_type":    uint32(0),
@@ -464,6 +466,22 @@ func TestGenerate(t *testing.T) {
 		}
 	})
+	t.Run("missing capabilities suffix", func(t *testing.T) {
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test",
+			Prompt: "def add(",
+			Suffix: "    return c",
+		})
+		if w.Code != http.StatusBadRequest {
+			t.Errorf("expected status 400, got %d", w.Code)
+		}
+		if diff := cmp.Diff(w.Body.String(), `{"error":"test does not support insert"}`); diff != "" {
+			t.Errorf("mismatch (-got +want):\n%s", diff)
+		}
+	})
 	t.Run("load model", func(t *testing.T) {
 		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
 			Model: "test",
@@ -540,7 +558,7 @@ func TestGenerate(t *testing.T) {
 		}
 		if actual.TotalDuration == 0 {
-			t.Errorf("expected load duration > 0, got 0")
+			t.Errorf("expected total duration > 0, got 0")
 		}
 	}
@@ -632,6 +650,49 @@ func TestGenerate(t *testing.T) {
 		checkGenerateResponse(t, w.Body, "test-system", "Abra kadabra!")
 	})
+	w = createRequest(t, s.CreateModelHandler, api.CreateRequest{
+		Model: "test-suffix",
+		Modelfile: `FROM test
+TEMPLATE """{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
+{{- else }}{{ .Prompt }}
+{{- end }}"""`,
+	})
+	if w.Code != http.StatusOK {
+		t.Fatalf("expected status 200, got %d", w.Code)
+	}
+	t.Run("prompt with suffix", func(t *testing.T) {
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test-suffix",
+			Prompt: "def add(",
+			Suffix: "    return c",
+		})
+		if w.Code != http.StatusOK {
+			t.Errorf("expected status 200, got %d", w.Code)
+		}
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "<PRE> def add( <SUF>    return c <MID>"); diff != "" {
+			t.Errorf("mismatch (-got +want):\n%s", diff)
+		}
+	})
+	t.Run("prompt without suffix", func(t *testing.T) {
+		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
+			Model:  "test-suffix",
+			Prompt: "def add(",
+		})
+		if w.Code != http.StatusOK {
+			t.Errorf("expected status 200, got %d", w.Code)
+		}
+		if diff := cmp.Diff(mock.CompletionRequest.Prompt, "def add("); diff != "" {
+			t.Errorf("mismatch (-got +want):\n%s", diff)
+		}
+	})
 	t.Run("raw", func(t *testing.T) {
 		w := createRequest(t, s.GenerateHandler, api.GenerateRequest{
 			Model:  "test-system",

--- a/template/template.go
+++ b/template/template.go
@@ -151,6 +151,8 @@ func (t *Template) Vars() []string {
 type Values struct {
 	Messages []api.Message
 	Tools    []api.Tool
+	Prompt   string
+	Suffix   string
 	// forceLegacy is a flag used to test compatibility with legacy templates
 	forceLegacy bool
@@ -204,7 +206,13 @@ func (t *Template) Subtree(fn func(parse.Node) bool) *template.Template {
 func (t *Template) Execute(w io.Writer, v Values) error {
 	system, messages := collate(v.Messages)
-	if !v.forceLegacy && slices.Contains(t.Vars(), "messages") {
+	if v.Prompt != "" && v.Suffix != "" {
+		return t.Template.Execute(w, map[string]any{
+			"Prompt":   v.Prompt,
+			"Suffix":   v.Suffix,
+			"Response": "",
+		})
+	} else if !v.forceLegacy && slices.Contains(t.Vars(), "messages") {
 		return t.Template.Execute(w, map[string]any{
 			"System":   system,
 			"Messages": messages,

--- a/template/template_test.go
+++ b/template/template_test.go
@@ -359,3 +359,38 @@ Answer: `,
 		})
 	}
 }
+func TestExecuteWithSuffix(t *testing.T) {
+	tmpl, err := Parse(`{{- if .Suffix }}<PRE> {{ .Prompt }} <SUF>{{ .Suffix }} <MID>
+{{- else }}{{ .Prompt }}
+{{- end }}`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	cases := []struct {
+		name   string
+		values Values
+		expect string
+	}{
+		{
+			"message", Values{Messages: []api.Message{{Role: "user", Content: "hello"}}}, "hello",
+		},
+		{
+			"prompt suffix", Values{Prompt: "def add(", Suffix: "return x"}, "<PRE> def add( <SUF>return x <MID>",
+		},
+	}
+	for _, tt := range cases {
+		t.Run(tt.name, func(t *testing.T) {
+			var b bytes.Buffer
+			if err := tmpl.Execute(&b, tt.values); err != nil {
+				t.Fatal(err)
+			}
+			if diff := cmp.Diff(b.String(), tt.expect); diff != "" {
+				t.Errorf("mismatch (-got +want):\n%s", diff)
+			}
+		})
+	}
+}