ollama · commit cfa84b84 (unverified)

add done_reason to the api (#4235)

Authored May 09, 2024 by Bruce MacDonald; committed by GitHub on May 09, 2024.
Parent: 1580ed4c
Showing 4 changed files with 44 additions and 40 deletions (+44, -40).
api/types.go       +7   -3
llm/server.go      +12  -4
openai/openai.go   +6   -18
server/routes.go   +19  -15
api/types.go

@@ -117,6 +117,7 @@ type ChatResponse struct {
 	Model      string    `json:"model"`
 	CreatedAt  time.Time `json:"created_at"`
 	Message    Message   `json:"message"`
+	DoneReason string    `json:"done_reason"`
 
 	Done bool `json:"done"`

@@ -309,6 +310,9 @@ type GenerateResponse struct {
 	// Done specifies if the response is complete.
 	Done bool `json:"done"`
 
+	// DoneReason is the reason the model stopped generating text.
+	DoneReason string `json:"done_reason"`
+
 	// Context is an encoding of the conversation used in this response; this
 	// can be sent in the next request to keep a conversational memory.
 	Context []int `json:"context,omitempty"`
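Because done_reason is a plain additive field, existing clients are unaffected; a client that cares why generation ended can simply decode the extra key. A minimal consumer sketch (the chatResponse struct here is a local stand-in for api.ChatResponse, and the model name is made up):

package main

import (
	"encoding/json"
	"fmt"
)

// chatResponse mirrors just the fields this commit touches on api.ChatResponse.
type chatResponse struct {
	Model      string `json:"model"`
	Done       bool   `json:"done"`
	DoneReason string `json:"done_reason"`
}

func main() {
	// A final stream message as the server would emit it after this change.
	raw := `{"model":"llama3","done":true,"done_reason":"length"}`

	var resp chatResponse
	if err := json.Unmarshal([]byte(raw), &resp); err != nil {
		panic(err)
	}

	if resp.Done {
		switch resp.DoneReason {
		case "stop":
			fmt.Println("model finished naturally")
		case "length":
			fmt.Println("generation hit the token limit")
		case "load":
			fmt.Println("empty request: the model was only loaded")
		}
	}
}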
llm/server.go

@@ -580,6 +580,7 @@ type completion struct {
 	Model        string `json:"model"`
 	Prompt       string `json:"prompt"`
 	Stop         bool   `json:"stop"`
+	StoppedLimit bool   `json:"stopped_limit"`
 
 	Timings struct {
 		PredictedN  int     `json:"predicted_n"`

@@ -598,6 +599,7 @@ type CompletionRequest struct {
 type CompletionResponse struct {
 	Content            string
+	DoneReason         string
 	Done               bool
 	PromptEvalCount    int
 	PromptEvalDuration time.Duration

@@ -739,8 +741,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
 		}
 
 		if c.Stop {
+			doneReason := "stop"
+			if c.StoppedLimit {
+				doneReason = "length"
+			}
+
 			fn(CompletionResponse{
 				Done:               true,
+				DoneReason:         doneReason,
 				PromptEvalCount:    c.Timings.PromptN,
 				PromptEvalDuration: parseDurationMs(c.Timings.PromptMS),
 				EvalCount:          c.Timings.PredictedN,
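The three additions above thread one value through: the llama.cpp completion payload reports whether generation stopped (Stop) and whether the token limit caused it (StoppedLimit), and the server folds the pair into a single string. Restated as a standalone helper for illustration only (the commit inlines this logic in Completion; no such function exists in the codebase):

package main

import "fmt"

// doneReason restates the mapping from the hunk above.
func doneReason(stop, stoppedLimit bool) string {
	if !stop {
		return "" // completion still in progress
	}
	if stoppedLimit {
		return "length" // stopped because the predict limit was reached
	}
	return "stop" // stopped naturally (EOS token or stop sequence)
}

func main() {
	fmt.Println(doneReason(true, false)) // "stop"
	fmt.Println(doneReason(true, true))  // "length"
}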
openai/openai.go

@@ -109,13 +109,7 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
 		Choices: []Choice{{
 			Index:   0,
 			Message: Message{Role: r.Message.Role, Content: r.Message.Content},
-			FinishReason: func(done bool) *string {
-				if done {
-					reason := "stop"
-					return &reason
-				}
-				return nil
-			}(r.Done),
+			FinishReason: &r.DoneReason,
 		}},
 		Usage: Usage{
 			// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count

@@ -137,13 +131,7 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
 		{
 			Index: 0,
 			Delta: Message{Role: "assistant", Content: r.Message.Content},
-			FinishReason: func(done bool) *string {
-				if done {
-					reason := "stop"
-					return &reason
-				}
-				return nil
-			}(r.Done),
+			FinishReason: &r.DoneReason,
 		},
 	},
 }
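The compat layer no longer synthesizes "stop" from the Done flag; it forwards done_reason verbatim. One consequence worth noting: since &r.DoneReason is never nil, intermediate chunks should serialize an empty finish_reason rather than a JSON null, so a defensive client can treat empty and missing alike. A minimal sketch, assuming the field keeps its finish_reason JSON tag (chunkChoice is a local stand-in, not ollama's type):

package main

import (
	"encoding/json"
	"fmt"
)

// chunkChoice mirrors just enough of a streamed OpenAI-compatible choice
// to read finish_reason, which after this commit carries done_reason verbatim.
type chunkChoice struct {
	FinishReason *string `json:"finish_reason"`
}

// finished treats a missing finish_reason and an empty one the same way,
// since the pointer now targets "" until the final chunk.
func finished(c chunkChoice) bool {
	return c.FinishReason != nil && *c.FinishReason != ""
}

func main() {
	var mid, last chunkChoice
	json.Unmarshal([]byte(`{"finish_reason":""}`), &mid)
	json.Unmarshal([]byte(`{"finish_reason":"stop"}`), &last)
	fmt.Println(finished(mid), finished(last)) // false true
}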
server/routes.go

@@ -155,6 +155,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 		})
 		return
 	}

@@ -226,6 +227,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Done:       r.Done,
 			Response:   r.Content,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,

@@ -1218,6 +1220,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Model:      req.Model,
 			Done:       true,
+			DoneReason: "load",
 			Message:    api.Message{Role: "assistant"},
 		}
 		c.JSON(http.StatusOK, resp)

@@ -1255,6 +1258,7 @@ func (s *Server) ChatHandler(c *gin.Context) {
 			CreatedAt:  time.Now().UTC(),
 			Message:    api.Message{Role: "assistant", Content: r.Content},
 			Done:       r.Done,
+			DoneReason: r.DoneReason,
 			Metrics: api.Metrics{
 				PromptEvalCount:    r.PromptEvalCount,
 				PromptEvalDuration: r.PromptEvalDuration,
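End to end, the new field is observable from both handlers. Per the first hunk above, a generate request with no prompt returns immediately after loading the model with done true and done_reason "load". A quick smoke-test sketch against a local server (assumes ollama's default port 11434 and an already-pulled model; the model name is made up):

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// An empty prompt makes GenerateHandler respond as soon as the model
	// is loaded; after this commit the response says why: done_reason "load".
	body, _ := json.Marshal(map[string]any{"model": "llama3"})

	resp, err := http.Post("http://localhost:11434/api/generate",
		"application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var out struct {
		Done       bool   `json:"done"`
		DoneReason string `json:"done_reason"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		panic(err)
	}
	fmt.Println(out.Done, out.DoneReason) // expected: true load
}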