Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
cfa84b84
Unverified
Commit
cfa84b84
authored
May 09, 2024
by
Bruce MacDonald
Committed by
GitHub
May 09, 2024
Browse files
add done_reason to the api (#4235)
parent
1580ed4c
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
44 additions
and
40 deletions
+44
-40
api/types.go
api/types.go
+7
-3
llm/server.go
llm/server.go
+12
-4
openai/openai.go
openai/openai.go
+6
-18
server/routes.go
server/routes.go
+19
-15
No files found.
api/types.go
View file @
cfa84b84
...
@@ -114,9 +114,10 @@ type Message struct {
...
@@ -114,9 +114,10 @@ type Message struct {
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// ChatResponse is the response returned by [Client.Chat]. Its fields are
// similar to [GenerateResponse].
// similar to [GenerateResponse].
type
ChatResponse
struct
{
type
ChatResponse
struct
{
Model
string
`json:"model"`
Model
string
`json:"model"`
CreatedAt
time
.
Time
`json:"created_at"`
CreatedAt
time
.
Time
`json:"created_at"`
Message
Message
`json:"message"`
Message
Message
`json:"message"`
DoneReason
string
`json:"done_reason"`
Done
bool
`json:"done"`
Done
bool
`json:"done"`
...
@@ -309,6 +310,9 @@ type GenerateResponse struct {
...
@@ -309,6 +310,9 @@ type GenerateResponse struct {
// Done specifies if the response is complete.
// Done specifies if the response is complete.
Done
bool
`json:"done"`
Done
bool
`json:"done"`
// DoneReason is the reason the model stopped generating text.
DoneReason
string
`json:"done_reason"`
// Context is an encoding of the conversation used in this response; this
// Context is an encoding of the conversation used in this response; this
// can be sent in the next request to keep a conversational memory.
// can be sent in the next request to keep a conversational memory.
Context
[]
int
`json:"context,omitempty"`
Context
[]
int
`json:"context,omitempty"`
...
...
llm/server.go
View file @
cfa84b84
...
@@ -576,10 +576,11 @@ type ImageData struct {
...
@@ -576,10 +576,11 @@ type ImageData struct {
}
}
type
completion
struct
{
type
completion
struct
{
Content
string
`json:"content"`
Content
string
`json:"content"`
Model
string
`json:"model"`
Model
string
`json:"model"`
Prompt
string
`json:"prompt"`
Prompt
string
`json:"prompt"`
Stop
bool
`json:"stop"`
Stop
bool
`json:"stop"`
StoppedLimit
bool
`json:"stopped_limit"`
Timings
struct
{
Timings
struct
{
PredictedN
int
`json:"predicted_n"`
PredictedN
int
`json:"predicted_n"`
...
@@ -598,6 +599,7 @@ type CompletionRequest struct {
...
@@ -598,6 +599,7 @@ type CompletionRequest struct {
type
CompletionResponse
struct
{
type
CompletionResponse
struct
{
Content
string
Content
string
DoneReason
string
Done
bool
Done
bool
PromptEvalCount
int
PromptEvalCount
int
PromptEvalDuration
time
.
Duration
PromptEvalDuration
time
.
Duration
...
@@ -739,8 +741,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
...
@@ -739,8 +741,14 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
}
}
if
c
.
Stop
{
if
c
.
Stop
{
doneReason
:=
"stop"
if
c
.
StoppedLimit
{
doneReason
=
"length"
}
fn
(
CompletionResponse
{
fn
(
CompletionResponse
{
Done
:
true
,
Done
:
true
,
DoneReason
:
doneReason
,
PromptEvalCount
:
c
.
Timings
.
PromptN
,
PromptEvalCount
:
c
.
Timings
.
PromptN
,
PromptEvalDuration
:
parseDurationMs
(
c
.
Timings
.
PromptMS
),
PromptEvalDuration
:
parseDurationMs
(
c
.
Timings
.
PromptMS
),
EvalCount
:
c
.
Timings
.
PredictedN
,
EvalCount
:
c
.
Timings
.
PredictedN
,
...
...
openai/openai.go
View file @
cfa84b84
...
@@ -107,15 +107,9 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
...
@@ -107,15 +107,9 @@ func toChatCompletion(id string, r api.ChatResponse) ChatCompletion {
Model
:
r
.
Model
,
Model
:
r
.
Model
,
SystemFingerprint
:
"fp_ollama"
,
SystemFingerprint
:
"fp_ollama"
,
Choices
:
[]
Choice
{{
Choices
:
[]
Choice
{{
Index
:
0
,
Index
:
0
,
Message
:
Message
{
Role
:
r
.
Message
.
Role
,
Content
:
r
.
Message
.
Content
},
Message
:
Message
{
Role
:
r
.
Message
.
Role
,
Content
:
r
.
Message
.
Content
},
FinishReason
:
func
(
done
bool
)
*
string
{
FinishReason
:
&
r
.
DoneReason
,
if
done
{
reason
:=
"stop"
return
&
reason
}
return
nil
}(
r
.
Done
),
}},
}},
Usage
:
Usage
{
Usage
:
Usage
{
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
// TODO: ollama returns 0 for prompt eval if the prompt was cached, but openai returns the actual count
...
@@ -135,15 +129,9 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
...
@@ -135,15 +129,9 @@ func toChunk(id string, r api.ChatResponse) ChatCompletionChunk {
SystemFingerprint
:
"fp_ollama"
,
SystemFingerprint
:
"fp_ollama"
,
Choices
:
[]
ChunkChoice
{
Choices
:
[]
ChunkChoice
{
{
{
Index
:
0
,
Index
:
0
,
Delta
:
Message
{
Role
:
"assistant"
,
Content
:
r
.
Message
.
Content
},
Delta
:
Message
{
Role
:
"assistant"
,
Content
:
r
.
Message
.
Content
},
FinishReason
:
func
(
done
bool
)
*
string
{
FinishReason
:
&
r
.
DoneReason
,
if
done
{
reason
:=
"stop"
return
&
reason
}
return
nil
}(
r
.
Done
),
},
},
},
},
}
}
...
...
server/routes.go
View file @
cfa84b84
...
@@ -152,9 +152,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
...
@@ -152,9 +152,10 @@ func (s *Server) GenerateHandler(c *gin.Context) {
// of `raw` mode so we need to check for it too
// of `raw` mode so we need to check for it too
if
req
.
Prompt
==
""
&&
req
.
Template
==
""
&&
req
.
System
==
""
{
if
req
.
Prompt
==
""
&&
req
.
Template
==
""
&&
req
.
System
==
""
{
c
.
JSON
(
http
.
StatusOK
,
api
.
GenerateResponse
{
c
.
JSON
(
http
.
StatusOK
,
api
.
GenerateResponse
{
CreatedAt
:
time
.
Now
()
.
UTC
(),
CreatedAt
:
time
.
Now
()
.
UTC
(),
Model
:
req
.
Model
,
Model
:
req
.
Model
,
Done
:
true
,
Done
:
true
,
DoneReason
:
"load"
,
})
})
return
return
}
}
...
@@ -222,10 +223,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
...
@@ -222,10 +223,11 @@ func (s *Server) GenerateHandler(c *gin.Context) {
}
}
resp
:=
api
.
GenerateResponse
{
resp
:=
api
.
GenerateResponse
{
Model
:
req
.
Model
,
Model
:
req
.
Model
,
CreatedAt
:
time
.
Now
()
.
UTC
(),
CreatedAt
:
time
.
Now
()
.
UTC
(),
Done
:
r
.
Done
,
Done
:
r
.
Done
,
Response
:
r
.
Content
,
Response
:
r
.
Content
,
DoneReason
:
r
.
DoneReason
,
Metrics
:
api
.
Metrics
{
Metrics
:
api
.
Metrics
{
PromptEvalCount
:
r
.
PromptEvalCount
,
PromptEvalCount
:
r
.
PromptEvalCount
,
PromptEvalDuration
:
r
.
PromptEvalDuration
,
PromptEvalDuration
:
r
.
PromptEvalDuration
,
...
@@ -1215,10 +1217,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
...
@@ -1215,10 +1217,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
// an empty request loads the model
// an empty request loads the model
if
len
(
req
.
Messages
)
==
0
||
prompt
==
""
{
if
len
(
req
.
Messages
)
==
0
||
prompt
==
""
{
resp
:=
api
.
ChatResponse
{
resp
:=
api
.
ChatResponse
{
CreatedAt
:
time
.
Now
()
.
UTC
(),
CreatedAt
:
time
.
Now
()
.
UTC
(),
Model
:
req
.
Model
,
Model
:
req
.
Model
,
Done
:
true
,
Done
:
true
,
Message
:
api
.
Message
{
Role
:
"assistant"
},
DoneReason
:
"load"
,
Message
:
api
.
Message
{
Role
:
"assistant"
},
}
}
c
.
JSON
(
http
.
StatusOK
,
resp
)
c
.
JSON
(
http
.
StatusOK
,
resp
)
return
return
...
@@ -1251,10 +1254,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
...
@@ -1251,10 +1254,11 @@ func (s *Server) ChatHandler(c *gin.Context) {
fn
:=
func
(
r
llm
.
CompletionResponse
)
{
fn
:=
func
(
r
llm
.
CompletionResponse
)
{
resp
:=
api
.
ChatResponse
{
resp
:=
api
.
ChatResponse
{
Model
:
req
.
Model
,
Model
:
req
.
Model
,
CreatedAt
:
time
.
Now
()
.
UTC
(),
CreatedAt
:
time
.
Now
()
.
UTC
(),
Message
:
api
.
Message
{
Role
:
"assistant"
,
Content
:
r
.
Content
},
Message
:
api
.
Message
{
Role
:
"assistant"
,
Content
:
r
.
Content
},
Done
:
r
.
Done
,
Done
:
r
.
Done
,
DoneReason
:
r
.
DoneReason
,
Metrics
:
api
.
Metrics
{
Metrics
:
api
.
Metrics
{
PromptEvalCount
:
r
.
PromptEvalCount
,
PromptEvalCount
:
r
.
PromptEvalCount
,
PromptEvalDuration
:
r
.
PromptEvalDuration
,
PromptEvalDuration
:
r
.
PromptEvalDuration
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment