Unverified Commit 5cba29b9 authored by Jeffrey Morgan, committed by GitHub

JSON mode: add `"format"` as an API parameter (#1051)



* add `"format": "json"` as an API parameter
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
parent 5b39503b
@@ -38,6 +38,7 @@ type GenerateRequest struct {
    Context []int                  `json:"context,omitempty"`
    Stream  *bool                  `json:"stream,omitempty"`
    Raw     bool                   `json:"raw,omitempty"`
    Format  string                 `json:"format"`
    Options map[string]interface{} `json:"options"`
}
...
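For illustration, a minimal standalone sketch of how the new field serializes on the wire; it mirrors the struct fields above rather than importing the repository's `api` package:

```go
package main

import (
    "encoding/json"
    "fmt"
)

// GenerateRequest mirrors the relevant fields of the struct above,
// for illustration only.
type GenerateRequest struct {
    Model  string `json:"model"`
    Prompt string `json:"prompt"`
    Format string `json:"format"`
}

func main() {
    req := GenerateRequest{
        Model:  "llama2",
        Prompt: "Why is the sky blue? Respond using JSON",
        Format: "json",
    }
    b, err := json.Marshal(req)
    if err != nil {
        panic(err)
    }
    fmt.Println(string(b))
}
```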
@@ -38,6 +38,7 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
- `format`: the format to return a response in. Currently the only accepted value is `json`
Advanced parameters (optional):
@@ -48,13 +49,17 @@ Advanced parameters (optional):
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
### JSON mode
Enable JSON mode by setting the `format` parameter to `json` and instructing the model in the `prompt` to respond in JSON. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
### Examples

#### Request

```shell
curl -X POST http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "Why is the sky blue?"
}'
```
@@ -65,7 +70,7 @@ A stream of JSON objects is returned:
```json
{
  "model": "llama2",
  "created_at": "2023-08-04T08:52:19.385406455-07:00",
  "response": "The",
  "done": false
@@ -89,7 +94,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
```json
{
  "model": "llama2",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "",
  "context": [1, 2, 3],
@@ -105,7 +110,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
}
```
#### Request (No streaming)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
@@ -137,7 +142,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
}
```
#### Request (Raw mode)
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
@@ -167,7 +172,54 @@ curl -X POST http://localhost:11434/api/generate -d '{
}
```
#### Request (JSON mode)
```shell
curl -X POST http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "What color is the sky at different times of the day? Respond using JSON",
  "format": "json",
  "stream": false
}'
```
#### Response
```json
{
  "model": "llama2",
  "created_at": "2023-11-09T21:07:55.186497Z",
  "response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
  "done": true,
  "total_duration": 4661289125,
  "load_duration": 1714434500,
  "prompt_eval_count": 36,
  "prompt_eval_duration": 264132000,
  "eval_count": 75,
  "eval_duration": 2112149000
}
```
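As a quick check of the tokens-per-second calculation mentioned earlier (durations are reported in nanoseconds), the numbers in this response work out as follows:

```go
package main

import "fmt"

func main() {
    // eval_count and eval_duration taken from the response above.
    evalCount := 75.0
    evalDurationNs := 2112149000.0

    // tokens/s = eval_count / eval_duration, converting ns to seconds.
    fmt.Printf("%.1f tokens/s\n", evalCount/evalDurationNs*1e9) // 35.5 tokens/s
}
```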
The value of `response` will be a string containing JSON similar to:
```json
{
  "morning": {
    "color": "blue"
  },
  "noon": {
    "color": "blue-gray"
  },
  "afternoon": {
    "color": "warm gray"
  },
  "evening": {
    "color": "orange"
  }
}
```
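Because `response` is delivered as a string, a client still parses it separately; a minimal sketch, with a hypothetical `TimeOfDay` type invented for this example:

```go
package main

import (
    "encoding/json"
    "fmt"
)

// TimeOfDay is a hypothetical type matching the example output above.
type TimeOfDay struct {
    Color string `json:"color"`
}

func main() {
    // A shortened copy of the "response" string from the reply above.
    response := "{\"morning\": {\"color\": \"blue\"}, \"noon\": {\"color\": \"blue-gray\"}}"

    var skyColors map[string]TimeOfDay
    if err := json.Unmarshal([]byte(response), &skyColors); err != nil {
        panic(err)
    }
    fmt.Println(skyColors["morning"].Color) // blue
}
```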
#### Request (With options)
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
...
@@ -27,6 +27,34 @@ import (
"github.com/jmorganca/ollama/format" "github.com/jmorganca/ollama/format"
) )
const jsonGrammar = `
root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`
//go:embed llama.cpp/*/build/*/bin/*
var llamaCppEmbed embed.FS
@@ -497,7 +525,7 @@ type prediction struct {
const maxBufferSize = 512 * format.KiloByte

func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
    prevConvo, err := llm.Decode(ctx, prevContext)
    if err != nil {
        return err
@@ -532,6 +560,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
"stop": llm.Stop, "stop": llm.Stop,
} }
if format == "json" {
request["grammar"] = jsonGrammar
}
// Handling JSON marshaling with special characters unescaped. // Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{} buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer) enc := json.NewEncoder(buffer)
...
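The `jsonGrammar` constant above is written in llama.cpp's GBNF grammar notation; when `format` is `json`, `Predict` attaches it to the request so that the underlying llama.cpp server constrains sampling to strings derivable from `root`. As a hedged illustration of the same notation (not part of this change), a narrower grammar admitting only a flat object of plain string values could look like:

```go
// Illustrative only: a narrower GBNF grammar in the same dialect as
// jsonGrammar. It admits only a flat JSON object whose values are
// plain strings, with no escape sequences, arrays, or nesting.
const flatStringObjectGrammar = `
root   ::= "{" ws (pair ("," ws pair)*)? "}" ws
pair   ::= string ":" ws string
string ::= "\"" [^"\\]* "\"" ws
ws     ::= [ \t\n]*
`
```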
@@ -14,7 +14,7 @@ import (
)

type LLM interface {
    Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
    Embedding(context.Context, string) ([]float64, error)
    Encode(context.Context, string) ([]int, error)
    Decode(context.Context, []int) (string, error)
...
@@ -163,6 +163,9 @@ func GenerateHandler(c *gin.Context) {
    case req.Model == "":
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "model is required"})
        return
    case len(req.Format) > 0 && req.Format != "json":
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "format must be json"})
        return
    case req.Raw && (req.Template != "" || req.System != "" || len(req.Context) > 0):
        c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "raw mode does not support template, system, or context"})
        return
@@ -231,7 +234,7 @@ func GenerateHandler(c *gin.Context) {
            ch <- r
        }

        if err := loaded.runner.Predict(c.Request.Context(), req.Context, prompt, req.Format, fn); err != nil {
            ch <- gin.H{"error": err.Error()}
        }
    }()
...
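For illustration, a request carrying any other `format` value now fails fast against the new validation case in `GenerateHandler`; a minimal sketch assuming an ollama server on the default local port:

```go
package main

import (
    "bytes"
    "fmt"
    "io"
    "net/http"
)

func main() {
    // "xml" is not an accepted format, so the handler aborts with 400.
    body := []byte(`{"model": "llama2", "prompt": "hi", "format": "xml"}`)

    resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewReader(body))
    if err != nil {
        panic(err)
    }
    defer resp.Body.Close()

    out, _ := io.ReadAll(resp.Body)
    fmt.Println(resp.StatusCode, string(out)) // 400 {"error":"format must be json"}
}
```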