- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: system prompt to use (overrides what is defined in the `Modelfile`)
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory
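A minimal sketch of a streaming request using these fields, assuming a local server at `http://localhost:11434`; the model name, prompt, and option values are illustrative, not prescriptive:

```python
import json
import requests

# Hypothetical call to /api/generate; the endpoint streams one JSON
# object per line until a final object with "done": true.
with requests.post(
    "http://localhost:11434/api/generate",
    json={
        "model": "llama2",                          # assumed model name
        "prompt": "Why is the sky blue?",
        "system": "You are a concise assistant.",   # overrides the Modelfile system prompt
        "options": {"temperature": 0.7},            # Modelfile parameters
    },
    stream=True,
) as resp:
    for line in resp.iter_lines():
        chunk = json.loads(line)
        if not chunk.get("done"):
            print(chunk["response"], end="", flush=True)
```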
### Request
...
...
The final response in the stream also includes additional data about the generation:
- `prompt_eval_duration`: time spent in nanoseconds evaluating the prompt
- `eval_count`: number of tokens in the response
- `eval_duration`: time in nanoseconds spent generating the response
- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` by `eval_duration` and multiply by `10^9`, since the duration is reported in nanoseconds.
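Continuing the hypothetical sketch above, `final` below stands for the last streamed JSON object (the one with `"done": true`); the follow-up prompt is illustrative:

```python
# eval_duration is in nanoseconds, so scale by 1e9 to get token/s.
tokens_per_second = final["eval_count"] / final["eval_duration"] * 1e9
print(f"{tokens_per_second:.1f} token/s")

# Reuse the returned context to keep a short conversational memory
# across requests, as described in the parameter list above.
follow_up = {
    "model": "llama2",
    "prompt": "Explain that in one sentence.",
    "context": final["context"],
}
```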
...
...