client.py 1.25 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
import json
import requests

# NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
5
# model tag sent in every /api/generate request; it must name a model the
# local ollama server can serve
model = 'stablelm-zephyr' # TODO: update this for whatever model you wish to use
Bruce MacDonald's avatar
Bruce MacDonald committed
6

Bruce MacDonald's avatar
Bruce MacDonald committed
7
def generate(prompt, context):
    """Stream a completion for `prompt` from the local ollama server.

    POSTs to the /api/generate endpoint with the prior `context` (the
    token list returned by an earlier call) so the model stays aware of
    the conversation, and prints each streamed token as it arrives.

    Args:
        prompt: the user's text prompt.
        context: conversation-history token list from a previous call,
            or an empty list to start fresh.

    Returns:
        The updated context list from the final ('done') chunk, for the
        caller to pass into the next call. Returns None if the stream
        ends without a 'done' chunk.

    Raises:
        requests.HTTPError: on a non-2xx HTTP status.
        Exception: when the server reports an error in a response chunk.
    """
    r = requests.post('http://localhost:11434/api/generate',
                      json={
                          'model': model,
                          'prompt': prompt,
                          'context': context,
                      },
                      stream=True)
    r.raise_for_status()

    for line in r.iter_lines():
        # guard against blank keep-alive lines, which are not valid JSON
        if not line:
            continue
        body = json.loads(line)

        # check for a server-side error BEFORE printing, so an error chunk
        # doesn't leak partial output to the terminal first
        if 'error' in body:
            raise Exception(body['error'])

        # the response streams one token at a time, print that as we receive it
        print(body.get('response', ''), end='', flush=True)

        if body.get('done', False):
            return body['context']
Bruce MacDonald's avatar
Bruce MacDonald committed
28
29

def main():
    """Interactive REPL: read prompts and stream model replies until
    the user submits an empty line."""
    # accumulated conversation history; feeding it back into each call
    # keeps the model aware of earlier exchanges
    context = []

    # loop until the user enters an empty prompt
    while (user_input := input("Enter a prompt: ")):
        print()
        context = generate(user_input, context)
        print()

    exit()

if __name__ == "__main__":
    # run the interactive prompt loop only when executed as a script
    main()