ollama.py 5.47 KB
Newer Older
Bruce MacDonald's avatar
Bruce MacDonald committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
import os
import threading
import click
from tqdm import tqdm
from pathlib import Path
from llama_cpp import Llama
from flask import Flask, Response, stream_with_context, request
from flask_cors import CORS
from template import template

app = Flask(__name__)
CORS(app)  # enable CORS for all routes

# llms tracks which models are loaded
llms = {}
lock = threading.Lock()  # guards llms: load() mutates it under this lock


def models_directory():
    """Return the ~/.ollama/models directory, creating it on first use."""
    path = Path.home() / ".ollama" / "models"
    path.mkdir(parents=True, exist_ok=True)
    return path


Bruce MacDonald's avatar
Bruce MacDonald committed
30
def load(model):
    """
    Load a model, caching it in the module-level ``llms`` dict.

    Args:
    model (str): The name or path of the model to load. A value that is an
        existing ``.bin`` file path is loaded directly; anything else is
        resolved as ``<models_directory>/<model>.bin``.

    Returns:
    tuple: ``(name, error)`` — ``name`` is the model's name on success
        (None on failure); ``error`` is None on success, or a dict with an
        'error' key describing why the model could not be loaded.
        (The original docstring described a single value; the function has
        always returned this 2-tuple.)
    """

    with lock:
        if os.path.exists(model) and model.endswith(".bin"):
            # model is being referenced by path rather than name directly
            path = os.path.abspath(model)
            load_from = path
            # split the filename and extension to derive the model name
            name = os.path.splitext(os.path.basename(path))[0]
        else:
            # model is being loaded from the ollama models directory
            # (renamed from `dir`, which shadowed the builtin)
            models_dir = models_directory()

            # TODO: download model from a repository if it does not exist
            load_from = str(models_dir / f"{model}.bin")
            name = model

        # NOTE: the original also checked `load_from == ""`, but both
        # branches above always assign a non-empty path, so that branch
        # was unreachable and has been removed.
        if not os.path.exists(load_from):
            return None, {"error": f"The model {load_from} does not exist."}

        if name not in llms:
            llms[name] = Llama(model_path=load_from)

    return name, None
Bruce MacDonald's avatar
Bruce MacDonald committed
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85


def unload(model):
    """
    Evict a model from the cache of loaded models.

    A no-op when the model was never loaded.

    Args:
    model (str): The name of the model to unload.
    """
    llms.pop(model, None)


def generate(model, prompt):
    """
    Stream completion chunks for *prompt* from *model*.

    The model is auto-loaded (and cached) on first use.

    Args:
    model (str): The name or path of the model.
    prompt (str): The prompt text.

    Yields:
    str: JSON-encoded completion chunks from the model. If the model fails
        to load, a single JSON-encoded error dict is yielded instead.
    """
    # auto load
    name, error = load(model)
    if error is not None:
        # Bug fix: `return error` inside a generator discards the value
        # (it becomes StopIteration.value, which no caller reads), so the
        # stream was silently empty on failure. Yield the error instead.
        yield json.dumps(error)
        return
    generated = llms[name](
        str(prompt),  # TODO: optimize prompt based on model
        max_tokens=4096,
        stop=["Q:", "\n"],
        stream=True,
    )
    for output in generated:
        yield json.dumps(output)


def models():
    """List the names of the .bin models in the models directory."""
    entries = os.listdir(models_directory())
    return [entry.replace(".bin", "") for entry in entries if entry.endswith(".bin")]


@app.route("/load", methods=["POST"])
def load_route_handler():
    data = request.get_json()
    model = data.get("model")
    if not model:
        return Response("Model is required", status=400)
    error = load(model)
    if error is not None:
        return error
    return Response(status=204)


@app.route("/unload", methods=["POST"])
def unload_route_handler():
    data = request.get_json()
    model = data.get("model")
    if not model:
        return Response("Model is required", status=400)
    unload(model)
    return Response(status=204)


@app.route("/generate", methods=["POST"])
def generate_route_handler():
    data = request.get_json()
    model = data.get("model")
    prompt = data.get("prompt")
Bruce MacDonald's avatar
Bruce MacDonald committed
135
    prompt = template(model, prompt)
Bruce MacDonald's avatar
Bruce MacDonald committed
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    if not model:
        return Response("Model is required", status=400)
    if not prompt:
        return Response("Prompt is required", status=400)
    if not os.path.exists(f"{model}"):
        return {"error": "The model does not exist."}, 400
    return Response(
        stream_with_context(generate(model, prompt)), mimetype="text/event-stream"
    )


@app.route("/models", methods=["GET"])
def models_route_handler():
    bin_files = models()
    return Response(json.dumps(bin_files), mimetype="application/json")


@click.group(invoke_without_command=True)
@click.pass_context
def cli(ctx):
    # allows the script to respond to command line input when executed
    # directly; with no subcommand, fall back to printing the help text
    if ctx.invoked_subcommand is not None:
        return
    click.echo(ctx.get_help())


@cli.command()
@click.option("--port", default=7734, help="Port to run the server on")
@click.option("--debug", default=False, help="Enable debug mode")
def serve(port, debug):
    # Bug fix: the original print lacked the f-prefix, so the literal text
    # "{port}" was printed instead of the chosen port number.
    print(f"Serving on http://localhost:{port}")
    app.run(host="0.0.0.0", port=port, debug=debug)


@cli.command(name="load")
@click.argument("model")
@click.option("--file", default=False, help="Indicates that a file path is provided")
def load_cli(model, file):
    if file:
        error = load(path=model)
    else:
        error = load(model)
    if error is not None:
        print(error)
        return
    print("Model loaded")


@cli.command(name="generate")
@click.argument("model")
@click.option("--prompt", default="", help="The prompt for the model")
def generate_cli(model, prompt):
    if prompt == "":
        prompt = input("Prompt: ")
    output = ""
    prompt = template(model, prompt)
    for generated in generate(model, prompt):
        generated_json = json.loads(generated)
        text = generated_json["choices"][0]["text"]
        output += text
        print(f"\r{output}", end="", flush=True)


@cli.command(name="models")
def models_cli():
    print(models())


@cli.command(name="pull")
@click.argument("model")
def pull_cli(model):
    print("not implemented")


@cli.command(name="import")
@click.argument("model")
def import_cli(model):
    print("not implemented")


if __name__ == "__main__":
    cli()