import json
import os
import threading
from pathlib import Path

import click
from flask import Flask, Response, request, stream_with_context
from flask_cors import CORS
from llama_cpp import Llama
from transformers import AutoModel

from template import template

app = Flask(__name__)
CORS(app)  # enable CORS for all routes

# llms tracks which models are loaded
llms = {}
lock = threading.Lock()


def models_directory():
    home_dir = Path.home()
    models_dir = home_dir / ".ollama/models"

    if not models_dir.exists():
        models_dir.mkdir(parents=True)

    return models_dir


def load(model=None, path=None):
    """
    Load a model.

    The model can be specified by providing either the path or the model name,
    but not both. If both are provided, this function will raise a ValueError.
    If the model does not exist or cannot be loaded, this function returns an error dictionary.

    Args:
    model (str, optional): The name of the model to load.
    path (str, optional): The path to the model file.

    Returns:
    dict or None: If the model cannot be loaded, a dictionary with an 'error' key is returned.
                  If the model is successfully loaded, None is returned.
    """

    with lock:
        if path is not None and model is not None:
            raise ValueError(
                "Both path and model are specified. Please provide only one of them."
            )
        elif path is not None:
            name = os.path.basename(path)
            load_from = path
        elif model is not None:
            name = model
            models_dir = models_directory()
            load_from = str(models_dir / f"{model}.bin")
        else:
            raise ValueError("Either path or model must be specified.")

        if not os.path.exists(load_from):
            return {"error": f"The model at {load_from} does not exist."}

        if name not in llms:
            # TODO: download model from a repository if it does not exist
            llms[name] = Llama(model_path=load_from)

    # TODO: this should start a persistent instance of ollama with the model loaded
    return None
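
# Example usage (hypothetical names; "llama" assumes ~/.ollama/models/llama.bin
# exists, and the /tmp path below is illustrative only):
#   load("llama")                # -> None on success
#   load(path="/tmp/llama.bin")  # -> {"error": ...} if the file is missing
#   load("llama", path="/tmp/llama.bin")  # -> raises ValueError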


def unload(model):
    """
    Unload a model.

    Remove a model from the list of loaded models. If the model is not loaded, this is a no-op.

    Args:
    model (str): The name of the model to unload.
    """
    llms.pop(model, None)


def generate(model, prompt):
    # auto load the model if it is not already in memory
    error = load(model)
    if error is not None:
        # this is a generator, so the error has to be yielded rather than returned
        yield json.dumps(error)
        return
    generated = llms[model](
        str(prompt),  # TODO: optimize prompt based on model
        max_tokens=4096,
        stop=["Q:", "\n"],
        stream=True,
    )
    for output in generated:
        yield json.dumps(output)
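
# Each streamed chunk is the llama-cpp-python completion dict serialized as
# JSON; generate_cli below reads the text from chunk["choices"][0]["text"].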


def models():
    models_dir = models_directory()
    all_files = os.listdir(models_dir)
    bin_files = [
        file.removesuffix(".bin") for file in all_files if file.endswith(".bin")
    ]
    return bin_files


@app.route("/load", methods=["POST"])
def load_route_handler():
    data = request.get_json()
    model = data.get("model")
    if not model:
        return Response("Model is required", status=400)
    error = load(model)
    if error is not None:
        return error, 400
    return Response(status=204)


@app.route("/unload", methods=["POST"])
def unload_route_handler():
    data = request.get_json()
    model = data.get("model")
    if not model:
        return Response("Model is required", status=400)
    unload(model)
    return Response(status=204)


@app.route("/generate", methods=["POST"])
def generate_route_handler():
    data = request.get_json()
    model = data.get("model")
    prompt = data.get("prompt")
    if not model:
        return Response("Model is required", status=400)
    if not prompt:
        return Response("Prompt is required", status=400)
    if not (models_directory() / f"{model}.bin").exists():
        return {"error": "The model does not exist."}, 400
    return Response(
        stream_with_context(generate(model, prompt)), mimetype="text/event-stream"
    )
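
# Example request (assumes the server is running on the default port 5000 and a
# hypothetical model file ~/.ollama/models/llama.bin exists):
#   curl -X POST http://localhost:5000/generate \
#     -H "Content-Type: application/json" \
#     -d '{"model": "llama", "prompt": "Q: What is a llama? A:"}'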


@app.route("/models", methods=["GET"])
def models_route_handler():
    bin_files = models()
    return Response(json.dumps(bin_files), mimetype="application/json")
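
# Example (output shows one hypothetical entry per .bin file in ~/.ollama/models):
#   curl http://localhost:5000/models
#   -> ["llama"]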


@click.group(invoke_without_command=True)
@click.pass_context
def cli(ctx):
    # allows the script to respond to command line input when executed directly
    if ctx.invoked_subcommand is None:
        click.echo(ctx.get_help())


@cli.command()
@click.option("--port", default=5000, help="Port to run the server on")
@click.option("--debug", default=False, help="Enable debug mode")
def serve(port, debug):
    print("Serving on http://localhost:{port}")
    app.run(host="0.0.0.0", port=port, debug=debug)
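
# Example:
#   python ollama.py serve --port 5000 --debug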


@cli.command(name="load")
@click.argument("model")
@click.option("--file", default=False, help="Indicates that a file path is provided")
def load_cli(model, file):
    if file:
        error = load(path=model)
    else:
        error = load(model)
    if error is not None:
        print(error)
        return
    print("Model loaded")


@cli.command(name="generate")
@click.argument("model")
@click.option("--prompt", default="", help="The prompt for the model")
def generate_cli(model, prompt):
    if prompt == "":
        prompt = input("Prompt: ")
    output = ""
    prompt = template(model, prompt)
    for generated in generate(model, prompt):
        generated_json = json.loads(generated)
        text = generated_json["choices"][0]["text"]
        output += text
        print(f"\r{output}", end="", flush=True)


def download_model(model_name):
    # NOTE: not wired up to a CLI command yet; `pull` below is still a stub
    models_dir = models_directory()
    AutoModel.from_pretrained(model_name, cache_dir=models_dir)


@cli.command(name="models")
def models_cli():
    print(models())


@cli.command(name="pull")
@click.argument("model")
def pull_cli(model):
    print("not implemented")


@cli.command(name="import")
@click.argument("model")
def import_cli(model):
    print("not implemented")


if __name__ == "__main__":
    cli()