Commit ecfb4aba authored by Bruce MacDonald

simplify loading

parent 3b4f45f6
@@ -10,7 +10,7 @@ Install dependencies:
 pip install -r requirements.txt
 ```
-Put your model in `models/` and run:
+Run a server:
 ```
 python3 ollama.py serve
@@ -19,17 +19,18 @@ python3 ollama.py serve
 **Start frontend service:**
 Install dependencies:
 ```
 cd desktop
 npm install
 ```
 Run the UI:
 ```
 npm start
 ```
 ## Building
 If using Apple silicon, you need a Python version that supports arm64:
@@ -57,3 +58,11 @@ python3 build.py
 cd desktop
 npm run package
 ```
+## Update requirements.txt
+In the root directory, run:
+```
+pipreqs . --force
+```
Place models here for use.
Here are some recommendations:
https://huggingface.co/TheBloke/vicuna-7B-v1.3-GGML
https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_1.bin
https://huggingface.co/TheBloke/orca_mini_13B-GGML/resolve/main/orca-mini-13b.ggmlv3.q4_1.bin
\ No newline at end of file
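The orca-mini links above point directly at GGML weight files, so one can be pulled straight into `models/`. A minimal sketch, assuming it is run from the repository root and that `load()` resolves a bare model name to `models/<name>.bin`; the destination filename below is chosen to make that resolution work and is not part of the commit:

```
import urllib.request
from pathlib import Path

# Fetch one of the recommended GGML models into models/ (filename chosen so
# that load("orca-mini-3b") resolves to models/orca-mini-3b.bin).
url = "https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_1.bin"
dest = Path("models") / "orca-mini-3b.bin"
dest.parent.mkdir(exist_ok=True)
urllib.request.urlretrieve(url, dest)
print(f"saved {dest} ({dest.stat().st_size} bytes)")
```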
@@ -27,47 +27,46 @@ def models_directory():
     return models_dir

-def load(model=None, path=None):
+def load(model):
     """
     Load a model.
-    The model can be specified by providing either the path or the model name,
-    but not both. If both are provided, this function will raise a ValueError.
-    If the model does not exist or could not be loaded, this function returns an error.

     Args:
-        model (str, optional): The name of the model to load.
-        path (str, optional): The path to the model file.
+        model (str): The name or path of the model to load.

     Returns:
+        str or None: The name of the model
         dict or None: If the model cannot be loaded, a dictionary with an 'error' key is returned.
                       If the model is successfully loaded, None is returned.
     """
     with lock:
-        if path is not None and model is not None:
-            raise ValueError(
-                "Both path and model are specified. Please provide only one of them."
-            )
-        elif path is not None:
-            name = os.path.basename(path)
-            load_from = path
-        elif model is not None:
-            name = model
-            dir = models_directory()
-            load_from = str(dir / f"{model}.bin")
-        else:
-            raise ValueError("Either path or model must be specified.")
+        load_from = ""
+        if os.path.exists(model) and model.endswith(".bin"):
+            # model is being referenced by path rather than name directly
+            path = os.path.abspath(model)
+            base = os.path.basename(path)
+            load_from = path
+            name = os.path.splitext(base)[0]  # Split the filename and extension
+        else:
+            # model is being loaded from the ollama models directory
+            dir = models_directory()
+            # TODO: download model from a repository if it does not exist
+            load_from = str(dir / f"{model}.bin")
+            name = model
+        if load_from == "":
+            return None, {"error": "Model not found."}
         if not os.path.exists(load_from):
-            return {"error": f"The model at {load_from} does not exist."}
+            return None, {"error": f"The model {load_from} does not exist."}
         if name not in llms:
-            # TODO: download model from a repository if it does not exist
             llms[name] = Llama(model_path=load_from)
-        # TODO: this should start a persistent instance of ollama with the model loaded
-        return None
+        return name, None

 def unload(model):
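The refactor changes `load()` from returning only an error to returning a `(name, error)` tuple, and folds the old `path=` keyword into the single `model` argument, which is treated as a path whenever it exists on disk and ends in `.bin`. A minimal sketch of the new calling convention; the names and paths here are illustrative, not from the commit:

```
# Sketch only: the two branches of the new load().
name, error = load("orca-mini-3b")  # by name: resolves to <models dir>/orca-mini-3b.bin
if error is not None:
    print(error["error"])

name, error = load("models/orca-mini-3b.ggmlv3.q4_1.bin")  # by path: must exist and end in .bin
if error is None:
    print(f"loaded as {name!r}")  # extension stripped: 'orca-mini-3b.ggmlv3.q4_1'
```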
@@ -84,10 +83,10 @@ def unload(model):
 def generate(model, prompt):
     # auto load
-    error = load(model)
+    name, error = load(model)
     if error is not None:
         return error
-    generated = llms[model](
+    generated = llms[name](
         str(prompt),  # TODO: optimize prompt based on model
         max_tokens=4096,
         stop=["Q:", "\n"],
...
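Since `generate()` now keys the loaded-model cache by the name `load()` returns, it accepts either a registered model name or a raw `.bin` path. A usage sketch under that assumption; the diff is truncated before `generate()`'s return, so only the visible error path is shown:

```
# Sketch only: generate() auto-loads the model, so no explicit load() call is needed.
result = generate("orca-mini-3b", "Q: What is the capital of France?")
if isinstance(result, dict) and "error" in result:
    print(result["error"])  # e.g. the model file was not found
```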