OpenDAS / ollama

Commit 4d0eb763
Authored Jun 28, 2023 by Michael Yang

use ctransformers as backup to llama-cpp

Parent: 5cea13ce
Showing 8 changed files with 791 additions and 37 deletions (+791 -37)
ollama/engine.py                        +91   -29
ollama/templates/falcon.prompt           +3    -0
ollama/templates/mpt.prompt              +4    -0
ollama/templates/ultralm.prompt          +2    -0
ollama/templates/wizardcoder.prompt      +5    -0
poetry.lock                            +412    -5
pyproject.toml                           +3    -1
requirements.txt                       +271    -2
ollama/engine.py
@@ -2,58 +2,120 @@ import os
 import sys

 from os import path
 from contextlib import contextmanager
-from llama_cpp import Llama as LLM
+from fuzzywuzzy import process
+from llama_cpp import Llama
+from ctransformers import AutoModelForCausalLM

 import ollama.prompt
 from ollama.model import models_home


 @contextmanager
-def suppress_stderr():
-    stderr = os.dup(sys.stderr.fileno())
+def suppress(file):
+    original = os.dup(file.fileno())
     with open(os.devnull, "w") as devnull:
-        os.dup2(devnull.fileno(), sys.stderr.fileno())
+        os.dup2(devnull.fileno(), file.fileno())
         yield
-    os.dup2(stderr, sys.stderr.fileno())
+    os.dup2(original, file.fileno())


 def generate(model_name, prompt, models={}, *args, **kwargs):
-    if "max_tokens" not in kwargs:
-        kwargs.update({"max_tokens": 16384})
-
-    if "stop" not in kwargs:
-        kwargs.update({"stop": ["Q:"]})
-
-    if "stream" not in kwargs:
-        kwargs.update({"stream": True})
-
-    prompt = ollama.prompt.template(model_name, prompt)
     model = load(model_name, models=models)
-    for output in model.create_completion(prompt, *args, **kwargs):
-        yield output
+    inputs = ollama.prompt.template(model_name, prompt)
+    return model.generate(inputs, *args, **kwargs)


 def load(model_name, models={}):
-    model = models.get(model_name, None)
-    if not model:
+    if not models.get(model_name, None):
         model_path = path.expanduser(model_name)
         if not path.exists(model_path):
             model_path = path.join(models_home, model_name + ".bin")
-        try:
-            # suppress LLM's output
-            with suppress_stderr():
-                model = LLM(model_path, verbose=False)
-                models.update({model_name: model})
-        except Exception:
-            # e is sent to devnull, so create a generic exception
-            raise Exception(f"Failed to load model: {model}")
-    return model
+
+        runners = {
+            model_type: cls
+            for cls in [LlamaCppRunner, CtransformerRunner]
+            for model_type in cls.model_types()
+        }
+
+        best_match, _ = process.extractOne(model_path, runners.keys())
+        model = runners.get(best_match, LlamaCppRunner)
+
+        models.update({model_name: model(model_path, best_match)})
+
+    return models.get(model_name)


 def unload(model_name, models={}):
     if model_name in models:
         models.pop(model_name)
+
+
+class LlamaCppRunner:
+
+    def __init__(self, model_path, model_type):
+        try:
+            with suppress(sys.stderr), suppress(sys.stdout):
+                self.model = Llama(model_path, verbose=False, n_gpu_layers=1, seed=-1)
+        except Exception:
+            raise Exception("Failed to load model", model_path, model_type)
+
+    @staticmethod
+    def model_types():
+        return [
+            'llama',
+            'orca',
+            'vicuna',
+            'ultralm',
+        ]
+
+    def generate(self, prompt, *args, **kwargs):
+        if "max_tokens" not in kwargs:
+            kwargs.update({"max_tokens": 512})
+
+        if "stop" not in kwargs:
+            kwargs.update({"stop": ["Q:"]})
+
+        if "stream" not in kwargs:
+            kwargs.update({"stream": True})
+
+        with suppress(sys.stderr):
+            for output in self.model(prompt, *args, **kwargs):
+                yield output
+
+
+class CtransformerRunner:
+
+    def __init__(self, model_path, model_type):
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_path, model_type=model_type, local_files_only=True)
+
+    @staticmethod
+    def model_types():
+        return [
+            'falcon',
+            'mpt',
+            'starcoder',
+        ]
+
+    def generate(self, prompt, *args, **kwargs):
+        if "max_new_tokens" not in kwargs:
+            kwargs.update({"max_new_tokens": 512})
+
+        if "stop" not in kwargs:
+            kwargs.update({"stop": ["User"]})
+
+        if "stream" not in kwargs:
+            kwargs.update({"stream": True})
+
+        for output in self.model(prompt, *args, **kwargs):
+            yield {
+                'choices': [
+                    {
+                        'text': output,
+                    },
+                ],
+            }
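Note on the new load() path: the resolved model path is fuzzy-matched against every model type advertised by LlamaCppRunner.model_types() and CtransformerRunner.model_types(), and the winning runner class is instantiated for that path. Below is a minimal, self-contained sketch of that selection idea only; plain strings stand in for the runner classes, and the example paths are made up for illustration.

# Sketch only: fuzzy runner selection, assuming fuzzywuzzy[speedup] is installed.
# The real runner classes live in ollama/engine.py; strings stand in for them here.
from fuzzywuzzy import process

RUNNERS = {
    # model_type -> backend, mirroring model_types() in the diff above
    "llama": "llama-cpp",
    "orca": "llama-cpp",
    "vicuna": "llama-cpp",
    "ultralm": "llama-cpp",
    "falcon": "ctransformers",
    "mpt": "ctransformers",
    "starcoder": "ctransformers",
}


def pick_backend(model_path):
    # extractOne returns the closest choice and its similarity score (0-100)
    best_match, score = process.extractOne(model_path, RUNNERS.keys())
    # fall back to llama-cpp, as load() defaults to LlamaCppRunner
    return best_match, RUNNERS.get(best_match, "llama-cpp")


# hypothetical paths, for illustration only
print(pick_backend("~/.ollama/models/falcon-7b-instruct.bin"))  # likely ('falcon', 'ctransformers')
print(pick_backend("~/.ollama/models/vicuna-13b.bin"))          # likely ('vicuna', 'llama-cpp')

Because unmatched paths fall back to LlamaCppRunner, llama-cpp remains the default and ctransformers only takes over for falcon/mpt/starcoder-style models, which is the "backup" behaviour the commit message describes.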
ollama/templates/falcon.prompt (new file, mode 100644)
A helpful assistant who helps the user with any questions asked.
User: {{ prompt }}
Assistant:
ollama/templates/mpt.prompt (new file, mode 100644)
Below is an instruction that describes a task. Write a response that appropriately completes the request. Be concise. Once the request is completed, include no other text.
### Instruction:
{{ prompt }}
### Response:
ollama/templates/ultralm.prompt (new file, mode 100644)
USER: {{ prompt }}
ASSISTANT:
ollama/templates/wizardcoder.prompt (new file, mode 100644)
Below is an instruction that describes a task. Write a response that appropriately completes the request
### Instruction: {{ prompt }}
### Response:
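These .prompt files use Jinja2-style {{ prompt }} placeholders (jinja2 is already a dependency in pyproject.toml). ollama.prompt.template itself is not part of this diff, so the snippet below is only a sketch of how such a template would presumably be filled in; render_prompt is a hypothetical helper, not the project's API.

# Hypothetical rendering helper, for illustration only.
from jinja2 import Template

FALCON_TEMPLATE = """A helpful assistant who helps the user with any questions asked.
User: {{ prompt }}
Assistant:"""


def render_prompt(template_text, prompt):
    # Fill the {{ prompt }} placeholder with the user's input.
    return Template(template_text).render(prompt=prompt)


print(render_prompt(FALCON_TEMPLATE, "Why is the sky blue?"))
# A helpful assistant who helps the user with any questions asked.
# User: Why is the sky blue?
# Assistant: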
poetry.lock

This diff is collapsed.
pyproject.toml
@@ -9,7 +9,6 @@ scripts = {ollama = "ollama.cmd.cli:main"}
 [tool.poetry.dependencies]
 python = "^3.8"
-llama-cpp-python = "^0.1.66"
 aiohttp = "^3.8.4"
 aiohttp-cors = "^0.7.0"
 jinja2 = "^3.1.2"
@@ -17,6 +16,9 @@ requests = "^2.31.0"
 tqdm = "^4.65.0"
 validators = "^0.20.0"
 yaspin = "^2.3.0"
+llama-cpp-python = "^0.1.67"
+ctransformers = "^0.2.10"
+fuzzywuzzy = {extras = ["speedup"], version = "^0.18.0"}

 [build-system]
 requires = ["poetry-core"]
requirements.txt

This diff is collapsed.