Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
cbe2adc7
Unverified
Commit
cbe2adc7
authored
Jan 17, 2024
by
Alexander F. Rødseth
Committed by
GitHub
Jan 17, 2024
Browse files
Merge branch 'main' into archlinux
parents
f4bf1d51
d5a73533
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
151 additions
and
372 deletions
+151
-372
.github/workflows/test.yaml
.github/workflows/test.yaml
+28
-3
README.md
README.md
+4
-0
api/client.py
api/client.py
+0
-284
llm/dyn_ext_server.go
llm/dyn_ext_server.go
+1
-2
llm/generate/gen_darwin.sh
llm/generate/gen_darwin.sh
+3
-0
llm/generate/gen_linux.sh
llm/generate/gen_linux.sh
+41
-28
llm/llama.cpp
llm/llama.cpp
+1
-1
server/images.go
server/images.go
+31
-34
server/modelpath.go
server/modelpath.go
+1
-0
server/routes.go
server/routes.go
+3
-19
server/routes_test.go
server/routes_test.go
+38
-1
No files found.
.github/workflows/test.yaml
View file @
cbe2adc7
...
@@ -8,7 +8,15 @@ jobs:
...
@@ -8,7 +8,15 @@ jobs:
strategy
:
strategy
:
matrix
:
matrix
:
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
arch
:
[
amd64
,
arm64
]
exclude
:
-
os
:
ubuntu-latest
arch
:
arm64
-
os
:
windows-latest
arch
:
arm64
runs-on
:
${{ matrix.os }}
runs-on
:
${{ matrix.os }}
env
:
GOARCH
:
${{ matrix.arch }}
steps
:
steps
:
-
uses
:
actions/checkout@v4
-
uses
:
actions/checkout@v4
-
uses
:
actions/setup-go@v4
-
uses
:
actions/setup-go@v4
...
@@ -33,7 +41,7 @@ jobs:
...
@@ -33,7 +41,7 @@ jobs:
-
run
:
go generate -x ./...
-
run
:
go generate -x ./...
-
uses
:
actions/upload-artifact@v4
-
uses
:
actions/upload-artifact@v4
with
:
with
:
name
:
${{ matrix.os }}-libraries
name
:
${{ matrix.os
}}-${{ matrix.arch
}}-libraries
path
:
|
path
:
|
llm/llama.cpp/build/**/lib/*
llm/llama.cpp/build/**/lib/*
lint
:
lint
:
...
@@ -41,7 +49,18 @@ jobs:
...
@@ -41,7 +49,18 @@ jobs:
strategy
:
strategy
:
matrix
:
matrix
:
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
arch
:
[
amd64
,
arm64
]
exclude
:
-
os
:
ubuntu-latest
arch
:
arm64
-
os
:
windows-latest
arch
:
arm64
-
os
:
macos-latest
arch
:
amd64
runs-on
:
${{ matrix.os }}
runs-on
:
${{ matrix.os }}
env
:
GOARCH
:
${{ matrix.arch }}
CGO_ENABLED
:
"
1"
steps
:
steps
:
-
uses
:
actions/checkout@v4
-
uses
:
actions/checkout@v4
with
:
with
:
...
@@ -52,7 +71,7 @@ jobs:
...
@@ -52,7 +71,7 @@ jobs:
cache
:
false
cache
:
false
-
uses
:
actions/download-artifact@v4
-
uses
:
actions/download-artifact@v4
with
:
with
:
name
:
${{ matrix.os }}-libraries
name
:
${{ matrix.os
}}-${{ matrix.arch
}}-libraries
path
:
llm/llama.cpp/build
path
:
llm/llama.cpp/build
-
uses
:
golangci/golangci-lint-action@v3
-
uses
:
golangci/golangci-lint-action@v3
test
:
test
:
...
@@ -60,6 +79,12 @@ jobs:
...
@@ -60,6 +79,12 @@ jobs:
strategy
:
strategy
:
matrix
:
matrix
:
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
os
:
[
ubuntu-latest
,
macos-latest
,
windows-latest
]
arch
:
[
amd64
,
arm64
]
exclude
:
-
os
:
ubuntu-latest
arch
:
arm64
-
os
:
windows-latest
arch
:
arm64
runs-on
:
${{ matrix.os }}
runs-on
:
${{ matrix.os }}
steps
:
steps
:
-
uses
:
actions/checkout@v4
-
uses
:
actions/checkout@v4
...
@@ -72,7 +97,7 @@ jobs:
...
@@ -72,7 +97,7 @@ jobs:
-
run
:
go get
-
run
:
go get
-
uses
:
actions/download-artifact@v4
-
uses
:
actions/download-artifact@v4
with
:
with
:
name
:
${{ matrix.os }}-libraries
name
:
${{ matrix.os
}}-${{ matrix.arch
}}-libraries
path
:
llm/llama.cpp/build
path
:
llm/llama.cpp/build
-
run
:
go build
-
run
:
go build
-
run
:
go test -v ./...
-
run
:
go test -v ./...
README.md
View file @
cbe2adc7
...
@@ -248,6 +248,10 @@ curl http://localhost:11434/api/chat -d '{
...
@@ -248,6 +248,10 @@ curl http://localhost:11434/api/chat -d '{
See the
[
API documentation
](
./docs/api.md
)
for all endpoints.
See the
[
API documentation
](
./docs/api.md
)
for all endpoints.
## Integrations
-
[
ollama-python
](
https://github.com/jmorganca/ollama-python
)
## Community Integrations
## Community Integrations
### Web & Desktop
### Web & Desktop
...
...
api/client.py
deleted
100644 → 0
View file @
f4bf1d51
import
os
import
json
import
requests
import
os
import
hashlib
import
json
from
pathlib
import
Path
BASE_URL
=
os
.
environ
.
get
(
'OLLAMA_HOST'
,
'http://localhost:11434'
)
# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses.
# The final response object will include statistics and additional data from the request. Use the callback function to override
# the default handler.
def
generate
(
model_name
,
prompt
,
system
=
None
,
template
=
None
,
format
=
""
,
context
=
None
,
options
=
None
,
callback
=
None
):
try
:
url
=
f
"
{
BASE_URL
}
/api/generate"
payload
=
{
"model"
:
model_name
,
"prompt"
:
prompt
,
"system"
:
system
,
"template"
:
template
,
"context"
:
context
,
"options"
:
options
,
"format"
:
format
,
}
# Remove keys with None values
payload
=
{
k
:
v
for
k
,
v
in
payload
.
items
()
if
v
is
not
None
}
with
requests
.
post
(
url
,
json
=
payload
,
stream
=
True
)
as
response
:
response
.
raise_for_status
()
# Creating a variable to hold the context history of the final chunk
final_context
=
None
# Variable to hold concatenated response strings if no callback is provided
full_response
=
""
# Iterating over the response line by line and displaying the details
for
line
in
response
.
iter_lines
():
if
line
:
# Parsing each line (JSON chunk) and extracting the details
chunk
=
json
.
loads
(
line
)
# If a callback function is provided, call it with the chunk
if
callback
:
callback
(
chunk
)
else
:
# If this is not the last chunk, add the "response" field value to full_response and print it
if
not
chunk
.
get
(
"done"
):
response_piece
=
chunk
.
get
(
"response"
,
""
)
full_response
+=
response_piece
print
(
response_piece
,
end
=
""
,
flush
=
True
)
# Check if it's the last chunk (done is true)
if
chunk
.
get
(
"done"
):
final_context
=
chunk
.
get
(
"context"
)
# Return the full response and the final context
return
full_response
,
final_context
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
None
,
None
# Create a blob file on the server if it doesn't exist.
def
create_blob
(
digest
,
file_path
):
url
=
f
"
{
BASE_URL
}
/api/blobs/
{
digest
}
"
# Check if the blob exists
response
=
requests
.
head
(
url
)
if
response
.
status_code
!=
404
:
return
# Blob already exists, no need to upload
response
.
raise_for_status
()
# Upload the blob
with
open
(
file_path
,
'rb'
)
as
file_data
:
requests
.
post
(
url
,
data
=
file_data
)
# Create a model from a Modelfile. Use the callback function to override the default handler.
def
create
(
model_name
,
filename
,
callback
=
None
):
try
:
file_path
=
Path
(
filename
).
expanduser
().
resolve
()
processed_lines
=
[]
# Read and process the modelfile
with
open
(
file_path
,
'r'
)
as
f
:
for
line
in
f
:
# Skip empty or whitespace-only lines
if
not
line
.
strip
():
continue
command
,
args
=
line
.
split
(
maxsplit
=
1
)
if
command
.
upper
()
in
[
"FROM"
,
"ADAPTER"
]:
path
=
Path
(
args
.
strip
()).
expanduser
()
# Check if path is relative and resolve it
if
not
path
.
is_absolute
():
path
=
(
file_path
.
parent
/
path
)
# Skip if file does not exist for "model", this is handled by the server
if
not
path
.
exists
():
processed_lines
.
append
(
line
)
continue
# Calculate SHA-256 hash
with
open
(
path
,
'rb'
)
as
bin_file
:
hash
=
hashlib
.
sha256
()
hash
.
update
(
bin_file
.
read
())
blob
=
f
"sha256:
{
hash
.
hexdigest
()
}
"
# Add the file to the remote server
create_blob
(
blob
,
path
)
# Replace path with digest in the line
line
=
f
"
{
command
}
@
{
blob
}
\n
"
processed_lines
.
append
(
line
)
# Combine processed lines back into a single string
modelfile_content
=
'
\n
'
.
join
(
processed_lines
)
url
=
f
"
{
BASE_URL
}
/api/create"
payload
=
{
"name"
:
model_name
,
"modelfile"
:
modelfile_content
}
# Making a POST request with the stream parameter set to True to handle streaming responses
with
requests
.
post
(
url
,
json
=
payload
,
stream
=
True
)
as
response
:
response
.
raise_for_status
()
# Iterating over the response line by line and displaying the status
for
line
in
response
.
iter_lines
():
if
line
:
chunk
=
json
.
loads
(
line
)
if
callback
:
callback
(
chunk
)
else
:
print
(
f
"Status:
{
chunk
.
get
(
'status'
)
}
"
)
except
Exception
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
# Pull a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple
# calls to will share the same download progress. Use the callback function to override the default handler.
def
pull
(
model_name
,
insecure
=
False
,
callback
=
None
):
try
:
url
=
f
"
{
BASE_URL
}
/api/pull"
payload
=
{
"name"
:
model_name
,
"insecure"
:
insecure
}
# Making a POST request with the stream parameter set to True to handle streaming responses
with
requests
.
post
(
url
,
json
=
payload
,
stream
=
True
)
as
response
:
response
.
raise_for_status
()
# Iterating over the response line by line and displaying the details
for
line
in
response
.
iter_lines
():
if
line
:
# Parsing each line (JSON chunk) and extracting the details
chunk
=
json
.
loads
(
line
)
# If a callback function is provided, call it with the chunk
if
callback
:
callback
(
chunk
)
else
:
# Print the status message directly to the console
print
(
chunk
.
get
(
'status'
,
''
),
end
=
''
,
flush
=
True
)
# If there's layer data, you might also want to print that (adjust as necessary)
if
'digest'
in
chunk
:
print
(
f
" - Digest:
{
chunk
[
'digest'
]
}
"
,
end
=
''
,
flush
=
True
)
print
(
f
" - Total:
{
chunk
[
'total'
]
}
"
,
end
=
''
,
flush
=
True
)
print
(
f
" - Completed:
{
chunk
[
'completed'
]
}
"
,
end
=
'
\n
'
,
flush
=
True
)
else
:
print
()
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
# Push a model to the model registry. Use the callback function to override the default handler.
def
push
(
model_name
,
insecure
=
False
,
callback
=
None
):
try
:
url
=
f
"
{
BASE_URL
}
/api/push"
payload
=
{
"name"
:
model_name
,
"insecure"
:
insecure
}
# Making a POST request with the stream parameter set to True to handle streaming responses
with
requests
.
post
(
url
,
json
=
payload
,
stream
=
True
)
as
response
:
response
.
raise_for_status
()
# Iterating over the response line by line and displaying the details
for
line
in
response
.
iter_lines
():
if
line
:
# Parsing each line (JSON chunk) and extracting the details
chunk
=
json
.
loads
(
line
)
# If a callback function is provided, call it with the chunk
if
callback
:
callback
(
chunk
)
else
:
# Print the status message directly to the console
print
(
chunk
.
get
(
'status'
,
''
),
end
=
''
,
flush
=
True
)
# If there's layer data, you might also want to print that (adjust as necessary)
if
'digest'
in
chunk
:
print
(
f
" - Digest:
{
chunk
[
'digest'
]
}
"
,
end
=
''
,
flush
=
True
)
print
(
f
" - Total:
{
chunk
[
'total'
]
}
"
,
end
=
''
,
flush
=
True
)
print
(
f
" - Completed:
{
chunk
[
'completed'
]
}
"
,
end
=
'
\n
'
,
flush
=
True
)
else
:
print
()
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
# List models that are available locally.
def
list
():
try
:
response
=
requests
.
get
(
f
"
{
BASE_URL
}
/api/tags"
)
response
.
raise_for_status
()
data
=
response
.
json
()
models
=
data
.
get
(
'models'
,
[])
return
models
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
None
# Copy a model. Creates a model with another name from an existing model.
def
copy
(
source
,
destination
):
try
:
# Create the JSON payload
payload
=
{
"source"
:
source
,
"destination"
:
destination
}
response
=
requests
.
post
(
f
"
{
BASE_URL
}
/api/copy"
,
json
=
payload
)
response
.
raise_for_status
()
# If the request was successful, return a message indicating that the copy was successful
return
"Copy successful"
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
None
# Delete a model and its data.
def
delete
(
model_name
):
try
:
url
=
f
"
{
BASE_URL
}
/api/delete"
payload
=
{
"name"
:
model_name
}
response
=
requests
.
delete
(
url
,
json
=
payload
)
response
.
raise_for_status
()
return
"Delete successful"
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
None
# Show info about a model.
def
show
(
model_name
):
try
:
url
=
f
"
{
BASE_URL
}
/api/show"
payload
=
{
"name"
:
model_name
}
response
=
requests
.
post
(
url
,
json
=
payload
)
response
.
raise_for_status
()
# Parse the JSON response and return it
data
=
response
.
json
()
return
data
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
None
def
heartbeat
():
try
:
url
=
f
"
{
BASE_URL
}
/"
response
=
requests
.
head
(
url
)
response
.
raise_for_status
()
return
"Ollama is running"
except
requests
.
exceptions
.
RequestException
as
e
:
print
(
f
"An error occurred:
{
e
}
"
)
return
"Ollama is not running"
llm/dyn_ext_server.go
View file @
cbe2adc7
...
@@ -75,7 +75,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
...
@@ -75,7 +75,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
updatePath
(
filepath
.
Dir
(
library
))
updatePath
(
filepath
.
Dir
(
library
))
libPath
:=
C
.
CString
(
library
)
libPath
:=
C
.
CString
(
library
)
defer
C
.
free
(
unsafe
.
Pointer
(
libPath
))
defer
C
.
free
(
unsafe
.
Pointer
(
libPath
))
resp
:=
newExtServerResp
(
12
8
)
resp
:=
newExtServerResp
(
5
12
)
defer
freeExtServerResp
(
resp
)
defer
freeExtServerResp
(
resp
)
var
srv
C
.
struct_dynamic_llama_server
var
srv
C
.
struct_dynamic_llama_server
C
.
dyn_init
(
libPath
,
&
srv
,
&
resp
)
C
.
dyn_init
(
libPath
,
&
srv
,
&
resp
)
...
@@ -181,7 +181,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
...
@@ -181,7 +181,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
"seed"
:
predict
.
Options
.
Seed
,
"seed"
:
predict
.
Options
.
Seed
,
"stop"
:
predict
.
Options
.
Stop
,
"stop"
:
predict
.
Options
.
Stop
,
"image_data"
:
imageData
,
"image_data"
:
imageData
,
"cache_prompt"
:
true
,
}
}
if
predict
.
Format
==
"json"
{
if
predict
.
Format
==
"json"
{
...
...
llm/generate/gen_darwin.sh
View file @
cbe2adc7
...
@@ -14,9 +14,11 @@ BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
...
@@ -14,9 +14,11 @@ BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
case
"
${
GOARCH
}
"
in
case
"
${
GOARCH
}
"
in
"amd64"
)
"amd64"
)
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
${
CMAKE_DEFS
}
"
ARCH
=
"x86_64"
;;
;;
"arm64"
)
"arm64"
)
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on
${
CMAKE_DEFS
}
"
ARCH
=
"arm64"
;;
;;
*
)
*
)
echo
"GOARCH must be set"
echo
"GOARCH must be set"
...
@@ -30,6 +32,7 @@ apply_patches
...
@@ -30,6 +32,7 @@ apply_patches
build
build
install
install
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
-arch
${
ARCH
}
\
-Wl
,-force_load
${
BUILD_DIR
}
/lib/libext_server.a
\
-Wl
,-force_load
${
BUILD_DIR
}
/lib/libext_server.a
\
${
BUILD_DIR
}
/lib/libcommon.a
\
${
BUILD_DIR
}
/lib/libcommon.a
\
${
BUILD_DIR
}
/lib/libllama.a
\
${
BUILD_DIR
}
/lib/libllama.a
\
...
...
llm/generate/gen_linux.sh
View file @
cbe2adc7
...
@@ -39,8 +39,13 @@ amdGPUs() {
...
@@ -39,8 +39,13 @@ amdGPUs() {
}
}
echo
"Starting linux generate script"
echo
"Starting linux generate script"
if
[
-z
"
${
CUDACXX
}
"
-a
-x
/usr/local/cuda/bin/nvcc
]
;
then
if
[
-z
"
${
CUDACXX
}
"
]
;
then
if
[
-x
/usr/local/cuda/bin/nvcc
]
;
then
export
CUDACXX
=
/usr/local/cuda/bin/nvcc
export
CUDACXX
=
/usr/local/cuda/bin/nvcc
else
# Try the default location in case it exists
export
CUDACXX
=
$(
command
-v
nvcc
)
fi
fi
fi
COMMON_CMAKE_DEFS
=
"-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
COMMON_CMAKE_DEFS
=
"-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
source
$(
dirname
$0
)
/gen_common.sh
source
$(
dirname
$0
)
/gen_common.sh
...
@@ -109,33 +114,41 @@ else
...
@@ -109,33 +114,41 @@ else
echo
"Skipping CPU generation step as requested"
echo
"Skipping CPU generation step as requested"
fi
fi
for
cudalibpath
in
"/usr/local/cuda/lib64"
"/opt/cuda/targets/x86_64-linux/lib"
;
do
# If needed, look for the default CUDA toolkit location
if
[
-d
"
$cudalibpath
"
]
;
then
if
[
-z
"
${
CUDA_LIB_DIR
}
"
]
&&
[
-d
/usr/local/cuda/lib64
]
;
then
CUDA_LIB_DIR
=
/usr/local/cuda/lib64
fi
# If needed, look for CUDA on Arch Linux
if
[
-z
"
${
CUDA_LIB_DIR
}
"
]
&&
[
-d
/opt/cuda/targets/x86_64-linux/lib
]
;
then
CUDA_LIB_DIR
=
/opt/cuda/targets/x86_64-linux/lib
fi
if
[
-d
"
${
CUDA_LIB_DIR
}
"
]
;
then
echo
"CUDA libraries detected - building dynamic CUDA library"
echo
"CUDA libraries detected - building dynamic CUDA library"
init_vars
init_vars
CUDA_MAJOR
=
$(
find
"
$cudalibpath
"
-name
'
libcudart.so.*
'
-print
|
head
-1
|
cut
-f3
-d
.
||
true
)
CUDA_MAJOR
=
$(
ls
"
${
CUDA_LIB_DIR
}
"
/
libcudart.so.
*
|
head
-1
|
cut
-f3
-d
.
||
true
)
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
if
[
-n
"
${
CUDA_MAJOR
}
"
]
;
then
CUDA_VARIANT
=
"
_v
${
CUDA_MAJOR
}
"
CUDA_VARIANT
=
_v
${
CUDA_MAJOR
}
fi
fi
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
CMAKE_DEFS
=
"-DLLAMA_CUBLAS=on
${
COMMON_CMAKE_DEFS
}
${
CMAKE_DEFS
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cuda
${
CUDA_VARIANT
}
"
BUILD_DIR
=
"
${
LLAMACPP_DIR
}
/build/linux/cuda
${
CUDA_VARIANT
}
"
CUDA_LIB_DIR
=
"
$cudalibpath
"
build
build
install
install
gcc
-fPIC
-g
-shared
-o
"
${
BUILD_DIR
}
/lib/libext_server.so
"
\
gcc
-fPIC
-g
-shared
-o
${
BUILD_DIR
}
/lib/libext_server.so
\
-Wl
,--whole-archive
\
-Wl
,--whole-archive
\
"
${
BUILD_DIR
}
/lib/libext_server.a
"
\
${
BUILD_DIR
}
/lib/libext_server.a
\
"
${
BUILD_DIR
}
/lib/libcommon.a
"
\
${
BUILD_DIR
}
/lib/libcommon.a
\
"
${
BUILD_DIR
}
/lib/libllama.a
"
\
${
BUILD_DIR
}
/lib/libllama.a
\
-Wl
,--no-whole-archive
\
-Wl
,--no-whole-archive
\
"
${
CUDA_LIB_DIR
}
/libcudart_static.a"
\
${
CUDA_LIB_DIR
}
/libcudart_static.a
\
"
${
CUDA_LIB_DIR
}
/libcublas_static.a"
\
${
CUDA_LIB_DIR
}
/libcublas_static.a
\
"
${
CUDA_LIB_DIR
}
/libcublasLt_static.a"
\
${
CUDA_LIB_DIR
}
/libcublasLt_static.a
\
"
${
CUDA_LIB_DIR
}
/libcudadevrt.a"
\
${
CUDA_LIB_DIR
}
/libcudadevrt.a
\
"
${
CUDA_LIB_DIR
}
/libculibos.a"
\
${
CUDA_LIB_DIR
}
/libculibos.a
\
-lcuda
\
-lrt
-lpthread
-ldl
-lstdc
++
-lm
-lrt
-lpthread
-ldl
-lstdc
++
-lm
fi
fi
done
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
if
[
-z
"
${
ROCM_PATH
}
"
]
;
then
# Try the default location in case it exists
# Try the default location in case it exists
...
...
llama.cpp
@
584d674b
Compare
328b83de
...
584d674b
Subproject commit
328b83de23b33240e28f4e74900d1d06726f5eb1
Subproject commit
584d674be622fbf1578694ada6e62eebedbfd377
server/images.go
View file @
cbe2adc7
...
@@ -1132,6 +1132,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
...
@@ -1132,6 +1132,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
var
errUnauthorized
=
fmt
.
Errorf
(
"unauthorized"
)
var
errUnauthorized
=
fmt
.
Errorf
(
"unauthorized"
)
func
makeRequestWithRetry
(
ctx
context
.
Context
,
method
string
,
requestURL
*
url
.
URL
,
headers
http
.
Header
,
body
io
.
ReadSeeker
,
regOpts
*
RegistryOptions
)
(
*
http
.
Response
,
error
)
{
func
makeRequestWithRetry
(
ctx
context
.
Context
,
method
string
,
requestURL
*
url
.
URL
,
headers
http
.
Header
,
body
io
.
ReadSeeker
,
regOpts
*
RegistryOptions
)
(
*
http
.
Response
,
error
)
{
for
i
:=
0
;
i
<
2
;
i
++
{
resp
,
err
:=
makeRequest
(
ctx
,
method
,
requestURL
,
headers
,
body
,
regOpts
)
resp
,
err
:=
makeRequest
(
ctx
,
method
,
requestURL
,
headers
,
body
,
regOpts
)
if
err
!=
nil
{
if
err
!=
nil
{
if
!
errors
.
Is
(
err
,
context
.
Canceled
)
{
if
!
errors
.
Is
(
err
,
context
.
Canceled
)
{
...
@@ -1157,13 +1158,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
...
@@ -1157,13 +1158,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
return
nil
,
err
return
nil
,
err
}
}
}
}
resp
,
err
:=
makeRequest
(
ctx
,
method
,
requestURL
,
headers
,
body
,
regOpts
)
if
resp
.
StatusCode
==
http
.
StatusUnauthorized
{
return
nil
,
errUnauthorized
}
return
resp
,
err
case
resp
.
StatusCode
==
http
.
StatusNotFound
:
case
resp
.
StatusCode
==
http
.
StatusNotFound
:
return
nil
,
os
.
ErrNotExist
return
nil
,
os
.
ErrNotExist
case
resp
.
StatusCode
>=
http
.
StatusBadRequest
:
case
resp
.
StatusCode
>=
http
.
StatusBadRequest
:
...
@@ -1172,9 +1166,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
...
@@ -1172,9 +1166,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
return
nil
,
fmt
.
Errorf
(
"%d: %s"
,
resp
.
StatusCode
,
err
)
return
nil
,
fmt
.
Errorf
(
"%d: %s"
,
resp
.
StatusCode
,
err
)
}
}
return
nil
,
fmt
.
Errorf
(
"%d: %s"
,
resp
.
StatusCode
,
responseBody
)
return
nil
,
fmt
.
Errorf
(
"%d: %s"
,
resp
.
StatusCode
,
responseBody
)
default
:
return
resp
,
nil
}
}
}
return
resp
,
nil
return
nil
,
errUnauthorized
}
}
func
makeRequest
(
ctx
context
.
Context
,
method
string
,
requestURL
*
url
.
URL
,
headers
http
.
Header
,
body
io
.
Reader
,
regOpts
*
RegistryOptions
)
(
*
http
.
Response
,
error
)
{
func
makeRequest
(
ctx
context
.
Context
,
method
string
,
requestURL
*
url
.
URL
,
headers
http
.
Header
,
body
io
.
Reader
,
regOpts
*
RegistryOptions
)
(
*
http
.
Response
,
error
)
{
...
...
server/modelpath.go
View file @
cbe2adc7
...
@@ -46,6 +46,7 @@ func ParseModelPath(name string) ModelPath {
...
@@ -46,6 +46,7 @@ func ParseModelPath(name string) ModelPath {
name
=
after
name
=
after
}
}
name
=
strings
.
ReplaceAll
(
name
,
string
(
os
.
PathSeparator
),
"/"
)
parts
:=
strings
.
Split
(
name
,
"/"
)
parts
:=
strings
.
Split
(
name
,
"/"
)
switch
len
(
parts
)
{
switch
len
(
parts
)
{
case
3
:
case
3
:
...
...
server/routes.go
View file @
cbe2adc7
...
@@ -15,7 +15,6 @@ import (
...
@@ -15,7 +15,6 @@ import (
"path/filepath"
"path/filepath"
"reflect"
"reflect"
"runtime"
"runtime"
"strconv"
"strings"
"strings"
"sync"
"sync"
"syscall"
"syscall"
...
@@ -668,27 +667,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
...
@@ -668,27 +667,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
cs
:=
30
cs
:=
30
for
k
,
v
:=
range
model
.
Options
{
for
k
,
v
:=
range
model
.
Options
{
switch
val
:=
v
.
(
type
)
{
switch
val
:=
v
.
(
type
)
{
case
string
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
val
))
case
int
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
Itoa
(
val
)))
case
float64
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
FormatFloat
(
val
,
'f'
,
0
,
64
)))
case
bool
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
FormatBool
(
val
)))
case
[]
interface
{}
:
case
[]
interface
{}
:
for
_
,
nv
:=
range
val
{
for
_
,
nv
:=
range
val
{
switch
nval
:=
nv
.
(
type
)
{
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %#v"
,
cs
,
k
,
nv
))
case
string
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
nval
))
case
int
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
Itoa
(
nval
)))
case
float64
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
FormatFloat
(
nval
,
'f'
,
0
,
64
)))
case
bool
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %s"
,
cs
,
k
,
strconv
.
FormatBool
(
nval
)))
}
}
}
default
:
params
=
append
(
params
,
fmt
.
Sprintf
(
"%-*s %#v"
,
cs
,
k
,
v
))
}
}
}
}
resp
.
Parameters
=
strings
.
Join
(
params
,
"
\n
"
)
resp
.
Parameters
=
strings
.
Join
(
params
,
"
\n
"
)
...
...
server/routes_test.go
View file @
cbe2adc7
...
@@ -9,6 +9,7 @@ import (
...
@@ -9,6 +9,7 @@ import (
"net/http"
"net/http"
"net/http/httptest"
"net/http/httptest"
"os"
"os"
"sort"
"strings"
"strings"
"testing"
"testing"
...
@@ -50,7 +51,7 @@ func Test_Routes(t *testing.T) {
...
@@ -50,7 +51,7 @@ func Test_Routes(t *testing.T) {
createTestModel
:=
func
(
t
*
testing
.
T
,
name
string
)
{
createTestModel
:=
func
(
t
*
testing
.
T
,
name
string
)
{
fname
:=
createTestFile
(
t
,
"ollama-model"
)
fname
:=
createTestFile
(
t
,
"ollama-model"
)
modelfile
:=
strings
.
NewReader
(
fmt
.
Sprintf
(
"FROM %s"
,
fname
))
modelfile
:=
strings
.
NewReader
(
fmt
.
Sprintf
(
"FROM %s
\n
PARAMETER seed 42
\n
PARAMETER top_p 0.9
\n
PARAMETER stop foo
\n
PARAMETER stop bar
"
,
fname
))
commands
,
err
:=
parser
.
Parse
(
modelfile
)
commands
,
err
:=
parser
.
Parse
(
modelfile
)
assert
.
Nil
(
t
,
err
)
assert
.
Nil
(
t
,
err
)
fn
:=
func
(
resp
api
.
ProgressResponse
)
{
fn
:=
func
(
resp
api
.
ProgressResponse
)
{
...
@@ -167,6 +168,42 @@ func Test_Routes(t *testing.T) {
...
@@ -167,6 +168,42 @@ func Test_Routes(t *testing.T) {
assert
.
Equal
(
t
,
"beefsteak:latest"
,
model
.
ShortName
)
assert
.
Equal
(
t
,
"beefsteak:latest"
,
model
.
ShortName
)
},
},
},
},
{
Name
:
"Show Model Handler"
,
Method
:
http
.
MethodPost
,
Path
:
"/api/show"
,
Setup
:
func
(
t
*
testing
.
T
,
req
*
http
.
Request
)
{
createTestModel
(
t
,
"show-model"
)
showReq
:=
api
.
ShowRequest
{
Model
:
"show-model"
}
jsonData
,
err
:=
json
.
Marshal
(
showReq
)
assert
.
Nil
(
t
,
err
)
req
.
Body
=
io
.
NopCloser
(
bytes
.
NewReader
(
jsonData
))
},
Expected
:
func
(
t
*
testing
.
T
,
resp
*
http
.
Response
)
{
contentType
:=
resp
.
Header
.
Get
(
"Content-Type"
)
assert
.
Equal
(
t
,
contentType
,
"application/json; charset=utf-8"
)
body
,
err
:=
io
.
ReadAll
(
resp
.
Body
)
assert
.
Nil
(
t
,
err
)
var
showResp
api
.
ShowResponse
err
=
json
.
Unmarshal
(
body
,
&
showResp
)
assert
.
Nil
(
t
,
err
)
var
params
[]
string
paramsSplit
:=
strings
.
Split
(
showResp
.
Parameters
,
"
\n
"
)
for
_
,
p
:=
range
paramsSplit
{
params
=
append
(
params
,
strings
.
Join
(
strings
.
Fields
(
p
),
" "
))
}
sort
.
Strings
(
params
)
expectedParams
:=
[]
string
{
"seed 42"
,
"stop
\"
bar
\"
"
,
"stop
\"
foo
\"
"
,
"top_p 0.9"
,
}
assert
.
Equal
(
t
,
expectedParams
,
params
)
},
},
}
}
s
,
err
:=
setupServer
(
t
)
s
,
err
:=
setupServer
(
t
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment