Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
5b5cc9c9
Unverified
Commit
5b5cc9c9
authored
Aug 10, 2023
by
Bruce MacDonald
Committed by
GitHub
Aug 10, 2023
Browse files
embeddings endpoint
parents
5ebce03c
4b3507f0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
85 additions
and
31 deletions
+85
-31
api/types.go
api/types.go
+11
-0
server/routes.go
server/routes.go
+74
-31
No files found.
api/types.go
View file @
5b5cc9c9
...
...
@@ -42,6 +42,17 @@ type GenerateRequest struct {
Options
map
[
string
]
interface
{}
`json:"options"`
}
type
EmbeddingRequest
struct
{
Model
string
`json:"model"`
Prompt
string
`json:"prompt"`
Options
map
[
string
]
interface
{}
`json:"options"`
}
type
EmbeddingResponse
struct
{
Embedding
[]
float64
`json:"embedding"`
}
type
CreateRequest
struct
{
Name
string
`json:"name"`
Path
string
`json:"path"`
...
...
server/routes.go
View file @
5b5cc9c9
...
...
@@ -38,35 +38,17 @@ var loaded struct {
options
api
.
Options
}
func
GenerateHandler
(
c
*
gin
.
Context
)
{
loaded
.
mu
.
Lock
()
defer
loaded
.
mu
.
Unlock
()
checkpointStart
:=
time
.
Now
()
var
req
api
.
GenerateRequest
if
err
:=
c
.
ShouldBindJSON
(
&
req
);
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
model
,
err
:=
GetModel
(
req
.
Model
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func
load
(
model
*
Model
,
reqOpts
map
[
string
]
interface
{},
sessionDuration
time
.
Duration
)
error
{
opts
:=
api
.
DefaultOptions
()
if
err
:=
opts
.
FromMap
(
model
.
Options
);
err
!=
nil
{
log
.
Printf
(
"could not load model options: %v"
,
err
)
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
return
err
}
if
err
:=
opts
.
FromMap
(
req
.
Opt
ion
s
);
err
!=
nil
{
if
err
:=
opts
.
FromMap
(
reqOpts
);
err
!=
nil
{
log
.
Printf
(
"could not merge model options: %v"
,
err
)
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
return
err
}
if
model
.
Digest
!=
loaded
.
digest
||
!
reflect
.
DeepEqual
(
loaded
.
options
,
opts
)
{
...
...
@@ -83,21 +65,18 @@ func GenerateHandler(c *gin.Context) {
llm
,
err
:=
llama
.
New
(
model
.
ModelPath
,
opts
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
return
err
}
if
opts
.
NumKeep
<
0
{
promptWithSystem
,
err
:=
model
.
Prompt
(
api
.
GenerateRequest
{},
""
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
return
err
}
promptNoSystem
,
err
:=
model
.
Prompt
(
api
.
GenerateRequest
{
Context
:
[]
int
{
0
}},
""
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
return
err
}
tokensWithSystem
:=
llm
.
Encode
(
promptWithSystem
)
...
...
@@ -110,9 +89,8 @@ func GenerateHandler(c *gin.Context) {
loaded
.
digest
=
model
.
Digest
loaded
.
options
=
opts
}
sessionDuration
:=
5
*
time
.
Minute
loaded
.
expireAt
=
time
.
Now
()
.
Add
(
sessionDuration
)
if
loaded
.
expireTimer
==
nil
{
loaded
.
expireTimer
=
time
.
AfterFunc
(
sessionDuration
,
func
()
{
loaded
.
mu
.
Lock
()
...
...
@@ -132,6 +110,32 @@ func GenerateHandler(c *gin.Context) {
})
}
loaded
.
expireTimer
.
Reset
(
sessionDuration
)
return
nil
}
func
GenerateHandler
(
c
*
gin
.
Context
)
{
loaded
.
mu
.
Lock
()
defer
loaded
.
mu
.
Unlock
()
checkpointStart
:=
time
.
Now
()
var
req
api
.
GenerateRequest
if
err
:=
c
.
ShouldBindJSON
(
&
req
);
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
model
,
err
:=
GetModel
(
req
.
Model
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
sessionDuration
:=
5
*
time
.
Minute
if
err
:=
load
(
model
,
req
.
Options
,
sessionDuration
);
err
!=
nil
{
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
checkpointLoaded
:=
time
.
Now
()
...
...
@@ -181,6 +185,44 @@ func GenerateHandler(c *gin.Context) {
streamResponse
(
c
,
ch
)
}
func
EmbeddingHandler
(
c
*
gin
.
Context
)
{
loaded
.
mu
.
Lock
()
defer
loaded
.
mu
.
Unlock
()
var
req
api
.
EmbeddingRequest
if
err
:=
c
.
ShouldBindJSON
(
&
req
);
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
model
,
err
:=
GetModel
(
req
.
Model
)
if
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
if
err
:=
load
(
model
,
req
.
Options
,
5
*
time
.
Minute
);
err
!=
nil
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
return
}
if
!
loaded
.
options
.
EmbeddingOnly
{
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
"embedding option must be set to true"
})
return
}
embedding
,
err
:=
loaded
.
llm
.
Embedding
(
req
.
Prompt
)
if
err
!=
nil
{
log
.
Printf
(
"embedding generation failed: %v"
,
err
)
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
"failed to generate embedding"
})
return
}
resp
:=
api
.
EmbeddingResponse
{
Embedding
:
embedding
,
}
c
.
JSON
(
http
.
StatusOK
,
resp
)
}
func
PullModelHandler
(
c
*
gin
.
Context
)
{
var
req
api
.
PullRequest
if
err
:=
c
.
ShouldBindJSON
(
&
req
);
err
!=
nil
{
...
...
@@ -381,6 +423,7 @@ func Serve(ln net.Listener, extraOrigins []string) error {
r
.
POST
(
"/api/pull"
,
PullModelHandler
)
r
.
POST
(
"/api/generate"
,
GenerateHandler
)
r
.
POST
(
"/api/embeddings"
,
EmbeddingHandler
)
r
.
POST
(
"/api/create"
,
CreateModelHandler
)
r
.
POST
(
"/api/push"
,
PushModelHandler
)
r
.
POST
(
"/api/copy"
,
CopyModelHandler
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment