OpenDAS / ollama · Commit de4fc297 (unverified)

llm: reserve required number of slots for embeddings (#6219)

Authored Aug 06, 2024 by Jeffrey Morgan; committed by GitHub on Aug 06, 2024
Parent: e04c7012

Showing 1 changed file with 12 additions and 7 deletions.

llm/server.go (+12 -7)
@@ -44,11 +44,12 @@ type LlamaServer interface {
 // llmServer is an instance of the llama.cpp server
 type llmServer struct {
 	port        int
 	cmd         *exec.Cmd
 	done        chan error // Channel to signal when the process exits
 	status      *StatusWriter
 	options     api.Options
+	numParallel int
 
 	estimate    MemoryEstimate
 	totalLayers uint64
@@ -343,6 +344,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		status:      NewStatusWriter(os.Stderr),
 		options:     opts,
 		estimate:    estimate,
+		numParallel: numParallel,
 		sem:         semaphore.NewWeighted(int64(numParallel)),
 		totalLayers: ggml.KV().BlockCount() + 1,
 		gpus:        gpus,
@@ -890,11 +892,14 @@ type EmbedResponse struct {
 }
 
 func (s *llmServer) Embed(ctx context.Context, input []string) (*EmbedResponse, error) {
-	if err := s.sem.Acquire(ctx, 1); err != nil {
+	// each input will use a slot, so we need to acquire the semaphore for
+	// the number of inputs up to numParallel
+	slots := int64(min(len(input), s.numParallel))
+	if err := s.sem.Acquire(ctx, slots); err != nil {
 		slog.Error("Failed to acquire semaphore", "error", err)
 		return nil, err
 	}
-	defer s.sem.Release(1)
+	defer s.sem.Release(slots)
 
 	// Make sure the server is ready
 	status, err := s.getServerStatusRetry(ctx)
...
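The last hunk is the substance of the change: instead of always acquiring a single semaphore slot, Embed now reserves one slot per input, capped at numParallel, so a batch of embedding inputs cannot admit more concurrent work than the server was configured for. Below is a minimal, self-contained sketch of that slot-reservation pattern using golang.org/x/sync/semaphore; the names fakeEmbedServer and embedOne are illustrative placeholders and not part of ollama, only the semaphore handling mirrors the diff.

// Sketch of the slot-reservation pattern from the diff above.
// fakeEmbedServer and embedOne are assumed stand-ins, not ollama code.
package main

import (
	"context"
	"fmt"
	"log/slog"

	"golang.org/x/sync/semaphore"
)

type fakeEmbedServer struct {
	numParallel int
	sem         *semaphore.Weighted
}

// Embed reserves one slot per input, capped at numParallel, so a single
// batch cannot claim more capacity than the server exposes.
func (s *fakeEmbedServer) Embed(ctx context.Context, input []string) ([][]float32, error) {
	slots := int64(min(len(input), s.numParallel))
	if err := s.sem.Acquire(ctx, slots); err != nil {
		slog.Error("Failed to acquire semaphore", "error", err)
		return nil, err
	}
	defer s.sem.Release(slots)

	out := make([][]float32, len(input))
	for i, text := range input {
		out[i] = embedOne(text) // stand-in for the real llama.cpp embedding call
	}
	return out, nil
}

// embedOne returns a dummy vector; a real server would call into llama.cpp.
func embedOne(text string) []float32 {
	return []float32{float32(len(text))}
}

func main() {
	s := &fakeEmbedServer{
		numParallel: 4,
		sem:         semaphore.NewWeighted(4),
	}
	vecs, err := s.Embed(context.Background(), []string{"hello", "world"})
	fmt.Println(vecs, err)
}

Releasing the same slots count that was acquired (via defer) keeps the weighted semaphore balanced even when the embedding call returns early with an error.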