OpenDAS / ollama · Commits

Commit 68459888 (unverified)
Ollama `ps` command for showing currently loaded models (#4327)

Authored May 13, 2024 by Patrick Devine; committed by GitHub on May 13, 2024
Parent commit: 9eed4a90

Showing 10 changed files with 193 additions and 50 deletions (+193, -50):
api/client.go         +9   -0
api/types.go          +3   -1
cmd/cmd.go            +75  -0
cmd/interactive.go    +5   -0
format/time.go        +3   -1
format/time_test.go   +10  -0
llm/server.go         +5   -0
server/routes.go      +29  -0
server/sched.go       +42  -38
server/sched_test.go  +12  -10
api/client.go:

```diff
@@ -354,6 +354,15 @@ func (c *Client) List(ctx context.Context) (*ListResponse, error) {
 	return &lr, nil
 }
 
+// List running models.
+func (c *Client) ListRunning(ctx context.Context) (*ListResponse, error) {
+	var lr ListResponse
+	if err := c.do(ctx, http.MethodGet, "/api/ps", nil, &lr); err != nil {
+		return nil, err
+	}
+	return &lr, nil
+}
+
 // Copy copies a model - creating a model with another name from an existing
 // model.
 func (c *Client) Copy(ctx context.Context, req *CopyRequest) error {
```
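For context, a minimal sketch of how a caller might use the new method, assuming the `github.com/ollama/ollama/api` module path and a reachable server; the loop body is illustrative:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// Calls GET /api/ps via the helper added in this commit.
	running, err := client.ListRunning(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range running.Models {
		fmt.Printf("%s: %d of %d bytes in VRAM\n", m.Name, m.SizeVRAM, m.Size)
	}
}
```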
api/types.go:

```diff
@@ -289,10 +289,12 @@ type ListResponse struct {
 type ModelResponse struct {
 	Name       string       `json:"name"`
 	Model      string       `json:"model"`
-	ModifiedAt time.Time    `json:"modified_at"`
+	ModifiedAt time.Time    `json:"modified_at,omitempty"`
 	Size       int64        `json:"size"`
 	Digest     string       `json:"digest"`
 	Details    ModelDetails `json:"details,omitempty"`
+	ExpiresAt  time.Time    `json:"expires_at,omitempty"`
+	SizeVRAM   int64        `json:"size_vram,omitempty"`
 }
 
 type TokenResponse struct {
```
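A small sketch of how the new fields serialize (all values invented). One subtlety: `omitempty` elides a zero `size_vram`, but `time.Time` is a struct, so encoding/json still emits a zero `expires_at`:

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"

	"github.com/ollama/ollama/api"
)

func main() {
	mr := api.ModelResponse{
		Name:      "llama3:latest", // illustrative
		Model:     "llama3:latest",
		Size:      5 << 30, // fully offloaded in this example
		SizeVRAM:  5 << 30,
		ExpiresAt: time.Now().Add(5 * time.Minute),
	}
	out, err := json.MarshalIndent(mr, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
```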
cmd/cmd.go:

```diff
@@ -12,6 +12,7 @@ import (
 	"fmt"
 	"io"
 	"log"
+	"math"
 	"net"
 	"net/http"
 	"os"
@@ -324,6 +325,18 @@ func RunHandler(cmd *cobra.Command, args []string) error {
 	}
 	opts.Format = format
 
+	keepAlive, err := cmd.Flags().GetString("keepalive")
+	if err != nil {
+		return err
+	}
+	if keepAlive != "" {
+		d, err := time.ParseDuration(keepAlive)
+		if err != nil {
+			return err
+		}
+		opts.KeepAlive = &api.Duration{Duration: d}
+	}
+
 	prompts := args[1:]
 	// prepend stdin to the prompt if provided
 	if !term.IsTerminal(int(os.Stdin.Fd())) {
```
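The flag value goes straight through `time.ParseDuration`, so standard Go duration strings apply; a quick illustration of the accepted forms:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Values a user might pass via `ollama run --keepalive`.
	for _, s := range []string{"5m", "1h30m", "300s"} {
		d, err := time.ParseDuration(s)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-6s => %s\n", s, d)
	}
}
```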
cmd/cmd.go (continued):

```diff
@@ -496,6 +509,52 @@ func ListHandler(cmd *cobra.Command, args []string) error {
 	return nil
 }
 
+func ListRunningHandler(cmd *cobra.Command, args []string) error {
+	client, err := api.ClientFromEnvironment()
+	if err != nil {
+		return err
+	}
+
+	models, err := client.ListRunning(cmd.Context())
+	if err != nil {
+		return err
+	}
+
+	var data [][]string
+
+	for _, m := range models.Models {
+		if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
+			var procStr string
+			switch {
+			case m.SizeVRAM == 0:
+				procStr = "100% CPU"
+			case m.SizeVRAM == m.Size:
+				procStr = "100% GPU"
+			case m.SizeVRAM > m.Size || m.Size == 0:
+				procStr = "Unknown"
+			default:
+				sizeCPU := m.Size - m.SizeVRAM
+				cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
+				procStr = fmt.Sprintf("%d%%/%d%% CPU/GPU", int(cpuPercent), int(100-cpuPercent))
+			}
+			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, format.HumanTime(m.ExpiresAt, "Never")})
+		}
+	}
+
+	table := tablewriter.NewWriter(os.Stdout)
+	table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"})
+	table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
+	table.SetAlignment(tablewriter.ALIGN_LEFT)
+	table.SetHeaderLine(false)
+	table.SetBorder(false)
+	table.SetNoWhiteSpace(true)
+	table.SetTablePadding("\t")
+	table.AppendBulk(data)
+	table.Render()
+
+	return nil
+}
+
 func DeleteHandler(cmd *cobra.Command, args []string) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
```
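To make the PROCESSOR column concrete, here is the same split computed standalone for a hypothetical partially offloaded model (the sizes are invented):

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Hypothetical model: 8 GiB total, 6 GiB resident in VRAM.
	var size, sizeVRAM int64 = 8 << 30, 6 << 30

	sizeCPU := size - sizeVRAM
	cpuPercent := math.Round(float64(sizeCPU) / float64(size) * 100)
	fmt.Printf("%d%%/%d%% CPU/GPU\n", int(cpuPercent), int(100-cpuPercent))
	// Output: 25%/75% CPU/GPU
}
```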
cmd/cmd.go (continued):

```diff
@@ -672,6 +731,7 @@ type runOptions struct {
 	Images     []api.ImageData
 	Options    map[string]interface{}
 	MultiModal bool
+	KeepAlive  *api.Duration
 }
 
 type displayResponseState struct {
@@ -766,6 +826,10 @@ func chat(cmd *cobra.Command, opts runOptions) (*api.Message, error) {
 		Options:  opts.Options,
 	}
 
+	if opts.KeepAlive != nil {
+		req.KeepAlive = opts.KeepAlive
+	}
+
 	if err := client.Chat(cancelCtx, req, fn); err != nil {
 		if errors.Is(err, context.Canceled) {
 			return nil, nil
@@ -1075,6 +1139,7 @@ func NewCLI() *cobra.Command {
 		RunE:    RunHandler,
 	}
 
+	runCmd.Flags().String("keepalive", "", "Duration to keep a model loaded (e.g. 5m)")
 	runCmd.Flags().Bool("verbose", false, "Show timings for response")
 	runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
 	runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
@@ -1123,6 +1188,14 @@ Environment Variables:
 		PreRunE: checkServerHeartbeat,
 		RunE:    ListHandler,
 	}
 
+	psCmd := &cobra.Command{
+		Use:     "ps",
+		Short:   "List running models",
+		PreRunE: checkServerHeartbeat,
+		RunE:    ListRunningHandler,
+	}
+
 	copyCmd := &cobra.Command{
 		Use:   "cp SOURCE DESTINATION",
 		Short: "Copy a model",
@@ -1146,6 +1219,7 @@ Environment Variables:
 		pullCmd,
 		pushCmd,
 		listCmd,
+		psCmd,
 		copyCmd,
 		deleteCmd,
 	} {
@@ -1160,6 +1234,7 @@ Environment Variables:
 		pullCmd,
 		pushCmd,
 		listCmd,
+		psCmd,
 		copyCmd,
 		deleteCmd,
 	)
```
cmd/interactive.go:

```diff
@@ -56,6 +56,11 @@ func loadModel(cmd *cobra.Command, opts *runOptions) error {
 		Model:    opts.Model,
 		Messages: []api.Message{},
 	}
+
+	if opts.KeepAlive != nil {
+		chatReq.KeepAlive = opts.KeepAlive
+	}
+
 	err = client.Chat(cmd.Context(), chatReq, func(resp api.ChatResponse) error {
 		p.StopAndClear()
 		if len(opts.Messages) > 0 {
```
format/time.go:

```diff
@@ -60,7 +60,9 @@ func humanTime(t time.Time, zeroValue string) string {
 	}
 
 	delta := time.Since(t)
-	if delta < 0 {
+	if int(delta.Hours())/24/365 < -20 {
+		return "Forever"
+	} else if delta < 0 {
 		return humanDuration(-delta) + " from now"
 	}
```
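A quick check of the new threshold: for a timestamp far in the future, `time.Since` yields a large negative duration, and the integer year count falls below -20, which is what triggers "Forever" (the 200-year figure mirrors the tests below):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	t := time.Now().Add(24 * time.Hour * 365 * 200) // ~200 years from now
	delta := time.Since(t)                          // large negative duration
	years := int(delta.Hours()) / 24 / 365          // roughly -200
	fmt.Println(years < -20)                        // true => humanTime reports "Forever"
}
```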
format/time_test.go:

```diff
@@ -32,4 +32,14 @@ func TestHumanTime(t *testing.T) {
 		v := now.Add(800 * time.Millisecond)
 		assertEqual(t, HumanTime(v, ""), "Less than a second from now")
 	})
+
+	t.Run("time way in the future", func(t *testing.T) {
+		v := now.Add(24 * time.Hour * 365 * 200)
+		assertEqual(t, HumanTime(v, ""), "Forever")
+	})
+
+	t.Run("time way in the future lowercase", func(t *testing.T) {
+		v := now.Add(24 * time.Hour * 365 * 200)
+		assertEqual(t, HumanTimeLower(v, ""), "forever")
+	})
 }
```
llm/server.go:

```diff
@@ -38,6 +38,7 @@ type LlamaServer interface {
 	Detokenize(ctx context.Context, tokens []int) (string, error)
 	Close() error
 	EstimatedVRAM() uint64
+	EstimatedTotal() uint64
 }
 
 // llmServer is an instance of the llama.cpp server
@@ -955,6 +956,10 @@ func (s *llmServer) EstimatedVRAM() uint64 {
 	return s.estimatedVRAM
 }
 
+func (s *llmServer) EstimatedTotal() uint64 {
+	return s.estimatedTotal
+}
+
 func parseDurationMs(ms float64) time.Duration {
 	dur, err := time.ParseDuration(fmt.Sprintf("%fms", ms))
 	if err != nil {
```
server/routes.go:

```diff
@@ -979,6 +979,7 @@ func (s *Server) GenerateRoutes() http.Handler {
 	r.POST("/api/show", s.ShowModelHandler)
 	r.POST("/api/blobs/:digest", s.CreateBlobHandler)
 	r.HEAD("/api/blobs/:digest", s.HeadBlobHandler)
+	r.GET("/api/ps", s.ProcessHandler)
 
 	// Compatibility endpoints
 	r.POST("/v1/chat/completions", openai.Middleware(), s.ChatHandler)
@@ -1137,6 +1138,34 @@ func streamResponse(c *gin.Context, ch chan any) {
 	})
 }
 
+func (s *Server) ProcessHandler(c *gin.Context) {
+	models := []api.ModelResponse{}
+
+	for _, v := range s.sched.loaded {
+		model := v.model
+		modelDetails := api.ModelDetails{
+			Format:            model.Config.ModelFormat,
+			Family:            model.Config.ModelFamily,
+			Families:          model.Config.ModelFamilies,
+			ParameterSize:     model.Config.ModelType,
+			QuantizationLevel: model.Config.FileType,
+		}
+
+		mr := api.ModelResponse{
+			Model:     model.ShortName,
+			Name:      model.ShortName,
+			Size:      int64(v.estimatedTotal),
+			SizeVRAM:  int64(v.estimatedVRAM),
+			Digest:    model.Digest,
+			Details:   modelDetails,
+			ExpiresAt: v.expiresAt,
+		}
+
+		models = append(models, mr)
+	}
+
+	c.JSON(http.StatusOK, api.ListResponse{Models: models})
+}
+
 // ChatPrompt builds up a prompt from a series of messages for the currently `loaded` model
 func chatPrompt(ctx context.Context, runner *runnerRef, template string, messages []api.Message, numCtx int) (string, error) {
 	encode := func(s string) ([]int, error) {
```
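A minimal sketch of querying the new route directly, assuming the server's default address of 127.0.0.1:11434:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/ollama/ollama/api"
)

func main() {
	resp, err := http.Get("http://127.0.0.1:11434/api/ps")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// The handler responds with an api.ListResponse, same shape as /api/tags.
	var lr api.ListResponse
	if err := json.NewDecoder(resp.Body).Decode(&lr); err != nil {
		log.Fatal(err)
	}
	for _, m := range lr.Models {
		fmt.Printf("%s: %d bytes total, %d in VRAM, expires %s\n",
			m.Name, m.Size, m.SizeVRAM, m.ExpiresAt)
	}
}
```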
server/sched.go:

```diff
@@ -177,7 +177,7 @@ func (s *Scheduler) processPending(ctx context.Context) {
 		}
 		// Trigger an expiration to unload once it's done
 		runnerToExpire.refMu.Lock()
-		slog.Debug("resetting model to expire immediately to make room", "model", runnerToExpire.model, "refCount", runnerToExpire.refCount)
+		slog.Debug("resetting model to expire immediately to make room", "modelPath", runnerToExpire.modelPath, "refCount", runnerToExpire.refCount)
 		if runnerToExpire.expireTimer != nil {
 			runnerToExpire.expireTimer.Stop()
 			runnerToExpire.expireTimer = nil
@@ -190,13 +190,13 @@ func (s *Scheduler) processPending(ctx context.Context) {
 		// Wait for the unload to happen
 		// Note: at this point we're queueing up all incoming requests, even if they were for
 		// a different model that's loaded and not scheduled to be removed.
-		slog.Debug("waiting for pending requests to complete and unload to occur", "model", runnerToExpire.model)
+		slog.Debug("waiting for pending requests to complete and unload to occur", "modelPath", runnerToExpire.modelPath)
 		select {
 		case <-ctx.Done():
 			slog.Debug("shutting down scheduler pending loop")
 			return
 		case <-s.unloadedCh:
-			slog.Debug("unload completed", "model", runnerToExpire.model)
+			slog.Debug("unload completed", "modelPath", runnerToExpire.modelPath)
 			continue
 		}
 	}
@@ -219,23 +219,23 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 			runner := s.loaded[finished.model.ModelPath]
 			s.loadedMu.Unlock()
 			if runner == nil {
-				slog.Error("finished requeset signal received after model unloaded", "model", finished.model.ModelPath)
+				slog.Error("finished requeset signal received after model unloaded", "modelPath", finished.model.ModelPath)
 				continue
 			}
 			runner.refMu.Lock()
 			runner.refCount--
 			if runner.refCount <= 0 {
 				if runner.sessionDuration <= 0 {
-					slog.Debug("runner with zero duration has gone idle, expiring to unload", "model", runner.model)
+					slog.Debug("runner with zero duration has gone idle, expiring to unload", "modelPath", runner.modelPath)
 					if runner.expireTimer != nil {
 						runner.expireTimer.Stop()
 						runner.expireTimer = nil
 					}
 					s.expiredCh <- runner
 				} else if runner.expireTimer == nil {
-					slog.Debug("runner with non-zero duration has gone idle, adding timer", "model", runner.model, "duration", runner.sessionDuration)
+					slog.Debug("runner with non-zero duration has gone idle, adding timer", "modelPath", runner.modelPath, "duration", runner.sessionDuration)
 					runner.expireTimer = time.AfterFunc(runner.sessionDuration, func() {
-						slog.Debug("timer expired, expiring to unload", "model", runner.model)
+						slog.Debug("timer expired, expiring to unload", "modelPath", runner.modelPath)
 						runner.refMu.Lock()
 						defer runner.refMu.Unlock()
 						if runner.expireTimer != nil {
@@ -244,19 +244,21 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 						}
 						s.expiredCh <- runner
 					})
+					runner.expiresAt = time.Now().Add(runner.sessionDuration)
 				} else {
-					slog.Debug("runner with non-zero duration has gone idle, resetting timer", "model", runner.model, "duration", runner.sessionDuration)
+					slog.Debug("runner with non-zero duration has gone idle, resetting timer", "modelPath", runner.modelPath, "duration", runner.sessionDuration)
 					runner.expireTimer.Reset(runner.sessionDuration)
+					runner.expiresAt = time.Now().Add(runner.sessionDuration)
 				}
 			}
-			slog.Debug("after processing request finished event", "model", runner.model, "refCount", runner.refCount)
+			slog.Debug("after processing request finished event", "modelPath", runner.modelPath, "refCount", runner.refCount)
 			runner.refMu.Unlock()
 		case runner := <-s.expiredCh:
-			slog.Debug("runner expired event received", "model", runner.model)
+			slog.Debug("runner expired event received", "modelPath", runner.modelPath)
 			runner.refMu.Lock()
 			if runner.refCount > 0 {
 				// Shouldn't happen, but safeguard to ensure no leaked runners
-				slog.Debug("expired event with positive ref count, retrying", "model", runner.model, "refCount", runner.refCount)
+				slog.Debug("expired event with positive ref count, retrying", "modelPath", runner.modelPath, "refCount", runner.refCount)
 				go func(runner *runnerRef) {
 					// We can't unload yet, but want to as soon as the current request completes
 					// So queue up another expired event
@@ -268,16 +270,16 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 			}
 
 			s.loadedMu.Lock()
-			slog.Debug("got lock to unload", "model", runner.model)
+			slog.Debug("got lock to unload", "modelPath", runner.modelPath)
 			finished := runner.waitForVRAMRecovery()
 			runner.unload()
-			delete(s.loaded, runner.model)
+			delete(s.loaded, runner.modelPath)
 			s.loadedMu.Unlock()
-			slog.Debug("runner released", "model", runner.model)
+			slog.Debug("runner released", "modelPath", runner.modelPath)
 			runner.refMu.Unlock()
 
 			<-finished
-			slog.Debug("sending an unloaded event", "model", runner.model)
+			slog.Debug("sending an unloaded event", "modelPath", runner.modelPath)
 			s.unloadedCh <- struct{}{}
 		}
 	}
```
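The scheduler's idle-expiry bookkeeping is the standard `time.AfterFunc` pattern, now with an `expiresAt` timestamp recorded alongside the timer so `/api/ps` can report it. A stripped-down sketch of that pattern (the names here are illustrative, not the scheduler's):

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

type runner struct {
	mu        sync.Mutex
	timer     *time.Timer
	expiresAt time.Time
}

// touch arms (or re-arms) the idle-expiry timer and records the
// deadline so it can be reported to callers, as /api/ps does.
func (r *runner) touch(d time.Duration, onExpire func()) {
	r.mu.Lock()
	defer r.mu.Unlock()
	if r.timer == nil {
		r.timer = time.AfterFunc(d, onExpire)
	} else {
		r.timer.Reset(d)
	}
	r.expiresAt = time.Now().Add(d)
}

func main() {
	r := &runner{}
	r.touch(100*time.Millisecond, func() { fmt.Println("expired, unloading") })
	fmt.Println("expires at", r.expiresAt.Format(time.RFC3339))
	time.Sleep(200 * time.Millisecond) // let the timer fire
}
```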
server/sched.go (continued):

```diff
@@ -316,18 +318,20 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
 		req.errCh <- err
 		return
 	}
-	runner := &runnerRef{}
-	runner.model = req.model.ModelPath
-	runner.adapters = req.model.AdapterPaths
-	runner.projectors = req.model.ProjectorPaths
-	runner.llama = llama
-	runner.Options = &req.opts
-	runner.sessionDuration = req.sessionDuration
-	runner.gpus = gpus
-	runner.estimatedVRAM = llama.EstimatedVRAM()
-	runner.loading = true
-	runner.refCount = 1
+	runner := &runnerRef{
+		model:           req.model,
+		modelPath:       req.model.ModelPath,
+		llama:           llama,
+		Options:         &req.opts,
+		sessionDuration: req.sessionDuration,
+		gpus:            gpus,
+		estimatedVRAM:   llama.EstimatedVRAM(),
+		estimatedTotal:  llama.EstimatedTotal(),
+		loading:         true,
+		refCount:        1,
+	}
 	runner.refMu.Lock()
 	s.loadedMu.Lock()
 	s.loaded[req.model.ModelPath] = runner
 	slog.Info("loaded runners", "count", len(s.loaded))
@@ -339,7 +343,7 @@ func (s *Scheduler) load(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList)
 			slog.Error("error loading llama server", "error", err)
 			runner.refCount--
 			req.errCh <- err
-			slog.Debug("triggering expiration for failed load", "model", runner.model)
+			slog.Debug("triggering expiration for failed load", "model", runner.modelPath)
 			s.expiredCh <- runner
 			return
 		}
@@ -408,17 +412,18 @@ type runnerRef struct {
 	refCount uint // prevent unloading if > 0
 	// unloading bool      // set to true when we are trying to unload the runner
 
-	llama         llm.LlamaServer
-	loading       bool            // True only during initial load, then false forever
-	gpus          gpu.GpuInfoList // Recorded at time of provisioning
-	estimatedVRAM uint64
+	llama          llm.LlamaServer
+	loading        bool            // True only during initial load, then false forever
+	gpus           gpu.GpuInfoList // Recorded at time of provisioning
+	estimatedVRAM  uint64
+	estimatedTotal uint64
 
 	sessionDuration time.Duration
 	expireTimer     *time.Timer
+	expiresAt       time.Time
 
-	model      string
-	adapters   []string
-	projectors []string
+	model     *Model
+	modelPath string
 	*api.Options
 }
@@ -431,9 +436,8 @@ func (runner *runnerRef) unload() {
 	if runner.llama != nil {
 		runner.llama.Close()
 	}
+	runner.model = nil
 	runner.llama = nil
-	runner.adapters = nil
-	runner.projectors = nil
 	runner.Options = nil
 	runner.gpus = nil
 }
@@ -462,8 +466,8 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
 	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
 
-	if !reflect.DeepEqual(runner.adapters, req.model.AdapterPaths) || // have the adapters changed?
-		!reflect.DeepEqual(runner.projectors, req.model.ProjectorPaths) || // have the projectors changed?
+	if !reflect.DeepEqual(runner.model.AdapterPaths, req.model.AdapterPaths) || // have the adapters changed?
+		!reflect.DeepEqual(runner.model.ProjectorPaths, req.model.ProjectorPaths) || // have the projectors changed?
 		!reflect.DeepEqual(optsExisting, optsNew) || // have the runner options changed?
 		runner.llama.Ping(ctx) != nil {
 		return true
```
server/sched_test.go:

```diff
@@ -164,7 +164,8 @@ func TestRequests(t *testing.T) {
 	// simple reload of same model
 	scenario2a := newScenario(t, ctx, "ollama-model-1", 20)
-	scenario2a.req.model = scenario1a.req.model
+	tmpModel := *scenario1a.req.model
+	scenario2a.req.model = &tmpModel
 	scenario2a.ggml = scenario1a.ggml
 
 	// Multiple loaded models
@@ -496,10 +497,9 @@ func TestNeedsReload(t *testing.T) {
 	llm := &mockLlm{}
 	do := api.DefaultOptions()
 	runner := &runnerRef{
-		adapters:   []string{"adapter1"},
-		projectors: []string{"projector1"},
-		Options:    &do,
-		llama:      llm,
+		model:   &Model{AdapterPaths: []string{"adapter1"}, ProjectorPaths: []string{"projector1"}},
+		Options: &do,
+		llama:   llm,
 	}
 	req := &LlmRequest{
 		model: &Model{
@@ -510,10 +510,10 @@ func TestNeedsReload(t *testing.T) {
 	}
 	resp := runner.needsReload(ctx, req)
 	require.True(t, resp)
-	req.model.AdapterPaths = runner.adapters
+	req.model.AdapterPaths = runner.model.AdapterPaths
 	resp = runner.needsReload(ctx, req)
 	require.True(t, resp)
-	req.model.ProjectorPaths = runner.projectors
+	req.model.ProjectorPaths = runner.model.ProjectorPaths
 	runner.loading = true
 	req.opts.NumBatch = 1234
 	resp = runner.needsReload(ctx, req)
@@ -558,11 +558,11 @@ func TestUnloadAllRunners(t *testing.T) {
 func TestUnload(t *testing.T) {
 	llm1 := &mockLlm{}
 	r1 := &runnerRef{llama: llm1}
-	r2 := &runnerRef{adapters: []string{"A"}}
+	r2 := &runnerRef{model: &Model{AdapterPaths: []string{"A"}}}
 	r1.unload()
 	require.True(t, llm1.closeCalled)
 	r2.unload()
-	require.Nil(t, r2.adapters)
+	require.Nil(t, r2.model)
 }
 
 type mockLlm struct {
@@ -578,6 +578,7 @@ type mockLlm struct {
 	closeResp      error
 	closeCalled    bool
 	estimatedVRAM  uint64
+	estimatedTotal uint64
 }
 
 func (s *mockLlm) Ping(ctx context.Context) error { return s.pingResp }
@@ -598,4 +599,5 @@ func (s *mockLlm) Close() error {
 	s.closeCalled = true
 	return s.closeResp
 }
-func (s *mockLlm) EstimatedVRAM() uint64 { return s.estimatedVRAM }
+func (s *mockLlm) EstimatedVRAM() uint64  { return s.estimatedVRAM }
+func (s *mockLlm) EstimatedTotal() uint64 { return s.estimatedTotal }
```