Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
abed273d
"git@developer.sourcefind.cn:OpenDAS/torch-spline-conv.git" did not exist on "70d03142a6ed1a1ea6370a9ba65599f7da155da2"
Unverified
Commit
abed273d
authored
Sep 11, 2024
by
Patrick Devine
Committed by
GitHub
Sep 11, 2024
Browse files
add "stop" command (#6739)
parent
03439262
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
172 additions
and
25 deletions
+172
-25
cmd/cmd.go
cmd/cmd.go
+54
-2
cmd/interactive.go
cmd/interactive.go
+1
-22
server/routes.go
server/routes.go
+52
-0
server/sched.go
server/sched.go
+19
-1
server/sched_test.go
server/sched_test.go
+46
-0
No files found.
cmd/cmd.go
View file @
abed273d
...
...
@@ -346,6 +346,39 @@ func (w *progressWriter) Write(p []byte) (n int, err error) {
return
len
(
p
),
nil
}
func
loadOrUnloadModel
(
cmd
*
cobra
.
Command
,
opts
*
runOptions
)
error
{
p
:=
progress
.
NewProgress
(
os
.
Stderr
)
defer
p
.
StopAndClear
()
spinner
:=
progress
.
NewSpinner
(
""
)
p
.
Add
(
""
,
spinner
)
client
,
err
:=
api
.
ClientFromEnvironment
()
if
err
!=
nil
{
return
err
}
req
:=
&
api
.
GenerateRequest
{
Model
:
opts
.
Model
,
KeepAlive
:
opts
.
KeepAlive
,
}
return
client
.
Generate
(
cmd
.
Context
(),
req
,
func
(
api
.
GenerateResponse
)
error
{
return
nil
})
}
func
StopHandler
(
cmd
*
cobra
.
Command
,
args
[]
string
)
error
{
opts
:=
&
runOptions
{
Model
:
args
[
0
],
KeepAlive
:
&
api
.
Duration
{
Duration
:
0
},
}
if
err
:=
loadOrUnloadModel
(
cmd
,
opts
);
err
!=
nil
{
if
strings
.
Contains
(
err
.
Error
(),
"not found"
)
{
return
fmt
.
Errorf
(
"couldn't find model
\"
%s
\"
to stop"
,
args
[
0
])
}
}
return
nil
}
func
RunHandler
(
cmd
*
cobra
.
Command
,
args
[]
string
)
error
{
interactive
:=
true
...
...
@@ -424,7 +457,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
opts
.
ParentModel
=
info
.
Details
.
ParentModel
if
interactive
{
if
err
:=
loadModel
(
cmd
,
&
opts
);
err
!=
nil
{
if
err
:=
loadOrUn
loadModel
(
cmd
,
&
opts
);
err
!=
nil
{
return
err
}
...
...
@@ -615,7 +648,15 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
cpuPercent
:=
math
.
Round
(
float64
(
sizeCPU
)
/
float64
(
m
.
Size
)
*
100
)
procStr
=
fmt
.
Sprintf
(
"%d%%/%d%% CPU/GPU"
,
int
(
cpuPercent
),
int
(
100
-
cpuPercent
))
}
data
=
append
(
data
,
[]
string
{
m
.
Name
,
m
.
Digest
[
:
12
],
format
.
HumanBytes
(
m
.
Size
),
procStr
,
format
.
HumanTime
(
m
.
ExpiresAt
,
"Never"
)})
var
until
string
delta
:=
time
.
Since
(
m
.
ExpiresAt
)
if
delta
>
0
{
until
=
"Stopping..."
}
else
{
until
=
format
.
HumanTime
(
m
.
ExpiresAt
,
"Never"
)
}
data
=
append
(
data
,
[]
string
{
m
.
Name
,
m
.
Digest
[
:
12
],
format
.
HumanBytes
(
m
.
Size
),
procStr
,
until
})
}
}
...
...
@@ -1294,6 +1335,15 @@ func NewCLI() *cobra.Command {
runCmd
.
Flags
()
.
Bool
(
"insecure"
,
false
,
"Use an insecure registry"
)
runCmd
.
Flags
()
.
Bool
(
"nowordwrap"
,
false
,
"Don't wrap words to the next line automatically"
)
runCmd
.
Flags
()
.
String
(
"format"
,
""
,
"Response format (e.g. json)"
)
stopCmd
:=
&
cobra
.
Command
{
Use
:
"stop MODEL"
,
Short
:
"Stop a running model"
,
Args
:
cobra
.
ExactArgs
(
1
),
PreRunE
:
checkServerHeartbeat
,
RunE
:
StopHandler
,
}
serveCmd
:=
&
cobra
.
Command
{
Use
:
"serve"
,
Aliases
:
[]
string
{
"start"
},
...
...
@@ -1361,6 +1411,7 @@ func NewCLI() *cobra.Command {
createCmd
,
showCmd
,
runCmd
,
stopCmd
,
pullCmd
,
pushCmd
,
listCmd
,
...
...
@@ -1400,6 +1451,7 @@ func NewCLI() *cobra.Command {
createCmd
,
showCmd
,
runCmd
,
stopCmd
,
pullCmd
,
pushCmd
,
listCmd
,
...
...
cmd/interactive.go
View file @
abed273d
...
...
@@ -18,7 +18,6 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/parser"
"github.com/ollama/ollama/progress"
"github.com/ollama/ollama/readline"
"github.com/ollama/ollama/types/errtypes"
)
...
...
@@ -31,26 +30,6 @@ const (
MultilineSystem
)
func
loadModel
(
cmd
*
cobra
.
Command
,
opts
*
runOptions
)
error
{
p
:=
progress
.
NewProgress
(
os
.
Stderr
)
defer
p
.
StopAndClear
()
spinner
:=
progress
.
NewSpinner
(
""
)
p
.
Add
(
""
,
spinner
)
client
,
err
:=
api
.
ClientFromEnvironment
()
if
err
!=
nil
{
return
err
}
chatReq
:=
&
api
.
ChatRequest
{
Model
:
opts
.
Model
,
KeepAlive
:
opts
.
KeepAlive
,
}
return
client
.
Chat
(
cmd
.
Context
(),
chatReq
,
func
(
api
.
ChatResponse
)
error
{
return
nil
})
}
func
generateInteractive
(
cmd
*
cobra
.
Command
,
opts
runOptions
)
error
{
usage
:=
func
()
{
fmt
.
Fprintln
(
os
.
Stderr
,
"Available Commands:"
)
...
...
@@ -217,7 +196,7 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
opts
.
Model
=
args
[
1
]
opts
.
Messages
=
[]
api
.
Message
{}
fmt
.
Printf
(
"Loading model '%s'
\n
"
,
opts
.
Model
)
if
err
:=
loadModel
(
cmd
,
&
opts
);
err
!=
nil
{
if
err
:=
loadOrUn
loadModel
(
cmd
,
&
opts
);
err
!=
nil
{
return
err
}
continue
...
...
server/routes.go
View file @
abed273d
...
...
@@ -117,6 +117,32 @@ func (s *Server) GenerateHandler(c *gin.Context) {
return
}
// expire the runner
if
req
.
Prompt
==
""
&&
req
.
KeepAlive
!=
nil
&&
int
(
req
.
KeepAlive
.
Seconds
())
==
0
{
model
,
err
:=
GetModel
(
req
.
Model
)
if
err
!=
nil
{
switch
{
case
os
.
IsNotExist
(
err
)
:
c
.
JSON
(
http
.
StatusNotFound
,
gin
.
H
{
"error"
:
fmt
.
Sprintf
(
"model '%s' not found"
,
req
.
Model
)})
case
err
.
Error
()
==
"invalid model name"
:
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
default
:
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
}
return
}
s
.
sched
.
expireRunner
(
model
)
c
.
JSON
(
http
.
StatusOK
,
api
.
GenerateResponse
{
Model
:
req
.
Model
,
CreatedAt
:
time
.
Now
()
.
UTC
(),
Response
:
""
,
Done
:
true
,
DoneReason
:
"unload"
,
})
return
}
if
req
.
Format
!=
""
&&
req
.
Format
!=
"json"
{
c
.
AbortWithStatusJSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
"format must be empty or
\"
json
\"
"
})
return
...
...
@@ -1322,6 +1348,32 @@ func (s *Server) ChatHandler(c *gin.Context) {
return
}
// expire the runner
if
len
(
req
.
Messages
)
==
0
&&
req
.
KeepAlive
!=
nil
&&
int
(
req
.
KeepAlive
.
Seconds
())
==
0
{
model
,
err
:=
GetModel
(
req
.
Model
)
if
err
!=
nil
{
switch
{
case
os
.
IsNotExist
(
err
)
:
c
.
JSON
(
http
.
StatusNotFound
,
gin
.
H
{
"error"
:
fmt
.
Sprintf
(
"model '%s' not found"
,
req
.
Model
)})
case
err
.
Error
()
==
"invalid model name"
:
c
.
JSON
(
http
.
StatusBadRequest
,
gin
.
H
{
"error"
:
err
.
Error
()})
default
:
c
.
JSON
(
http
.
StatusInternalServerError
,
gin
.
H
{
"error"
:
err
.
Error
()})
}
return
}
s
.
sched
.
expireRunner
(
model
)
c
.
JSON
(
http
.
StatusOK
,
api
.
ChatResponse
{
Model
:
req
.
Model
,
CreatedAt
:
time
.
Now
()
.
UTC
(),
Message
:
api
.
Message
{
Role
:
"assistant"
},
Done
:
true
,
DoneReason
:
"unload"
,
})
return
}
caps
:=
[]
Capability
{
CapabilityCompletion
}
if
len
(
req
.
Tools
)
>
0
{
caps
=
append
(
caps
,
CapabilityTools
)
...
...
server/sched.go
View file @
abed273d
...
...
@@ -360,7 +360,6 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
slog
.
Debug
(
"runner expired event received"
,
"modelPath"
,
runner
.
modelPath
)
runner
.
refMu
.
Lock
()
if
runner
.
refCount
>
0
{
// Shouldn't happen, but safeguard to ensure no leaked runners
slog
.
Debug
(
"expired event with positive ref count, retrying"
,
"modelPath"
,
runner
.
modelPath
,
"refCount"
,
runner
.
refCount
)
go
func
(
runner
*
runnerRef
)
{
// We can't unload yet, but want to as soon as the current request completes
...
...
@@ -802,6 +801,25 @@ func (s *Scheduler) unloadAllRunners() {
}
}
func
(
s
*
Scheduler
)
expireRunner
(
model
*
Model
)
{
s
.
loadedMu
.
Lock
()
defer
s
.
loadedMu
.
Unlock
()
runner
,
ok
:=
s
.
loaded
[
model
.
ModelPath
]
if
ok
{
runner
.
refMu
.
Lock
()
runner
.
expiresAt
=
time
.
Now
()
if
runner
.
expireTimer
!=
nil
{
runner
.
expireTimer
.
Stop
()
runner
.
expireTimer
=
nil
}
runner
.
sessionDuration
=
0
if
runner
.
refCount
<=
0
{
s
.
expiredCh
<-
runner
}
runner
.
refMu
.
Unlock
()
}
}
// If other runners are loaded, make sure the pending request will fit in system memory
// If not, pick a runner to unload, else return nil and the request can be loaded
func
(
s
*
Scheduler
)
maybeFindCPURunnerToUnload
(
req
*
LlmRequest
,
ggml
*
llm
.
GGML
,
gpus
gpu
.
GpuInfoList
)
*
runnerRef
{
...
...
server/sched_test.go
View file @
abed273d
...
...
@@ -406,6 +406,52 @@ func TestGetRunner(t *testing.T) {
b
.
ctxDone
()
}
func
TestExpireRunner
(
t
*
testing
.
T
)
{
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
20
*
time
.
Millisecond
)
defer
done
()
s
:=
InitScheduler
(
ctx
)
req
:=
&
LlmRequest
{
ctx
:
ctx
,
model
:
&
Model
{
ModelPath
:
"foo"
},
opts
:
api
.
DefaultOptions
(),
successCh
:
make
(
chan
*
runnerRef
,
1
),
errCh
:
make
(
chan
error
,
1
),
sessionDuration
:
&
api
.
Duration
{
Duration
:
2
*
time
.
Minute
},
}
var
ggml
*
llm
.
GGML
gpus
:=
gpu
.
GpuInfoList
{}
server
:=
&
mockLlm
{
estimatedVRAM
:
10
,
estimatedVRAMByGPU
:
map
[
string
]
uint64
{}}
s
.
newServerFn
=
func
(
gpus
gpu
.
GpuInfoList
,
model
string
,
ggml
*
llm
.
GGML
,
adapters
[]
string
,
projectors
[]
string
,
opts
api
.
Options
,
numParallel
int
)
(
llm
.
LlamaServer
,
error
)
{
return
server
,
nil
}
s
.
load
(
req
,
ggml
,
gpus
,
0
)
select
{
case
err
:=
<-
req
.
errCh
:
if
err
!=
nil
{
t
.
Fatalf
(
"expected no errors when loading, got '%s'"
,
err
.
Error
())
}
case
resp
:=
<-
req
.
successCh
:
s
.
loadedMu
.
Lock
()
if
resp
.
refCount
!=
uint
(
1
)
||
len
(
s
.
loaded
)
!=
1
{
t
.
Fatalf
(
"expected a model to be loaded"
)
}
s
.
loadedMu
.
Unlock
()
}
s
.
expireRunner
(
&
Model
{
ModelPath
:
"foo"
})
s
.
finishedReqCh
<-
req
s
.
processCompleted
(
ctx
)
s
.
loadedMu
.
Lock
()
if
len
(
s
.
loaded
)
!=
0
{
t
.
Fatalf
(
"expected model to be unloaded"
)
}
s
.
loadedMu
.
Unlock
()
}
// TODO - add one scenario that triggers the bogus finished event with positive ref count
func
TestPrematureExpired
(
t
*
testing
.
T
)
{
ctx
,
done
:=
context
.
WithTimeout
(
context
.
Background
(),
500
*
time
.
Millisecond
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment