Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
967a82f5
Commit 967a82f5 authored Sep 29, 2025 by Michael Yang
Committed by Michael Yang Oct 09, 2025
Browse files
ollamarunner: measure only active time
parent
bbbc73d6
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing 1 changed file with 44 additions and 27 deletions
+44
-27
runner/ollamarunner/runner.go
runner/ollamarunner/runner.go
+44
-27
No files found.
runner/ollamarunner/runner.go
View file @
967a82f5
...
@@ -91,8 +91,9 @@ type Sequence struct {
...
@@ -91,8 +91,9 @@ type Sequence struct {
doneReason
llm
.
DoneReason
doneReason
llm
.
DoneReason
// Metrics
// Metrics
startProcessingTime
time
.
Time
startedAt
,
lastUpdatedAt
time
.
Time
startGenerationTime
time
.
Time
processingDuration
time
.
Duration
samplingDuration
time
.
Duration
numPredicted
int
numPredicted
int
numPromptInputs
int
numPromptInputs
int
}
}
...
@@ -108,8 +109,6 @@ type NewSequenceParams struct {
...
@@ -108,8 +109,6 @@ type NewSequenceParams struct {
func
(
s
*
Server
)
NewSequence
(
prompt
string
,
images
[]
llm
.
ImageData
,
params
NewSequenceParams
)
(
*
Sequence
,
error
)
{
func
(
s
*
Server
)
NewSequence
(
prompt
string
,
images
[]
llm
.
ImageData
,
params
NewSequenceParams
)
(
*
Sequence
,
error
)
{
s
.
ready
.
Wait
()
s
.
ready
.
Wait
()
startTime
:=
time
.
Now
()
inputs
,
ctxs
,
mmStore
,
err
:=
s
.
inputs
(
prompt
,
images
)
inputs
,
ctxs
,
mmStore
,
err
:=
s
.
inputs
(
prompt
,
images
)
if
err
!=
nil
{
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to process inputs: %w"
,
err
)
return
nil
,
fmt
.
Errorf
(
"failed to process inputs: %w"
,
err
)
...
@@ -168,7 +167,6 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
...
@@ -168,7 +167,6 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
mmStore
:
mmStore
,
mmStore
:
mmStore
,
inputs
:
inputs
,
inputs
:
inputs
,
numPromptInputs
:
len
(
inputs
),
numPromptInputs
:
len
(
inputs
),
startProcessingTime
:
startTime
,
numPredict
:
params
.
numPredict
,
numPredict
:
params
.
numPredict
,
pendingResponses
:
make
([]
string
,
0
),
pendingResponses
:
make
([]
string
,
0
),
responses
:
make
(
chan
string
,
100
),
responses
:
make
(
chan
string
,
100
),
...
@@ -408,7 +406,7 @@ func (s *Server) run(ctx context.Context) {
...
@@ -408,7 +406,7 @@ func (s *Server) run(ctx context.Context) {
supportsAsync
:=
pooling
.
Type
(
s
.
model
.
Backend
()
.
Config
()
.
Uint
(
"pooling_type"
))
==
pooling
.
TypeNone
supportsAsync
:=
pooling
.
Type
(
s
.
model
.
Backend
()
.
Config
()
.
Uint
(
"pooling_type"
))
==
pooling
.
TypeNone
var
active
Batch
batchState
var
previous
Batch
batchState
for
{
for
{
select
{
select
{
case
<-
ctx
.
Done
()
:
case
<-
ctx
.
Done
()
:
...
@@ -417,16 +415,18 @@ func (s *Server) run(ctx context.Context) {
...
@@ -417,16 +415,18 @@ func (s *Server) run(ctx context.Context) {
panic
(
err
)
panic
(
err
)
default
:
default
:
var
err
error
var
err
error
active
Batch
,
err
=
s
.
forwardBatch
(
active
Batch
)
next
Batch
,
err
:
=
s
.
forwardBatch
(
previous
Batch
)
if
err
!=
nil
{
if
err
!=
nil
{
panic
(
err
)
panic
(
err
)
}
}
if
supportsAsync
{
if
supportsAsync
{
go
s
.
computeBatch
(
active
Batch
)
go
s
.
computeBatch
(
next
Batch
)
}
else
{
}
else
{
s
.
computeBatch
(
active
Batch
)
s
.
computeBatch
(
next
Batch
)
}
}
previousBatch
=
nextBatch
}
}
}
}
}
}
...
@@ -562,6 +562,13 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
...
@@ -562,6 +562,13 @@ func (s *Server) forwardBatch(pendingBatch batchState) (nextBatch batchState, er
seq
.
inputs
=
seq
.
inputs
[
len
(
seq
.
pendingInputs
)
:
]
seq
.
inputs
=
seq
.
inputs
[
len
(
seq
.
pendingInputs
)
:
]
}
}
startedAt
:=
time
.
Now
()
for
i
:=
range
nextBatch
.
seqs
{
if
nextBatch
.
seqs
[
i
]
!=
nil
&&
nextBatch
.
seqs
[
i
]
.
startedAt
.
IsZero
()
{
nextBatch
.
seqs
[
i
]
.
startedAt
=
startedAt
}
}
if
resumeSeq
!=
-
1
{
if
resumeSeq
!=
-
1
{
s
.
nextSeq
=
resumeSeq
s
.
nextSeq
=
resumeSeq
}
else
{
}
else
{
...
@@ -682,6 +689,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
...
@@ -682,6 +689,7 @@ func (s *Server) computeBatch(activeBatch batchState) {
activeBatch
.
modelOutput
)
activeBatch
.
modelOutput
)
outputs
:=
activeBatch
.
modelOutput
.
Floats
()
outputs
:=
activeBatch
.
modelOutput
.
Floats
()
t
:=
time
.
Now
()
logutil
.
Trace
(
"computeBatch: logits ready"
,
"batchID"
,
activeBatch
.
id
)
logutil
.
Trace
(
"computeBatch: logits ready"
,
"batchID"
,
activeBatch
.
id
)
...
@@ -694,8 +702,10 @@ func (s *Server) computeBatch(activeBatch batchState) {
...
@@ -694,8 +702,10 @@ func (s *Server) computeBatch(activeBatch batchState) {
continue
continue
}
}
seq
.
lastUpdatedAt
=
t
if
seq
.
numPredicted
==
1
{
if
seq
.
numPredicted
==
1
{
seq
.
startGenerationTime
=
time
.
Now
()
seq
.
processingDuration
=
seq
.
lastUpdatedAt
.
Sub
(
seq
.
startedAt
)
seq
.
startedAt
=
seq
.
lastUpdatedAt
}
}
// if done processing the prompt, generate an embedding and return
// if done processing the prompt, generate an embedding and return
...
@@ -774,6 +784,13 @@ func (s *Server) computeBatch(activeBatch batchState) {
...
@@ -774,6 +784,13 @@ func (s *Server) computeBatch(activeBatch batchState) {
s
.
removeSequence
(
i
,
llm
.
DoneReasonConnectionClosed
)
s
.
removeSequence
(
i
,
llm
.
DoneReasonConnectionClosed
)
}
}
}
}
samplingDuration
:=
time
.
Since
(
t
)
for
i
,
seq
:=
range
s
.
seqs
{
if
seq
!=
nil
&&
nextBatchTokens
[
i
]
!=
nil
{
s
.
seqs
[
i
]
.
samplingDuration
+=
samplingDuration
}
}
}
}
func
(
s
*
Server
)
completion
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
)
{
func
(
s
*
Server
)
completion
(
w
http
.
ResponseWriter
,
r
*
http
.
Request
)
{
...
@@ -887,9 +904,9 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
...
@@ -887,9 +904,9 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
Done
:
true
,
Done
:
true
,
DoneReason
:
seq
.
doneReason
,
DoneReason
:
seq
.
doneReason
,
PromptEvalCount
:
seq
.
numPromptInputs
,
PromptEvalCount
:
seq
.
numPromptInputs
,
PromptEvalDuration
:
seq
.
startGenerationTime
.
Sub
(
seq
.
startP
rocessing
Time
)
,
PromptEvalDuration
:
seq
.
p
rocessing
Duration
,
EvalCount
:
seq
.
numPredicted
,
EvalCount
:
seq
.
numPredicted
,
EvalDuration
:
time
.
Since
(
seq
.
startGene
ration
Time
)
,
EvalDuration
:
seq
.
lastUpdatedAt
.
Sub
(
seq
.
startedAt
)
-
seq
.
samplingDu
ration
,
});
err
!=
nil
{
});
err
!=
nil
{
http
.
Error
(
w
,
fmt
.
Sprintf
(
"failed to encode final response: %v"
,
err
),
http
.
StatusInternalServerError
)
http
.
Error
(
w
,
fmt
.
Sprintf
(
"failed to encode final response: %v"
,
err
),
http
.
StatusInternalServerError
)
}
}
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment