OpenDAS / text-generation-inference / Commits

Commit bd3a9d8e (unverified), authored Jun 23, 2023 by OlivierDehaene, committed by GitHub on Jun 23, 2023
fix(router): add timeout on flume sends (#488)
Parent: 776d150c
Showing 2 changed files with 31 additions and 16 deletions:

    router/src/infer.rs   +30  -15
    router/src/queue.rs    +1   -1
router/src/infer.rs
@@ -3,7 +3,7 @@ use crate::validation::{Validation, ValidationError};
 use crate::{Entry, Queue, Token};
 use crate::{GenerateRequest, PrefillToken};
 use flume::r#async::RecvStream;
-use flume::SendError;
+use flume::SendTimeoutError;
 use futures::future::try_join_all;
 use futures::stream::StreamExt;
 use nohash_hasher::IntMap;
@@ -11,6 +11,7 @@ use std::sync::{
     atomic::{AtomicBool, Ordering},
     Arc,
 };
+use std::time::Duration;
 use text_generation_client::{
     Batch, CachedBatch, ClientError, GeneratedText, Generation, PrefillTokens, ShardedClient,
 };
@@ -472,6 +473,10 @@ fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u6
         // If the receive an error from the Flume channel, it means that the client dropped the
         // request and we need to stop generating hence why we unwrap_or(true)
         let stopped = send_responses(generation, entry).map_err(|err| {
+            if let SendTimeoutError::Timeout(_) = *err {
+                tracing::error!("Entry response channel timed out.")
+            }
+
             metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
             err
         }).unwrap_or(true);
@@ -485,14 +490,20 @@ fn filter_send_generations(generations: Vec<Generation>, entries: &mut IntMap<u6
 fn send_responses(
     generation: Generation,
     entry: &Entry,
-) -> Result<bool, SendError<Result<InferStreamResponse, InferError>>> {
+) -> Result<bool, Box<SendTimeoutError<Result<InferStreamResponse, InferError>>>> {
+    // Return directly if the channel is disconnected
+    if entry.response_tx.is_disconnected() {
+        return Ok(true);
+    }
+
     let mut stopped = false;
 
     if let Some(prefill_tokens) = generation.prefill_tokens {
         // Send message
-        entry
-            .response_tx
-            .send(Ok(InferStreamResponse::Prefill(prefill_tokens)))?;
+        entry.response_tx.send_timeout(
+            Ok(InferStreamResponse::Prefill(prefill_tokens)),
+            Duration::from_millis(10),
+        )?;
     }
 
     // Create last Token
@@ -507,17 +518,21 @@ fn send_responses(
         // Generation has ended
         stopped = true;
         // Send message
-        entry.response_tx.send(Ok(InferStreamResponse::End {
+        entry.response_tx.send_timeout(
+            Ok(InferStreamResponse::End {
                 token,
                 generated_text,
                 queued: entry.queue_time,
                 start: entry.batch_time.unwrap(),
-        }))?;
+            }),
+            Duration::from_millis(10),
+        )?;
     } else {
         // Send message
-        entry
-            .response_tx
-            .send(Ok(InferStreamResponse::Token(token)))?;
+        entry.response_tx.send_timeout(
+            Ok(InferStreamResponse::Token(token)),
+            Duration::from_millis(10),
+        )?;
     }
     Ok(stopped)
 }
@@ -535,7 +550,7 @@ fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
         // unwrap_or is valid here as we don't care if the receiver is gone.
         entry
             .response_tx
-            .send(Err(err))
+            .send_timeout(Err(err), Duration::from_millis(10))
             .unwrap_or(());
     });
 }
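For context on the pattern introduced in this file: flume's send_timeout returns a SendTimeoutError whose Timeout variant hands the undelivered message back, while Disconnected means every receiver has been dropped, which are exactly the two cases filter_send_generations now distinguishes. In send_responses the error is additionally boxed, presumably because it carries the full undelivered response, which is why the caller matches on *err. The sketch below is illustrative only, not router code: it uses a bounded channel purely so the timeout path is reachable, and mirrors the is_disconnected() early return and the 10 ms budget from the diff.

    use std::time::Duration;

    fn main() {
        // Bounded capacity of 1 purely so the timeout path below is reachable;
        // this is illustrative, not how the router configures its channels.
        let (response_tx, _response_rx) = flume::bounded::<&str>(1);

        // Cheap early exit, as in send_responses(): skip all work if the
        // receiving side has already been dropped.
        if response_tx.is_disconnected() {
            return;
        }

        // Fill the channel so the next send cannot complete immediately.
        response_tx.send("first").unwrap();

        match response_tx.send_timeout("second", Duration::from_millis(10)) {
            Ok(()) => println!("sent"),
            // Receiver is alive but made no room within the deadline; the
            // undelivered message is handed back inside the error.
            Err(flume::SendTimeoutError::Timeout(msg)) => {
                eprintln!("send timed out, not delivered: {msg}");
            }
            // Every receiver was dropped (the client went away).
            Err(flume::SendTimeoutError::Disconnected(msg)) => {
                eprintln!("receiver gone, dropping: {msg}");
            }
        }
    }

The effect of the change is that a slow or vanished client can hold up a generation step for at most the timeout, at the cost of dropping that client's response once the deadline passes.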
router/src/queue.rs
@@ -95,7 +95,7 @@ async fn queue_task(requires_padding: bool, receiver: flume::Receiver<QueueComma
                 span,
             } => span.in_scope(|| {
                 let next_batch = state.next_batch(min_size, token_budget);
-                response_sender.send(next_batch).unwrap_or(());
+                response_sender.send(next_batch).unwrap();
                 metrics::gauge!("tgi_queue_size", state.entries.len() as f64);
             }),
         }
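The queue.rs change sits on the reply path of the queue task's command loop: a failure to deliver the NextBatch response is now a hard error (unwrap()) rather than being silently swallowed (unwrap_or(())). A minimal sketch of this command/one-shot-reply pattern, with simplified stand-in types (Command, Option<u32>) in place of the router's actual QueueCommand and batch types:

    use std::time::Duration;

    // Simplified stand-ins for the router's QueueCommand / batch types.
    enum Command {
        NextBatch {
            response_sender: flume::Sender<Option<u32>>,
        },
    }

    fn main() {
        let (cmd_tx, cmd_rx) = flume::unbounded::<Command>();

        // Queue task: answer every command on its one-shot reply channel.
        // Using unwrap() (as the diff now does) turns a dropped caller into a
        // loud panic instead of a silent no-op.
        let task = std::thread::spawn(move || {
            while let Ok(Command::NextBatch { response_sender }) = cmd_rx.recv() {
                response_sender.send(Some(42)).unwrap();
            }
        });

        // Caller: ship a reply sender along with the command, then wait briefly.
        let (response_tx, response_rx) = flume::bounded(1);
        cmd_tx
            .send(Command::NextBatch { response_sender: response_tx })
            .unwrap();
        let next_batch = response_rx
            .recv_timeout(Duration::from_millis(100))
            .expect("queue task did not answer");
        println!("next batch: {next_batch:?}");

        drop(cmd_tx);
        task.join().unwrap();
    }

With this shape, a caller that drops its response receiver before the queue task replies now surfaces as a panic inside the task rather than a silently lost batch.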