Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
94200538
Unverified
Commit
94200538
authored
May 23, 2023
by
OlivierDehaene
Committed by
GitHub
May 23, 2023
Browse files
feat(router): log input/output at debug level (#364)
@njhill FYI
parent
e3e487dc
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
20 additions
and
7 deletions
+20
-7
integration-tests/conftest.py
integration-tests/conftest.py
+5
-2
router/src/server.rs
router/src/server.rs
+15
-5
No files found.
integration-tests/conftest.py
View file @
94200538
...
@@ -231,8 +231,11 @@ def launcher(event_loop):
...
@@ -231,8 +231,11 @@ def launcher(event_loop):
if
quantize
:
if
quantize
:
args
.
append
(
"--quantize"
)
args
.
append
(
"--quantize"
)
env
=
os
.
environ
env
[
"LOG_LEVEL"
]
=
"info,text_generation_router=debug"
with
subprocess
.
Popen
(
with
subprocess
.
Popen
(
args
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
args
,
stdout
=
subprocess
.
PIPE
,
stderr
=
subprocess
.
PIPE
,
env
=
env
)
as
process
:
)
as
process
:
yield
ProcessLauncherHandle
(
process
,
port
)
yield
ProcessLauncherHandle
(
process
,
port
)
...
@@ -271,7 +274,7 @@ def launcher(event_loop):
...
@@ -271,7 +274,7 @@ def launcher(event_loop):
gpu_count
=
num_shard
if
num_shard
is
not
None
else
1
gpu_count
=
num_shard
if
num_shard
is
not
None
else
1
env
=
{}
env
=
{
"LOG_LEVEL"
:
"info,text_generation_router=debug"
}
if
HUGGING_FACE_HUB_TOKEN
is
not
None
:
if
HUGGING_FACE_HUB_TOKEN
is
not
None
:
env
[
"HUGGING_FACE_HUB_TOKEN"
]
=
HUGGING_FACE_HUB_TOKEN
env
[
"HUGGING_FACE_HUB_TOKEN"
]
=
HUGGING_FACE_HUB_TOKEN
...
...
router/src/server.rs
View file @
94200538
...
@@ -52,7 +52,7 @@ use utoipa_swagger_ui::SwaggerUi;
...
@@ -52,7 +52,7 @@ use utoipa_swagger_ui::SwaggerUi;
example
=
json
!
(
{
"error"
:
"Incomplete generation"
}
)),
example
=
json
!
(
{
"error"
:
"Incomplete generation"
}
)),
)
)
)]
)]
#[instrument(skip(infer))]
#[instrument(skip(infer
,
req
))]
async
fn
compat_generate
(
async
fn
compat_generate
(
default_return_full_text
:
Extension
<
bool
>
,
default_return_full_text
:
Extension
<
bool
>
,
infer
:
Extension
<
Infer
>
,
infer
:
Extension
<
Infer
>
,
...
@@ -133,8 +133,9 @@ async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<E
...
@@ -133,8 +133,9 @@ async fn health(mut health: Extension<Health>) -> Result<(), (StatusCode, Json<E
)
)
)]
)]
#[instrument(
#[instrument(
skip
(infer)
,
skip
_all
,
fields(
fields(
parameters
=
?
req
.
0
.
parameters,
total_time,
total_time,
validation_time,
validation_time,
queue_time,
queue_time,
...
@@ -151,6 +152,8 @@ async fn generate(
...
@@ -151,6 +152,8 @@ async fn generate(
let
start_time
=
Instant
::
now
();
let
start_time
=
Instant
::
now
();
metrics
::
increment_counter!
(
"tgi_request_count"
);
metrics
::
increment_counter!
(
"tgi_request_count"
);
tracing
::
debug!
(
"Input: {}"
,
req
.0
.inputs
);
let
compute_characters
=
req
.0
.inputs
.chars
()
.count
();
let
compute_characters
=
req
.0
.inputs
.chars
()
.count
();
let
mut
add_prompt
=
None
;
let
mut
add_prompt
=
None
;
if
req
.0
.parameters.return_full_text
.unwrap_or
(
false
)
{
if
req
.0
.parameters.return_full_text
.unwrap_or
(
false
)
{
...
@@ -282,7 +285,8 @@ async fn generate(
...
@@ -282,7 +285,8 @@ async fn generate(
output_text
=
prompt
+
&
output_text
;
output_text
=
prompt
+
&
output_text
;
}
}
tracing
::
info!
(
"Output: {}"
,
output_text
);
tracing
::
debug!
(
"Output: {}"
,
output_text
);
tracing
::
info!
(
"Success"
);
let
response
=
GenerateResponse
{
let
response
=
GenerateResponse
{
generated_text
:
output_text
,
generated_text
:
output_text
,
...
@@ -315,8 +319,9 @@ async fn generate(
...
@@ -315,8 +319,9 @@ async fn generate(
)
)
)]
)]
#[instrument(
#[instrument(
skip
(infer)
,
skip
_all
,
fields(
fields(
parameters
=
?
req
.
0
.
parameters,
total_time,
total_time,
validation_time,
validation_time,
queue_time,
queue_time,
...
@@ -336,6 +341,8 @@ async fn generate_stream(
...
@@ -336,6 +341,8 @@ async fn generate_stream(
let
start_time
=
Instant
::
now
();
let
start_time
=
Instant
::
now
();
metrics
::
increment_counter!
(
"tgi_request_count"
);
metrics
::
increment_counter!
(
"tgi_request_count"
);
tracing
::
debug!
(
"Input: {}"
,
req
.0
.inputs
);
let
compute_characters
=
req
.0
.inputs
.chars
()
.count
();
let
compute_characters
=
req
.0
.inputs
.chars
()
.count
();
let
mut
headers
=
HeaderMap
::
new
();
let
mut
headers
=
HeaderMap
::
new
();
...
@@ -370,6 +377,8 @@ async fn generate_stream(
...
@@ -370,6 +377,8 @@ async fn generate_stream(
InferStreamResponse
::
Prefill
(
_
)
=>
{}
InferStreamResponse
::
Prefill
(
_
)
=>
{}
// Yield event for every new token
// Yield event for every new token
InferStreamResponse
::
Token
(
token
)
=>
{
InferStreamResponse
::
Token
(
token
)
=>
{
tracing
::
debug!
(
parent
:
&
span
,
"Token: {:?}"
,
token
);
// StreamResponse
// StreamResponse
let
stream_token
=
StreamResponse
{
let
stream_token
=
StreamResponse
{
token
,
token
,
...
@@ -428,7 +437,8 @@ async fn generate_stream(
...
@@ -428,7 +437,8 @@ async fn generate_stream(
output_text
=
prompt
+
&
output_text
;
output_text
=
prompt
+
&
output_text
;
}
}
tracing
::
info!
(
parent
:
&
span
,
"Output: {}"
,
output_text
);
tracing
::
debug!
(
parent
:
&
span
,
"Output: {}"
,
output_text
);
tracing
::
info!
(
parent
:
&
span
,
"Success"
);
let
stream_token
=
StreamResponse
{
let
stream_token
=
StreamResponse
{
token
,
token
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment