Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
text-generation-inference
Commits
be2d3803
Unverified
Commit
be2d3803
authored
Jun 25, 2024
by
drbh
Committed by
GitHub
Jun 25, 2024
Browse files
fix: simplify kserve endpoint and fix imports (#2119)
parent
f1f98e36
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
35 deletions
+33
-35
router/src/kserve.rs
router/src/kserve.rs
+32
-34
router/src/server.rs
router/src/server.rs
+1
-1
No files found.
router/src/kserve.rs
View file @
be2d3803
use
crate
::
infer
::
Infer
;
use
crate
::{
use
crate
::{
default_parameters
,
default_parameters
,
server
::{
generate_internal
,
ComputeType
},
server
::{
generate_internal
,
ComputeType
},
Deserialize
,
ErrorResponse
,
GenerateParameters
,
GenerateRequest
,
Infer
,
Serialize
,
ToSchema
,
Deserialize
,
ErrorResponse
,
GenerateParameters
,
GenerateRequest
,
Serialize
,
ToSchema
,
};
};
use
axum
::
extract
::{
Extension
,
Path
};
use
axum
::
extract
::{
Extension
,
Path
};
use
axum
::
response
::{
IntoResponse
,
Response
};
use
axum
::
http
::{
HeaderMap
,
StatusCode
};
use
axum
::
response
::
IntoResponse
;
use
axum
::
Json
;
use
axum
::
Json
;
use
futures
::
stream
::
FuturesUnordered
;
use
futures
::
stream
::
FuturesUnordered
;
use
futures
::
TryStreamExt
;
use
futures
::
TryStreamExt
;
use
reqwest
::
header
::
HeaderMap
;
use
reqwest
::
StatusCode
;
#[derive(Debug,
Serialize,
Deserialize,
ToSchema)]
#[derive(Debug,
Serialize,
Deserialize,
ToSchema)]
pub
struct
OutputChunk
{
pub
struct
OutputChunk
{
...
@@ -64,8 +64,6 @@ pub struct MetadataServerResponse {
...
@@ -64,8 +64,6 @@ pub struct MetadataServerResponse {
pub
extensions
:
Vec
<
String
>
,
pub
extensions
:
Vec
<
String
>
,
}
}
// Routes
#[utoipa::path(
#[utoipa::path(
post,
post,
tag
=
"Text Generation Inference"
,
tag
=
"Text Generation Inference"
,
...
@@ -76,13 +74,13 @@ pub struct MetadataServerResponse {
...
@@ -76,13 +74,13 @@ pub struct MetadataServerResponse {
example
=
json
!
(
{
"error"
:
"No response"
}
))
example
=
json
!
(
{
"error"
:
"No response"
}
))
)
)
)]
)]
pub
async
fn
kserve_health_live
()
->
Result
<
Response
,
(
StatusCode
,
Json
<
Error
Response
>
)
>
{
pub
async
fn
kserve_health_live
()
->
Json
<
Live
Response
>
{
let
data
=
LiveResponse
{
live
:
true
};
let
data
=
LiveResponse
{
live
:
true
};
Ok
((
HeaderMap
::
new
(),
Json
(
data
))
.into_response
()
)
Json
(
data
)
}
}
#[utoipa::path(
#[utoipa::path(
pos
t,
ge
t,
tag
=
"Text Generation Inference"
,
tag
=
"Text Generation Inference"
,
path
=
"/v2/health/ready"
,
path
=
"/v2/health/ready"
,
responses(
responses(
...
@@ -91,9 +89,9 @@ pub async fn kserve_health_live() -> Result<Response, (StatusCode, Json<ErrorRes
...
@@ -91,9 +89,9 @@ pub async fn kserve_health_live() -> Result<Response, (StatusCode, Json<ErrorRes
example
=
json
!
(
{
"error"
:
"No response"
}
))
example
=
json
!
(
{
"error"
:
"No response"
}
))
)
)
)]
)]
pub
async
fn
kserve_health_ready
()
->
Result
<
Response
,
(
StatusCode
,
Json
<
Error
Response
>
)
>
{
pub
async
fn
kserve_health_ready
()
->
Json
<
Ready
Response
>
{
let
data
=
ReadyResponse
{
live
:
true
};
let
data
=
ReadyResponse
{
live
:
true
};
Ok
((
HeaderMap
::
new
(),
Json
(
data
))
.into_response
()
)
Json
(
data
)
}
}
#[utoipa::path(
#[utoipa::path(
...
@@ -106,7 +104,7 @@ pub async fn kserve_health_ready() -> Result<Response, (StatusCode, Json<ErrorRe
...
@@ -106,7 +104,7 @@ pub async fn kserve_health_ready() -> Result<Response, (StatusCode, Json<ErrorRe
example
=
json
!
(
{
"error"
:
"No response"
}
))
example
=
json
!
(
{
"error"
:
"No response"
}
))
)
)
)]
)]
pub
async
fn
kerve_server_metadata
()
->
Result
<
Response
,
(
StatusCode
,
Json
<
Erro
rResponse
>
)
>
{
pub
async
fn
kerve_server_metadata
()
->
Json
<
MetadataServe
rResponse
>
{
let
data
=
MetadataServerResponse
{
let
data
=
MetadataServerResponse
{
name
:
"text-generation-inference"
.to_string
(),
name
:
"text-generation-inference"
.to_string
(),
version
:
env!
(
"CARGO_PKG_VERSION"
)
.to_string
(),
version
:
env!
(
"CARGO_PKG_VERSION"
)
.to_string
(),
...
@@ -116,7 +114,7 @@ pub async fn kerve_server_metadata() -> Result<Response, (StatusCode, Json<Error
...
@@ -116,7 +114,7 @@ pub async fn kerve_server_metadata() -> Result<Response, (StatusCode, Json<Error
"metrics"
.to_string
(),
"metrics"
.to_string
(),
],
],
};
};
Ok
((
HeaderMap
::
new
(),
Json
(
data
))
.into_response
()
)
Json
(
data
)
}
}
#[utoipa::path(
#[utoipa::path(
...
@@ -131,13 +129,30 @@ pub async fn kerve_server_metadata() -> Result<Response, (StatusCode, Json<Error
...
@@ -131,13 +129,30 @@ pub async fn kerve_server_metadata() -> Result<Response, (StatusCode, Json<Error
)]
)]
pub
async
fn
kserve_model_metadata
(
pub
async
fn
kserve_model_metadata
(
Path
((
model_name
,
model_version
)):
Path
<
(
String
,
String
)
>
,
Path
((
model_name
,
model_version
)):
Path
<
(
String
,
String
)
>
,
)
->
Result
<
Response
,
(
StatusCode
,
Json
<
Erro
rResponse
>
)
>
{
)
->
Json
<
MetadataServe
rResponse
>
{
let
data
=
MetadataServerResponse
{
let
data
=
MetadataServerResponse
{
name
:
model_name
,
name
:
model_name
,
version
:
model_version
,
version
:
model_version
,
extensions
:
vec!
[
"infer"
.to_string
(),
"ready"
.to_string
()],
extensions
:
vec!
[
"infer"
.to_string
(),
"ready"
.to_string
()],
};
};
Ok
((
HeaderMap
::
new
(),
Json
(
data
))
.into_response
())
Json
(
data
)
}
#[utoipa::path(
get,
tag
=
"Text Generation Inference"
,
path
=
"/v2/models/{model_name}/versions/{model_version}/ready"
,
responses(
(status
=
200
,
description
=
"Model version is ready"
,
body
=
ReadyResponse),
(status
=
404
,
description
=
"Model or version not found"
,
body
=
ErrorResponse,
example
=
json
!
(
{
"error"
:
"No response"
}
))
)
)]
pub
async
fn
kserve_model_metadata_ready
(
Path
((
_
model_name
,
_
model_version
)):
Path
<
(
String
,
String
)
>
,
)
->
Json
<
ReadyResponse
>
{
let
data
=
ReadyResponse
{
live
:
true
};
Json
(
data
)
}
}
#[utoipa::path(
#[utoipa::path(
...
@@ -155,7 +170,7 @@ pub async fn kserve_model_infer(
...
@@ -155,7 +170,7 @@ pub async fn kserve_model_infer(
infer
:
Extension
<
Infer
>
,
infer
:
Extension
<
Infer
>
,
Extension
(
compute_type
):
Extension
<
ComputeType
>
,
Extension
(
compute_type
):
Extension
<
ComputeType
>
,
Json
(
payload
):
Json
<
InferenceRequest
>
,
Json
(
payload
):
Json
<
InferenceRequest
>
,
)
->
Result
<
Response
,
(
StatusCode
,
Json
<
ErrorResponse
>
)
>
{
)
->
Result
<
impl
Into
Response
,
(
StatusCode
,
Json
<
ErrorResponse
>
)
>
{
let
id
=
payload
.id
.clone
();
let
id
=
payload
.id
.clone
();
let
str_inputs
=
payload
let
str_inputs
=
payload
.inputs
.inputs
...
@@ -226,22 +241,5 @@ pub async fn kserve_model_infer(
...
@@ -226,22 +241,5 @@ pub async fn kserve_model_infer(
outputs
:
output_chunks
,
outputs
:
output_chunks
,
};
};
Ok
((
HeaderMap
::
new
(),
Json
(
inference_output
))
.into_response
())
Ok
((
HeaderMap
::
new
(),
Json
(
inference_output
)))
}
#[utoipa::path(
get,
tag
=
"Text Generation Inference"
,
path
=
"/v2/models/{model_name}/versions/{model_version}/ready"
,
responses(
(status
=
200
,
description
=
"Model version is ready"
,
body
=
ReadyResponse),
(status
=
404
,
description
=
"Model or version not found"
,
body
=
ErrorResponse,
example
=
json
!
(
{
"error"
:
"No response"
}
))
)
)]
pub
async
fn
kserve_model_metadata_ready
(
Path
((
_
model_name
,
_
model_version
)):
Path
<
(
String
,
String
)
>
,
)
->
Result
<
Response
,
(
StatusCode
,
Json
<
ErrorResponse
>
)
>
{
let
data
=
ReadyResponse
{
live
:
true
};
Ok
((
HeaderMap
::
new
(),
Json
(
data
))
.into_response
())
}
}
router/src/server.rs
View file @
be2d3803
...
@@ -1766,12 +1766,12 @@ pub async fn run(
...
@@ -1766,12 +1766,12 @@ pub async fn run(
#[derive(OpenApi)]
#[derive(OpenApi)]
#[openapi(
#[openapi(
paths(
paths(
kserve_model_infer,
kserve_health_live,
kserve_health_live,
kserve_health_ready,
kserve_health_ready,
kerve_server_metadata,
kerve_server_metadata,
kserve_model_metadata,
kserve_model_metadata,
kserve_model_metadata_ready,
kserve_model_metadata_ready,
kserve_model_infer,
),
),
components(schemas(
components(schemas(
InferenceOutput,
InferenceOutput,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment