Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
bbb81c24
Unverified
Commit
bbb81c24
authored
Nov 23, 2024
by
Byron Hsu
Committed by
GitHub
Nov 23, 2024
Browse files
Add more api routes (completion, health, etc) to the router (#2146)
parent
52f58fc4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
98 additions
and
19 deletions
+98
-19
rust/pyproject.toml
rust/pyproject.toml
+1
-1
rust/src/router.rs
rust/src/router.rs
+31
-14
rust/src/server.rs
rust/src/server.rs
+66
-4
No files found.
rust/pyproject.toml
View file @
bbb81c24
...
...
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name
=
"sglang-router"
version
=
"0.0.
5
"
version
=
"0.0.
6
"
description
=
"SGLang router is a standalone module implemented in Rust to achieve data parallelism across SGLang instances."
authors
=
[
{name
=
"Byron Hsu"
,
email
=
"byronhsu1230@gmail.com"
}
]
requires-python
=
">=3.8"
...
...
rust/src/router.rs
View file @
bbb81c24
...
...
@@ -97,14 +97,27 @@ pub enum PolicyConfig {
},
}
fn
get_text_from_request
(
body
:
&
Bytes
)
->
String
{
//
1.
convert body to json
fn
get_text_from_request
(
body
:
&
Bytes
,
route
:
&
str
)
->
String
{
// convert body to json
let
json
=
serde_json
::
from_slice
::
<
serde_json
::
Value
>
(
body
)
.unwrap
();
// 2. get the text field
let
text
=
json
.get
(
"text"
)
.and_then
(|
t
|
t
.as_str
())
.unwrap_or
(
""
);
return
text
.to_string
();
}
if
route
==
"generate"
{
// get the "text" field
let
text
=
json
.get
(
"text"
)
.and_then
(|
t
|
t
.as_str
())
.unwrap_or
(
""
);
return
text
.to_string
();
}
else
if
route
==
"v1/chat/completions"
{
// get the messages field as raw text
if
let
Some
(
messages
)
=
json
.get
(
"messages"
)
{
// Convert messages back to a string, preserving all JSON formatting
return
serde_json
::
to_string
(
messages
)
.unwrap_or_default
();
}
}
else
if
route
==
"v1/completions"
{
let
prompt
=
json
.get
(
"prompt"
)
.and_then
(|
t
|
t
.as_str
())
.unwrap_or
(
""
);
return
prompt
.to_string
();
}
return
""
.to_string
();
}
impl
Router
{
pub
fn
new
(
worker_urls
:
Vec
<
String
>
,
policy_config
:
PolicyConfig
)
->
Self
{
match
policy_config
{
...
...
@@ -187,8 +200,11 @@ impl Router {
client
:
&
reqwest
::
Client
,
req
:
HttpRequest
,
body
:
Bytes
,
route
:
&
str
,
)
->
HttpResponse
{
let
text
=
get_text_from_request
(
&
body
);
let
text
=
get_text_from_request
(
&
body
,
route
);
// For Debug
// println!("text: {:?}, route: {:?}", text, route);
let
worker_url
=
match
self
{
Router
::
RoundRobin
{
...
...
@@ -236,13 +252,14 @@ impl Router {
if
matched_rate
>
*
cache_threshold
{
matched_worker
.to_string
()
}
else
{
let
m_map
:
HashMap
<
String
,
usize
>
=
tree
.tenant_char_count
.iter
()
.map
(|
entry
|
(
entry
.key
()
.clone
(),
*
entry
.value
()))
.collect
();
// For Debug
// let m_map: HashMap<String, usize> = tree
// .tenant_char_count
// .iter()
// .map(|entry| (entry.key().clone(), *entry.value()))
// .collect();
println!
(
"map: {:?}, mmap: {:?}"
,
tree
.get_tenant_char_count
(),
m_map
);
//
println!("map: {:?}, mmap: {:?}", tree.get_tenant_char_count(), m_map);
tree
.get_smallest_tenant
()
}
...
...
@@ -276,7 +293,7 @@ impl Router {
.unwrap_or
(
false
);
let
res
=
match
client
.post
(
format!
(
"{}/
generate
"
,
worker_url
.clone
()))
.post
(
format!
(
"{}/
{}
"
,
worker_url
.clone
()
,
route
))
.header
(
"Content-Type"
,
req
.headers
()
...
...
rust/src/server.rs
View file @
bbb81c24
...
...
@@ -33,7 +33,10 @@ async fn forward_request(
.unwrap_or
(
actix_web
::
http
::
StatusCode
::
INTERNAL_SERVER_ERROR
);
// print the status
println!
(
"Worker URL: {}, Status: {}"
,
worker_url
,
status
);
println!
(
"Forwarding Request Worker URL: {}, Route: {}, Status: {}"
,
worker_url
,
route
,
status
);
match
res
.bytes
()
.await
{
Ok
(
body
)
=>
HttpResponse
::
build
(
status
)
.body
(
body
.to_vec
()),
Err
(
_
)
=>
HttpResponse
::
InternalServerError
()
.finish
(),
...
...
@@ -43,8 +46,38 @@ async fn forward_request(
}
}
#[get(
"/health"
)]
async
fn
health
(
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
let
worker_url
=
match
data
.router
.get_first
()
{
Some
(
url
)
=>
url
,
None
=>
return
HttpResponse
::
InternalServerError
()
.finish
(),
};
forward_request
(
&
data
.client
,
worker_url
,
"/health"
.to_string
())
.await
}
#[get(
"/health_generate"
)]
async
fn
health_generate
(
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
let
worker_url
=
match
data
.router
.get_first
()
{
Some
(
url
)
=>
url
,
None
=>
return
HttpResponse
::
InternalServerError
()
.finish
(),
};
forward_request
(
&
data
.client
,
worker_url
,
"/health_generate"
.to_string
())
.await
}
#[get(
"/get_server_args"
)]
async
fn
get_server_args
(
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
let
worker_url
=
match
data
.router
.get_first
()
{
Some
(
url
)
=>
url
,
None
=>
return
HttpResponse
::
InternalServerError
()
.finish
(),
};
forward_request
(
&
data
.client
,
worker_url
,
"/get_server_args"
.to_string
())
.await
}
#[get(
"/v1/models"
)]
async
fn
v1_model
(
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
async
fn
v1_model
s
(
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
let
worker_url
=
match
data
.router
.get_first
()
{
Some
(
url
)
=>
url
,
None
=>
return
HttpResponse
::
InternalServerError
()
.finish
(),
...
...
@@ -65,7 +98,31 @@ async fn get_model_info(data: web::Data<AppState>) -> impl Responder {
#[post(
"/generate"
)]
async
fn
generate
(
req
:
HttpRequest
,
body
:
Bytes
,
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
data
.router
.dispatch
(
&
data
.client
,
req
,
body
)
.await
data
.router
.dispatch
(
&
data
.client
,
req
,
body
,
"generate"
)
.await
}
#[post(
"/v1/chat/completions"
)]
async
fn
v1_chat_completions
(
req
:
HttpRequest
,
body
:
Bytes
,
data
:
web
::
Data
<
AppState
>
,
)
->
impl
Responder
{
data
.router
.dispatch
(
&
data
.client
,
req
,
body
,
"v1/chat/completions"
)
.await
}
#[post(
"/v1/completions"
)]
async
fn
v1_completions
(
req
:
HttpRequest
,
body
:
Bytes
,
data
:
web
::
Data
<
AppState
>
,
)
->
impl
Responder
{
data
.router
.dispatch
(
&
data
.client
,
req
,
body
,
"v1/completions"
)
.await
}
pub
async
fn
startup
(
...
...
@@ -90,8 +147,13 @@ pub async fn startup(
App
::
new
()
.app_data
(
app_state
.clone
())
.service
(
generate
)
.service
(
v1_model
)
.service
(
v1_chat_completions
)
.service
(
v1_completions
)
.service
(
v1_models
)
.service
(
get_model_info
)
.service
(
health
)
.service
(
health_generate
)
.service
(
get_server_args
)
})
.bind
((
host
,
port
))
?
.run
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment