Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0fb88aaa
Unverified
Commit
0fb88aaa
authored
Dec 11, 2024
by
Byron Hsu
Committed by
GitHub
Dec 11, 2024
Browse files
[router] Use borrow if possible to save cost (#2441)
parent
d4de9a62
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
28 additions
and
19 deletions
+28
-19
rust/src/router.rs
rust/src/router.rs
+23
-14
rust/src/server.rs
rust/src/server.rs
+5
-5
No files found.
rust/src/router.rs
View file @
0fb88aaa
...
...
@@ -252,7 +252,7 @@ impl Router {
async
fn
send_request
(
&
self
,
client
:
&
reqwest
::
Client
,
worker_url
:
String
,
worker_url
:
&
str
,
route
:
&
str
,
)
->
HttpResponse
{
match
client
.get
(
format!
(
"{}{}"
,
worker_url
,
route
))
.send
()
.await
{
...
...
@@ -275,7 +275,7 @@ impl Router {
pub
async
fn
route_to_first
(
&
self
,
client
:
&
reqwest
::
Client
,
route
:
&
str
)
->
HttpResponse
{
match
self
.select_first_worker
()
{
Ok
(
worker_url
)
=>
self
.send_request
(
client
,
worker_url
,
route
)
.await
,
Ok
(
worker_url
)
=>
self
.send_request
(
client
,
&
worker_url
,
route
)
.await
,
Err
(
e
)
=>
HttpResponse
::
InternalServerError
()
.body
(
e
),
}
}
...
...
@@ -398,8 +398,8 @@ impl Router {
async
fn
send_generate_request
(
&
self
,
client
:
&
reqwest
::
Client
,
req
:
HttpRequest
,
body
:
Bytes
,
req
:
&
HttpRequest
,
body
:
&
Bytes
,
route
:
&
str
,
worker_url
:
&
str
,
)
->
HttpResponse
{
...
...
@@ -484,8 +484,8 @@ impl Router {
pub
async
fn
route_generate_request
(
&
self
,
client
:
&
reqwest
::
Client
,
req
:
HttpRequest
,
body
:
Bytes
,
req
:
&
HttpRequest
,
body
:
&
Bytes
,
route
:
&
str
,
)
->
HttpResponse
{
let
worker_url
=
self
.select_generate_worker
(
&
body
,
route
);
...
...
@@ -493,7 +493,7 @@ impl Router {
.await
}
pub
async
fn
add_worker
(
&
self
,
worker_url
:
String
)
->
Result
<
String
,
String
>
{
pub
async
fn
add_worker
(
&
self
,
worker_url
:
&
str
)
->
Result
<
String
,
String
>
{
let
interval_secs
=
10
;
// check every 10 seconds
let
timeout_secs
=
300
;
// 5 minutes
...
...
@@ -517,11 +517,11 @@ impl Router {
|
Router
::
CacheAware
{
worker_urls
,
..
}
=>
{
info!
(
"Worker {} health check passed"
,
worker_url
);
let
mut
urls
=
worker_urls
.write
()
.unwrap
();
if
urls
.contains
(
&
worker_url
)
{
if
urls
.contains
(
&
worker_url
.to_string
()
)
{
return
Err
(
format!
(
"Worker {} already exists"
,
worker_url
));
}
info!
(
"Added worker: {}"
,
worker_url
);
urls
.push
(
worker_url
.
clone
());
urls
.push
(
worker_url
.
to_string
());
}
}
...
...
@@ -534,13 +534,16 @@ impl Router {
}
=
self
{
// Add worker to running queue with initial count of 0
running_queue
.lock
()
.unwrap
()
.insert
(
worker_url
.clone
(),
0
);
running_queue
.lock
()
.unwrap
()
.insert
(
worker_url
.to_string
(),
0
);
// Add worker to processed queue with initial count of 0
processed_queue
.lock
()
.unwrap
()
.insert
(
worker_url
.
clone
(),
0
);
.insert
(
worker_url
.
to_string
(),
0
);
// Add worker to tree
tree
.lock
()
.unwrap
()
.insert
(
&
""
.to_string
(),
&
worker_url
);
...
...
@@ -581,7 +584,7 @@ impl Router {
}
}
pub
fn
remove_worker
(
&
self
,
worker_url
:
String
)
{
pub
fn
remove_worker
(
&
self
,
worker_url
:
&
str
)
{
match
self
{
Router
::
RoundRobin
{
worker_urls
,
..
}
|
Router
::
Random
{
worker_urls
}
...
...
@@ -602,8 +605,14 @@ impl Router {
}
=
self
{
tree
.lock
()
.unwrap
()
.remove_tenant
(
&
worker_url
);
running_queue
.lock
()
.unwrap
()
.remove
(
&
worker_url
);
processed_queue
.lock
()
.unwrap
()
.remove
(
&
worker_url
);
running_queue
.lock
()
.unwrap
()
.remove
(
&
worker_url
.to_string
());
processed_queue
.lock
()
.unwrap
()
.remove
(
&
worker_url
.to_string
());
info!
(
"Removed worker from tree and cleaned up queues: {}"
,
worker_url
...
...
rust/src/server.rs
View file @
0fb88aaa
...
...
@@ -63,7 +63,7 @@ async fn get_model_info(data: web::Data<AppState>) -> impl Responder {
#[post(
"/generate"
)]
async
fn
generate
(
req
:
HttpRequest
,
body
:
Bytes
,
data
:
web
::
Data
<
AppState
>
)
->
impl
Responder
{
data
.router
.route_generate_request
(
&
data
.client
,
req
,
body
,
"/generate"
)
.route_generate_request
(
&
data
.client
,
&
req
,
&
body
,
"/generate"
)
.await
}
...
...
@@ -74,7 +74,7 @@ async fn v1_chat_completions(
data
:
web
::
Data
<
AppState
>
,
)
->
impl
Responder
{
data
.router
.route_generate_request
(
&
data
.client
,
req
,
body
,
"/v1/chat/completions"
)
.route_generate_request
(
&
data
.client
,
&
req
,
&
body
,
"/v1/chat/completions"
)
.await
}
...
...
@@ -85,7 +85,7 @@ async fn v1_completions(
data
:
web
::
Data
<
AppState
>
,
)
->
impl
Responder
{
data
.router
.route_generate_request
(
&
data
.client
,
req
,
body
,
"/v1/completions"
)
.route_generate_request
(
&
data
.client
,
&
req
,
&
body
,
"/v1/completions"
)
.await
}
...
...
@@ -102,7 +102,7 @@ async fn add_worker(
}
};
match
data
.router
.add_worker
(
worker_url
)
.await
{
match
data
.router
.add_worker
(
&
worker_url
)
.await
{
Ok
(
message
)
=>
HttpResponse
::
Ok
()
.body
(
message
),
Err
(
error
)
=>
HttpResponse
::
BadRequest
()
.body
(
error
),
}
...
...
@@ -117,7 +117,7 @@ async fn remove_worker(
Some
(
url
)
=>
url
.to_string
(),
None
=>
return
HttpResponse
::
BadRequest
()
.finish
(),
};
data
.router
.remove_worker
(
worker_url
);
data
.router
.remove_worker
(
&
worker_url
);
HttpResponse
::
Ok
()
.finish
()
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment