Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
13662fd5
"docs/source/tutorials/syncbn.rst" did not exist on "25985c31fbad7eaff58dbd6575414fad06cb4b42"
Unverified
Commit
13662fd5
authored
Mar 11, 2024
by
Lianmin Zheng
Committed by
GitHub
Mar 11, 2024
Browse files
Fix RuntimeEndpoint (#279)
parent
d5ae2eba
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
7 deletions
+20
-7
python/sglang/api.py
python/sglang/api.py
+3
-0
python/sglang/backend/runtime_endpoint.py
python/sglang/backend/runtime_endpoint.py
+10
-4
python/sglang/srt/server.py
python/sglang/srt/server.py
+4
-2
python/sglang/utils.py
python/sglang/utils.py
+3
-1
No files found.
python/sglang/api.py
View file @
13662fd5
...
@@ -43,18 +43,21 @@ def Runtime(*args, **kwargs):
...
@@ -43,18 +43,21 @@ def Runtime(*args, **kwargs):
def
set_default_backend
(
backend
:
BaseBackend
):
def
set_default_backend
(
backend
:
BaseBackend
):
global_config
.
default_backend
=
backend
global_config
.
default_backend
=
backend
def
flush_cache
(
backend
:
BaseBackend
=
None
):
def
flush_cache
(
backend
:
BaseBackend
=
None
):
backend
=
backend
or
global_config
.
default_backend
backend
=
backend
or
global_config
.
default_backend
if
backend
is
None
:
if
backend
is
None
:
return
False
return
False
return
backend
.
flush_cache
()
return
backend
.
flush_cache
()
def
get_server_args
(
backend
:
BaseBackend
=
None
):
def
get_server_args
(
backend
:
BaseBackend
=
None
):
backend
=
backend
or
global_config
.
default_backend
backend
=
backend
or
global_config
.
default_backend
if
backend
is
None
:
if
backend
is
None
:
return
None
return
None
return
backend
.
get_server_args
()
return
backend
.
get_server_args
()
def
gen
(
def
gen
(
name
:
Optional
[
str
]
=
None
,
name
:
Optional
[
str
]
=
None
,
max_tokens
:
Optional
[
int
]
=
None
,
max_tokens
:
Optional
[
int
]
=
None
,
...
...
python/sglang/backend/runtime_endpoint.py
View file @
13662fd5
...
@@ -12,7 +12,13 @@ from sglang.utils import encode_image_base64, find_printable_text, http_request
...
@@ -12,7 +12,13 @@ from sglang.utils import encode_image_base64, find_printable_text, http_request
class
RuntimeEndpoint
(
BaseBackend
):
class
RuntimeEndpoint
(
BaseBackend
):
def
__init__
(
self
,
base_url
,
auth_token
=
None
,
api_key
=
None
,
verify
=
None
):
def
__init__
(
self
,
base_url
:
str
,
auth_token
:
Optional
[
str
]
=
None
,
api_key
:
Optional
[
str
]
=
None
,
verify
:
Optional
[
str
]
=
None
,
):
super
().
__init__
()
super
().
__init__
()
self
.
support_concate_and_append
=
True
self
.
support_concate_and_append
=
True
...
@@ -61,7 +67,7 @@ class RuntimeEndpoint(BaseBackend):
...
@@ -61,7 +67,7 @@ class RuntimeEndpoint(BaseBackend):
self
.
base_url
+
"/generate"
,
self
.
base_url
+
"/generate"
,
json
=
{
"text"
:
prefix_str
,
"sampling_params"
:
{
"max_new_tokens"
:
0
}},
json
=
{
"text"
:
prefix_str
,
"sampling_params"
:
{
"max_new_tokens"
:
0
}},
auth_token
=
self
.
auth_token
,
auth_token
=
self
.
auth_token
,
api_key
=
self
.
api_key
api_key
=
self
.
api_key
,
verify
=
self
.
verify
,
verify
=
self
.
verify
,
)
)
assert
res
.
status_code
==
200
assert
res
.
status_code
==
200
...
@@ -71,7 +77,7 @@ class RuntimeEndpoint(BaseBackend):
...
@@ -71,7 +77,7 @@ class RuntimeEndpoint(BaseBackend):
self
.
base_url
+
"/generate"
,
self
.
base_url
+
"/generate"
,
json
=
{
"text"
:
s
.
text_
,
"sampling_params"
:
{
"max_new_tokens"
:
0
}},
json
=
{
"text"
:
s
.
text_
,
"sampling_params"
:
{
"max_new_tokens"
:
0
}},
auth_token
=
self
.
auth_token
,
auth_token
=
self
.
auth_token
,
api_key
=
self
.
api_key
api_key
=
self
.
api_key
,
verify
=
self
.
verify
,
verify
=
self
.
verify
,
)
)
assert
res
.
status_code
==
200
assert
res
.
status_code
==
200
...
@@ -159,7 +165,7 @@ class RuntimeEndpoint(BaseBackend):
...
@@ -159,7 +165,7 @@ class RuntimeEndpoint(BaseBackend):
json
=
data
,
json
=
data
,
stream
=
True
,
stream
=
True
,
auth_token
=
self
.
auth_token
,
auth_token
=
self
.
auth_token
,
api_key
=
self
.
api_key
api_key
=
self
.
api_key
,
verify
=
self
.
verify
,
verify
=
self
.
verify
,
)
)
pos
=
0
pos
=
0
...
...
python/sglang/srt/server.py
View file @
13662fd5
...
@@ -20,8 +20,6 @@ import requests
...
@@ -20,8 +20,6 @@ import requests
import
uvicorn
import
uvicorn
import
uvloop
import
uvloop
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
from
fastapi.responses
import
Response
,
StreamingResponse
from
fastapi.responses
import
Response
,
StreamingResponse
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
...
@@ -56,11 +54,14 @@ from sglang.srt.managers.router.manager import start_router_process
...
@@ -56,11 +54,14 @@ from sglang.srt.managers.router.manager import start_router_process
from
sglang.srt.managers.tokenizer_manager
import
TokenizerManager
from
sglang.srt.managers.tokenizer_manager
import
TokenizerManager
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.utils
import
handle_port_init
from
sglang.srt.utils
import
handle_port_init
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
API_KEY_HEADER_NAME
=
"X-API-Key"
API_KEY_HEADER_NAME
=
"X-API-Key"
class
APIKeyValidatorMiddleware
(
BaseHTTPMiddleware
):
class
APIKeyValidatorMiddleware
(
BaseHTTPMiddleware
):
def
__init__
(
self
,
app
,
api_key
:
str
):
def
__init__
(
self
,
app
,
api_key
:
str
):
super
().
__init__
(
app
)
super
().
__init__
(
app
)
...
@@ -77,6 +78,7 @@ class APIKeyValidatorMiddleware(BaseHTTPMiddleware):
...
@@ -77,6 +78,7 @@ class APIKeyValidatorMiddleware(BaseHTTPMiddleware):
response
=
await
call_next
(
request
)
response
=
await
call_next
(
request
)
return
response
return
response
app
=
FastAPI
()
app
=
FastAPI
()
tokenizer_manager
=
None
tokenizer_manager
=
None
chat_template_name
=
None
chat_template_name
=
None
...
...
python/sglang/utils.py
View file @
13662fd5
...
@@ -88,7 +88,9 @@ class HttpResponse:
...
@@ -88,7 +88,9 @@ class HttpResponse:
return
self
.
resp
.
status
return
self
.
resp
.
status
def
http_request
(
url
,
json
=
None
,
stream
=
False
,
auth_token
=
None
,
api_key
=
None
,
verify
=
None
):
def
http_request
(
url
,
json
=
None
,
stream
=
False
,
auth_token
=
None
,
api_key
=
None
,
verify
=
None
):
"""A faster version of requests.post with low-level urllib API."""
"""A faster version of requests.post with low-level urllib API."""
headers
=
{
"Content-Type"
:
"application/json; charset=utf-8"
}
headers
=
{
"Content-Type"
:
"application/json; charset=utf-8"
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment