Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c4707f1b
Unverified
Commit
c4707f1b
authored
Jan 16, 2024
by
Lianmin Zheng
Committed by
GitHub
Jan 16, 2024
Browse files
Improve docs (#17)
parent
ffe4aaee
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
24 additions
and
12 deletions
+24
-12
README.md
README.md
+18
-3
examples/usage/readme_examples.py
examples/usage/readme_examples.py
+4
-1
python/sglang/lang/interpreter.py
python/sglang/lang/interpreter.py
+1
-5
python/sglang/srt/layers/context_flashattention_nopad.py
python/sglang/srt/layers/context_flashattention_nopad.py
+0
-1
python/sglang/srt/layers/extend_attention.py
python/sglang/srt/layers/extend_attention.py
+0
-1
python/sglang/srt/managers/router/model_rpc.py
python/sglang/srt/managers/router/model_rpc.py
+1
-1
No files found.
README.md
View file @
c4707f1b
...
...
@@ -115,13 +115,14 @@ You can then invoke the function with `run` or `run_batch`.
The system will manage the state, chat template, and parallelism for you.
### Control Flow
You can use any Python code within the function body, including control flow, nested function calls, and external libraries.
```
python
@
sgl
.
function
def
control_flow
(
s
,
question
):
s
+=
"To answer this question: "
+
question
+
", "
s
+=
"I need to use a "
+
sgl
.
gen
(
"tool"
,
choices
=
[
"calculator"
,
"web browser"
])
+
". "
# You can use if or nested function calls
if
s
[
"tool"
]
==
"calculator"
:
s
+=
"The math expression is"
+
sgl
.
gen
(
"expression"
)
elif
s
[
"tool"
]
==
"web browser"
:
...
...
@@ -129,6 +130,9 @@ def control_flow(s, question):
```
### Parallelism
Use
`fork`
to launch parallel prompts.
Because
`sgl.gen`
is non-blocking, the for loop below issues two generation calls in parallel.
```
python
@
sgl
.
function
def
tip_suggestion
(
s
):
...
...
@@ -137,7 +141,7 @@ def tip_suggestion(s):
"1. Balanced Diet. 2. Regular Exercise.
\n\n
"
)
forks
=
s
.
fork
(
2
)
# Launch parallel prompts
forks
=
s
.
fork
(
2
)
for
i
,
f
in
enumerate
(
forks
):
f
+=
f
"Now, expand tip
{
i
+
1
}
into a paragraph:
\n
"
f
+=
sgl
.
gen
(
f
"detailed_tip"
,
max_tokens
=
256
,
stop
=
"
\n\n
"
)
...
...
@@ -148,6 +152,8 @@ def tip_suggestion(s):
```
### Multi Modality
Use
`sgl.image`
to pass an image as input.
```
python
@
sgl
.
function
def
image_qa
(
s
,
image_file
,
question
):
...
...
@@ -156,6 +162,8 @@ def image_qa(s, image_file, question):
```
### Constrained Decoding
Use
`regex=`
to specify a regular expression as a decoding constraint.
```
python
@
sgl
.
function
def
regular_expression_gen
(
s
):
...
...
@@ -168,6 +176,8 @@ def regular_expression_gen(s):
```
### Batching
Use
`run_batch`
to run a batch of requests with continuous batching.
```
python
@
sgl
.
function
def
text_qa
(
s
,
question
):
...
...
@@ -180,10 +190,13 @@ states = text_qa.run_batch(
{
"question"
:
"What is the capital of France?"
},
{
"question"
:
"What is the capital of Japan?"
},
],
progress_bar
=
True
)
```
### Streaming
Add
`stream=True`
to enable streaming.
```
python
@
sgl
.
function
def
text_qa
(
s
,
question
):
...
...
@@ -192,7 +205,9 @@ def text_qa(s, question):
state
=
text_qa
.
run
(
question
=
"What is the capital of France?"
,
temperature
=
0.1
)
temperature
=
0.1
,
stream
=
True
)
for
out
in
state
.
text_iter
():
print
(
out
,
end
=
""
,
flush
=
True
)
...
...
examples/usage/readme_examples.py
View file @
c4707f1b
...
...
@@ -53,6 +53,7 @@ def driver_batching():
{
"question"
:
"What is the capital of France?"
},
{
"question"
:
"What is the capital of Japan?"
},
],
progress_bar
=
True
)
for
s
in
states
:
...
...
@@ -63,7 +64,9 @@ def driver_batching():
def
driver_stream
():
state
=
text_qa
.
run
(
question
=
"What is the capital of France?"
,
temperature
=
0.1
)
temperature
=
0.1
,
stream
=
True
)
for
out
in
state
.
text_iter
():
print
(
out
,
end
=
""
,
flush
=
True
)
...
...
python/sglang/lang/interpreter.py
View file @
c4707f1b
...
...
@@ -632,11 +632,7 @@ class ProgramState:
self
.
stream_executor
.
end
()
def
__repr__
(
self
)
->
str
:
msgs
=
self
.
messages
()
ret
=
""
for
msg
in
msgs
:
ret
+=
msg
[
"role"
]
+
":
\n
"
+
msg
[
"content"
]
+
"
\n
"
return
ret
return
f
"ProgramState(
{
self
.
text
()
}
)"
class
ProgramStateGroup
:
...
...
python/sglang/srt/layers/context_flashattention_nopad.py
View file @
c4707f1b
...
...
@@ -5,7 +5,6 @@ import triton
import
triton.language
as
tl
from
sglang.srt.utils
import
wrap_kernel_launcher
CUDA_CAPABILITY
=
torch
.
cuda
.
get_device_capability
()
...
...
python/sglang/srt/layers/extend_attention.py
View file @
c4707f1b
...
...
@@ -4,7 +4,6 @@ import triton.language as tl
from
sglang.srt.layers.context_flashattention_nopad
import
context_attention_fwd
from
sglang.srt.utils
import
wrap_kernel_launcher
CUDA_CAPABILITY
=
torch
.
cuda
.
get_device_capability
()
...
...
python/sglang/srt/managers/router/model_rpc.py
View file @
c4707f1b
...
...
@@ -2,10 +2,10 @@ import asyncio
import
logging
import
multiprocessing
import
time
import
warnings
from
concurrent.futures
import
ThreadPoolExecutor
from
enum
import
Enum
,
auto
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
import
warnings
import
numpy
as
np
import
rpyc
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment