Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2e341cd4
Unverified
Commit
2e341cd4
authored
Jul 18, 2024
by
zhyncs
Committed by
GitHub
Jul 17, 2024
Browse files
misc: add pre-commit config (#637)
parent
a8552cb1
Changes
43
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
207 additions
and
145 deletions
+207
-145
.pre-commit-config.yaml
.pre-commit-config.yaml
+9
-0
benchmark/latency_throughput/bench_serving.py
benchmark/latency_throughput/bench_serving.py
+2
-2
benchmark/line_retrieval/gen_data.py
benchmark/line_retrieval/gen_data.py
+3
-3
examples/quick_start/anthropic_example_chat.py
examples/quick_start/anthropic_example_chat.py
+14
-8
examples/quick_start/anthropic_example_complete.py
examples/quick_start/anthropic_example_complete.py
+10
-9
examples/quick_start/azure_openai_example_chat.py
examples/quick_start/azure_openai_example_chat.py
+16
-9
examples/quick_start/gemini_example_chat.py
examples/quick_start/gemini_example_chat.py
+14
-8
examples/quick_start/gemini_example_complete.py
examples/quick_start/gemini_example_complete.py
+10
-9
examples/quick_start/gemini_example_multimodal_chat.py
examples/quick_start/gemini_example_multimodal_chat.py
+2
-1
examples/quick_start/openai_example_chat.py
examples/quick_start/openai_example_chat.py
+14
-8
examples/quick_start/openai_example_complete.py
examples/quick_start/openai_example_complete.py
+10
-9
examples/quick_start/openrouter_example_chat.py
examples/quick_start/openrouter_example_chat.py
+3
-1
examples/quick_start/srt_example_chat.py
examples/quick_start/srt_example_chat.py
+14
-8
examples/quick_start/srt_example_complete.py
examples/quick_start/srt_example_complete.py
+11
-9
examples/quick_start/srt_example_llava.py
examples/quick_start/srt_example_llava.py
+11
-8
examples/quick_start/srt_example_yi_vl.py
examples/quick_start/srt_example_yi_vl.py
+8
-5
examples/quick_start/together_example_chat.py
examples/quick_start/together_example_chat.py
+16
-9
examples/quick_start/together_example_complete.py
examples/quick_start/together_example_complete.py
+12
-10
examples/usage/async_io.py
examples/usage/async_io.py
+7
-2
examples/usage/cot_decoding.py
examples/usage/cot_decoding.py
+21
-27
No files found.
.pre-commit-config.yaml
0 → 100644
View file @
2e341cd4
repos
:
-
repo
:
https://github.com/PyCQA/isort
rev
:
5.13.2
hooks
:
-
id
:
isort
-
repo
:
https://github.com/psf/black
rev
:
stable
hooks
:
-
id
:
black
benchmark/latency_throughput/bench_serving.py
View file @
2e341cd4
...
@@ -312,8 +312,8 @@ def main(args: argparse.Namespace):
...
@@ -312,8 +312,8 @@ def main(args: argparse.Namespace):
np
.
sum
([
output_len
for
_
,
output_len
,
_
in
REQUEST_LATENCY
])
/
benchmark_time
np
.
sum
([
output_len
for
_
,
output_len
,
_
in
REQUEST_LATENCY
])
/
benchmark_time
)
)
#latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
#
latencies = [round(latency, 2) for _, _, latency in REQUEST_LATENCY]
#print(latencies)
#
print(latencies)
print
(
f
"Total time:
{
benchmark_time
:.
2
f
}
s"
)
print
(
f
"Total time:
{
benchmark_time
:.
2
f
}
s"
)
print
(
f
"Request throughput:
{
args
.
num_prompts
/
benchmark_time
:.
2
f
}
requests/s"
)
print
(
f
"Request throughput:
{
args
.
num_prompts
/
benchmark_time
:.
2
f
}
requests/s"
)
...
...
benchmark/line_retrieval/gen_data.py
View file @
2e341cd4
...
@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
...
@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
)
)
for
i
in
redirect_indices
:
for
i
in
redirect_indices
:
target_idx
=
np
.
random
.
choice
(
min
(
i
*
2
+
100
,
num_lines
))
target_idx
=
np
.
random
.
choice
(
min
(
i
*
2
+
100
,
num_lines
))
lines
[
lines
[
i
]
=
(
i
f
"Line
{
indices
[
i
]
}
: The REGISTER_CONTENT is the same as Line
{
indices
[
target_idx
]
}
."
]
=
f
"Line
{
indices
[
i
]
}
: The REGISTER_CONTENT is the same as Line
{
indices
[
target_idx
]
}
."
)
redirects
[
i
]
=
target_idx
redirects
[
i
]
=
target_idx
# Build links and find sources
# Build links and find sources
...
...
examples/quick_start/anthropic_example_chat.py
View file @
2e341cd4
...
@@ -3,6 +3,7 @@ Usage:
...
@@ -3,6 +3,7 @@ Usage:
export ANTHROPIC_API_KEY=sk-******
export ANTHROPIC_API_KEY=sk-******
python3 anthropic_example_chat.py
python3 anthropic_example_chat.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -30,7 +31,7 @@ def stream():
...
@@ -30,7 +31,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -39,13 +40,18 @@ def stream():
...
@@ -39,13 +40,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/anthropic_example_complete.py
View file @
2e341cd4
...
@@ -9,15 +9,14 @@ import sglang as sgl
...
@@ -9,15 +9,14 @@ import sglang as sgl
@
sgl
.
function
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
def
few_shot_qa
(
s
,
question
):
s
+=
(
s
+=
"""
"""
\n\n
Human: What is the capital of France?
\n\n
Human: What is the capital of France?
\n\n
Assistant: Paris
\n\n
Assistant: Paris
\n\n
Human: What is the capital of Germany?
\n\n
Human: What is the capital of Germany?
\n\n
Assistant: Berlin
\n\n
Assistant: Berlin
\n\n
Human: What is the capital of Italy?
\n\n
Human: What is the capital of Italy?
\n\n
Assistant: Rome
\n\n
Assistant: Rome
"""
)
"""
s
+=
"
\n\n
Human: "
+
question
+
"
\n
"
s
+=
"
\n\n
Human: "
+
question
+
"
\n
"
s
+=
"
\n\n
Assistant:"
+
sgl
.
gen
(
"answer"
,
temperature
=
0
)
s
+=
"
\n\n
Assistant:"
+
sgl
.
gen
(
"answer"
,
temperature
=
0
)
...
@@ -33,8 +32,8 @@ def single():
...
@@ -33,8 +32,8 @@ def single():
def
stream
():
def
stream
():
state
=
few_shot_qa
.
run
(
state
=
few_shot_qa
.
run
(
question
=
"What is the capital of the United States?"
,
question
=
"What is the capital of the United States?"
,
stream
=
True
stream
=
True
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -42,10 +41,12 @@ def stream():
...
@@ -42,10 +41,12 @@ def stream():
def
batch
():
def
batch
():
states
=
few_shot_qa
.
run_batch
([
states
=
few_shot_qa
.
run_batch
(
{
"question"
:
"What is the capital of the United States?"
},
[
{
"question"
:
"What is the capital of China?"
},
{
"question"
:
"What is the capital of the United States?"
},
])
{
"question"
:
"What is the capital of China?"
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
])
print
(
s
[
"answer"
])
...
...
examples/quick_start/azure_openai_example_chat.py
View file @
2e341cd4
...
@@ -3,9 +3,11 @@ Usage:
...
@@ -3,9 +3,11 @@ Usage:
export AZURE_OPENAI_API_KEY=sk-******
export AZURE_OPENAI_API_KEY=sk-******
python3 openai_example_chat.py
python3 openai_example_chat.py
"""
"""
import
sglang
as
sgl
import
os
import
os
import
sglang
as
sgl
@
sgl
.
function
@
sgl
.
function
def
multi_turn_question
(
s
,
question_1
,
question_2
):
def
multi_turn_question
(
s
,
question_1
,
question_2
):
...
@@ -32,7 +34,7 @@ def stream():
...
@@ -32,7 +34,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -41,13 +43,18 @@ def stream():
...
@@ -41,13 +43,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/gemini_example_chat.py
View file @
2e341cd4
...
@@ -3,6 +3,7 @@ Usage:
...
@@ -3,6 +3,7 @@ Usage:
export GCP_PROJECT_ID=******
export GCP_PROJECT_ID=******
python3 gemini_example_chat.py
python3 gemini_example_chat.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -30,7 +31,7 @@ def stream():
...
@@ -30,7 +31,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -39,13 +40,18 @@ def stream():
...
@@ -39,13 +40,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/gemini_example_complete.py
View file @
2e341cd4
...
@@ -9,15 +9,14 @@ import sglang as sgl
...
@@ -9,15 +9,14 @@ import sglang as sgl
@
sgl
.
function
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
def
few_shot_qa
(
s
,
question
):
s
+=
(
s
+=
"""The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France?
Q: What is the capital of France?
A: Paris
A: Paris
Q: What is the capital of Germany?
Q: What is the capital of Germany?
A: Berlin
A: Berlin
Q: What is the capital of Italy?
Q: What is the capital of Italy?
A: Rome
A: Rome
"""
)
"""
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
...
@@ -33,8 +32,8 @@ def single():
...
@@ -33,8 +32,8 @@ def single():
def
stream
():
def
stream
():
state
=
few_shot_qa
.
run
(
state
=
few_shot_qa
.
run
(
question
=
"What is the capital of the United States?"
,
question
=
"What is the capital of the United States?"
,
stream
=
True
stream
=
True
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -42,10 +41,12 @@ def stream():
...
@@ -42,10 +41,12 @@ def stream():
def
batch
():
def
batch
():
states
=
few_shot_qa
.
run_batch
([
states
=
few_shot_qa
.
run_batch
(
{
"question"
:
"What is the capital of the United States?"
},
[
{
"question"
:
"What is the capital of China?"
},
{
"question"
:
"What is the capital of the United States?"
},
])
{
"question"
:
"What is the capital of China?"
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
])
print
(
s
[
"answer"
])
...
...
examples/quick_start/gemini_example_multimodal_chat.py
View file @
2e341cd4
...
@@ -3,6 +3,7 @@ Usage:
...
@@ -3,6 +3,7 @@ Usage:
export GCP_PROJECT_ID=******
export GCP_PROJECT_ID=******
python3 gemini_example_multimodal_chat.py
python3 gemini_example_multimodal_chat.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -19,7 +20,7 @@ if __name__ == "__main__":
...
@@ -19,7 +20,7 @@ if __name__ == "__main__":
image_file1
=
"./images/cat.jpeg"
,
image_file1
=
"./images/cat.jpeg"
,
image_file2
=
"./images/dog.jpeg"
,
image_file2
=
"./images/dog.jpeg"
,
question
=
"Describe difference of the two images in one sentence."
,
question
=
"Describe difference of the two images in one sentence."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
...
...
examples/quick_start/openai_example_chat.py
View file @
2e341cd4
...
@@ -3,6 +3,7 @@ Usage:
...
@@ -3,6 +3,7 @@ Usage:
export OPENAI_API_KEY=sk-******
export OPENAI_API_KEY=sk-******
python3 openai_example_chat.py
python3 openai_example_chat.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -31,7 +32,7 @@ def stream():
...
@@ -31,7 +32,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -40,13 +41,18 @@ def stream():
...
@@ -40,13 +41,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/openai_example_complete.py
View file @
2e341cd4
...
@@ -9,15 +9,14 @@ import sglang as sgl
...
@@ -9,15 +9,14 @@ import sglang as sgl
@
sgl
.
function
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
def
few_shot_qa
(
s
,
question
):
s
+=
(
s
+=
"""The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France?
Q: What is the capital of France?
A: Paris
A: Paris
Q: What is the capital of Germany?
Q: What is the capital of Germany?
A: Berlin
A: Berlin
Q: What is the capital of Italy?
Q: What is the capital of Italy?
A: Rome
A: Rome
"""
)
"""
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
...
@@ -33,8 +32,8 @@ def single():
...
@@ -33,8 +32,8 @@ def single():
def
stream
():
def
stream
():
state
=
few_shot_qa
.
run
(
state
=
few_shot_qa
.
run
(
question
=
"What is the capital of the United States?"
,
question
=
"What is the capital of the United States?"
,
stream
=
True
stream
=
True
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -42,10 +41,12 @@ def stream():
...
@@ -42,10 +41,12 @@ def stream():
def
batch
():
def
batch
():
states
=
few_shot_qa
.
run_batch
([
states
=
few_shot_qa
.
run_batch
(
{
"question"
:
"What is the capital of the United States?"
},
[
{
"question"
:
"What is the capital of China?"
},
{
"question"
:
"What is the capital of the United States?"
},
])
{
"question"
:
"What is the capital of China?"
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
])
print
(
s
[
"answer"
])
...
...
examples/quick_start/openrouter_example_chat.py
View file @
2e341cd4
...
@@ -3,9 +3,11 @@ Usage:
...
@@ -3,9 +3,11 @@ Usage:
export OPENROUTER_API_KEY=sk-******
export OPENROUTER_API_KEY=sk-******
python3 together_example_chat.py
python3 together_example_chat.py
"""
"""
import
sglang
as
sgl
import
os
import
os
import
sglang
as
sgl
@
sgl
.
function
@
sgl
.
function
def
multi_turn_question
(
s
,
question_1
,
question_2
):
def
multi_turn_question
(
s
,
question_1
,
question_2
):
...
...
examples/quick_start/srt_example_chat.py
View file @
2e341cd4
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
Usage:
Usage:
python3 srt_example_chat.py
python3 srt_example_chat.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -29,7 +30,7 @@ def stream():
...
@@ -29,7 +30,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -38,13 +39,18 @@ def stream():
...
@@ -38,13 +39,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/srt_example_complete.py
View file @
2e341cd4
...
@@ -2,20 +2,20 @@
...
@@ -2,20 +2,20 @@
Usage:
Usage:
python3 srt_example_complete.py
python3 srt_example_complete.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
@
sgl
.
function
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
def
few_shot_qa
(
s
,
question
):
s
+=
(
s
+=
"""The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France?
Q: What is the capital of France?
A: Paris
A: Paris
Q: What is the capital of Germany?
Q: What is the capital of Germany?
A: Berlin
A: Berlin
Q: What is the capital of Italy?
Q: What is the capital of Italy?
A: Rome
A: Rome
"""
)
"""
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
...
@@ -31,8 +31,8 @@ def single():
...
@@ -31,8 +31,8 @@ def single():
def
stream
():
def
stream
():
state
=
few_shot_qa
.
run
(
state
=
few_shot_qa
.
run
(
question
=
"What is the capital of the United States?"
,
question
=
"What is the capital of the United States?"
,
stream
=
True
stream
=
True
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -40,10 +40,12 @@ def stream():
...
@@ -40,10 +40,12 @@ def stream():
def
batch
():
def
batch
():
states
=
few_shot_qa
.
run_batch
([
states
=
few_shot_qa
.
run_batch
(
{
"question"
:
"What is the capital of the United States?"
},
[
{
"question"
:
"What is the capital of China?"
},
{
"question"
:
"What is the capital of the United States?"
},
])
{
"question"
:
"What is the capital of China?"
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
])
print
(
s
[
"answer"
])
...
...
examples/quick_start/srt_example_llava.py
View file @
2e341cd4
"""
"""
Usage: python3 srt_example_llava.py
Usage: python3 srt_example_llava.py
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -12,9 +13,8 @@ def image_qa(s, image_path, question):
...
@@ -12,9 +13,8 @@ def image_qa(s, image_path, question):
def
single
():
def
single
():
state
=
image_qa
.
run
(
state
=
image_qa
.
run
(
image_path
=
"images/cat.jpeg"
,
image_path
=
"images/cat.jpeg"
,
question
=
"What is this?"
,
max_new_tokens
=
128
question
=
"What is this?"
,
)
max_new_tokens
=
128
)
print
(
state
[
"answer"
],
"
\n
"
)
print
(
state
[
"answer"
],
"
\n
"
)
...
@@ -23,7 +23,8 @@ def stream():
...
@@ -23,7 +23,8 @@ def stream():
image_path
=
"images/cat.jpeg"
,
image_path
=
"images/cat.jpeg"
,
question
=
"What is this?"
,
question
=
"What is this?"
,
max_new_tokens
=
64
,
max_new_tokens
=
64
,
stream
=
True
)
stream
=
True
,
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -33,8 +34,8 @@ def stream():
...
@@ -33,8 +34,8 @@ def stream():
def
batch
():
def
batch
():
states
=
image_qa
.
run_batch
(
states
=
image_qa
.
run_batch
(
[
[
{
"image_path"
:
"images/cat.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/cat.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/dog.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/dog.jpeg"
,
"question"
:
"What is this?"
},
],
],
max_new_tokens
=
128
,
max_new_tokens
=
128
,
)
)
...
@@ -43,8 +44,10 @@ def batch():
...
@@ -43,8 +44,10 @@ def batch():
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
runtime
=
sgl
.
Runtime
(
model_path
=
"liuhaotian/llava-v1.6-vicuna-7b"
,
runtime
=
sgl
.
Runtime
(
tokenizer_path
=
"llava-hf/llava-1.5-7b-hf"
)
model_path
=
"liuhaotian/llava-v1.6-vicuna-7b"
,
tokenizer_path
=
"llava-hf/llava-1.5-7b-hf"
,
)
sgl
.
set_default_backend
(
runtime
)
sgl
.
set_default_backend
(
runtime
)
print
(
f
"chat template:
{
runtime
.
endpoint
.
chat_template
.
name
}
"
)
print
(
f
"chat template:
{
runtime
.
endpoint
.
chat_template
.
name
}
"
)
...
...
examples/quick_start/srt_example_yi_vl.py
View file @
2e341cd4
...
@@ -3,6 +3,7 @@ Usage: python3 srt_example_yi_vl.py
...
@@ -3,6 +3,7 @@ Usage: python3 srt_example_yi_vl.py
Requirements: transformers==4.38
Requirements: transformers==4.38
"""
"""
import
sglang
as
sgl
import
sglang
as
sgl
...
@@ -17,7 +18,8 @@ def single():
...
@@ -17,7 +18,8 @@ def single():
image_path
=
"images/cat.jpeg"
,
image_path
=
"images/cat.jpeg"
,
question
=
"What is this?"
,
question
=
"What is this?"
,
max_new_tokens
=
64
,
max_new_tokens
=
64
,
stop
=
"###"
)
stop
=
"###"
,
)
print
(
state
[
"answer"
],
"
\n
"
)
print
(
state
[
"answer"
],
"
\n
"
)
...
@@ -27,7 +29,8 @@ def stream():
...
@@ -27,7 +29,8 @@ def stream():
question
=
"What is this?"
,
question
=
"What is this?"
,
max_new_tokens
=
64
,
max_new_tokens
=
64
,
stream
=
True
,
stream
=
True
,
stop
=
"###"
)
stop
=
"###"
,
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -37,11 +40,11 @@ def stream():
...
@@ -37,11 +40,11 @@ def stream():
def
batch
():
def
batch
():
states
=
image_qa
.
run_batch
(
states
=
image_qa
.
run_batch
(
[
[
{
"image_path"
:
"images/cat.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/cat.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/dog.jpeg"
,
"question"
:
"What is this?"
},
{
"image_path"
:
"images/dog.jpeg"
,
"question"
:
"What is this?"
},
],
],
max_new_tokens
=
64
,
max_new_tokens
=
64
,
stop
=
"###"
stop
=
"###"
,
)
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
],
"
\n
"
)
print
(
s
[
"answer"
],
"
\n
"
)
...
...
examples/quick_start/together_example_chat.py
View file @
2e341cd4
...
@@ -3,9 +3,11 @@ Usage:
...
@@ -3,9 +3,11 @@ Usage:
export TOGETHER_API_KEY=sk-******
export TOGETHER_API_KEY=sk-******
python3 together_example_chat.py
python3 together_example_chat.py
"""
"""
import
sglang
as
sgl
import
os
import
os
import
sglang
as
sgl
@
sgl
.
function
@
sgl
.
function
def
multi_turn_question
(
s
,
question_1
,
question_2
):
def
multi_turn_question
(
s
,
question_1
,
question_2
):
...
@@ -32,7 +34,7 @@ def stream():
...
@@ -32,7 +34,7 @@ def stream():
state
=
multi_turn_question
.
run
(
state
=
multi_turn_question
.
run
(
question_1
=
"What is the capital of the United States?"
,
question_1
=
"What is the capital of the United States?"
,
question_2
=
"List two local attractions."
,
question_2
=
"List two local attractions."
,
stream
=
True
stream
=
True
,
)
)
for
out
in
state
.
text_iter
():
for
out
in
state
.
text_iter
():
...
@@ -41,13 +43,18 @@ def stream():
...
@@ -41,13 +43,18 @@ def stream():
def
batch
():
def
batch
():
states
=
multi_turn_question
.
run_batch
([
states
=
multi_turn_question
.
run_batch
(
{
"question_1"
:
"What is the capital of the United States?"
,
[
"question_2"
:
"List two local attractions."
},
{
"question_1"
:
"What is the capital of the United States?"
,
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"List two local attractions."
,
"question_2"
:
"What is the population of this city?"
},
},
])
{
"question_1"
:
"What is the capital of France?"
,
"question_2"
:
"What is the population of this city?"
,
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
.
messages
())
print
(
s
.
messages
())
...
...
examples/quick_start/together_example_complete.py
View file @
2e341cd4
...
@@ -4,21 +4,21 @@ export TOGETHER_API_KEY=sk-******
...
@@ -4,21 +4,21 @@ export TOGETHER_API_KEY=sk-******
python3 together_example_complete.py
python3 together_example_complete.py
"""
"""
import
sglang
as
sgl
import
os
import
os
import
sglang
as
sgl
@
sgl
.
function
@
sgl
.
function
def
few_shot_qa
(
s
,
question
):
def
few_shot_qa
(
s
,
question
):
s
+=
(
s
+=
"""The following are questions with answers.
"""The following are questions with answers.
Q: What is the capital of France?
Q: What is the capital of France?
A: Paris
A: Paris
Q: What is the capital of Germany?
Q: What is the capital of Germany?
A: Berlin
A: Berlin
Q: What is the capital of Italy?
Q: What is the capital of Italy?
A: Rome
A: Rome
"""
)
"""
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"Q: "
+
question
+
"
\n
"
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
s
+=
"A:"
+
sgl
.
gen
(
"answer"
,
stop
=
"
\n
"
,
temperature
=
0
)
...
@@ -34,8 +34,8 @@ def single():
...
@@ -34,8 +34,8 @@ def single():
def
stream
():
def
stream
():
state
=
few_shot_qa
.
run
(
state
=
few_shot_qa
.
run
(
question
=
"What is the capital of the United States?"
,
question
=
"What is the capital of the United States?"
,
stream
=
True
stream
=
True
)
)
for
out
in
state
.
text_iter
(
"answer"
):
for
out
in
state
.
text_iter
(
"answer"
):
print
(
out
,
end
=
""
,
flush
=
True
)
print
(
out
,
end
=
""
,
flush
=
True
)
...
@@ -43,10 +43,12 @@ def stream():
...
@@ -43,10 +43,12 @@ def stream():
def
batch
():
def
batch
():
states
=
few_shot_qa
.
run_batch
([
states
=
few_shot_qa
.
run_batch
(
{
"question"
:
"What is the capital of the United States?"
},
[
{
"question"
:
"What is the capital of China?"
},
{
"question"
:
"What is the capital of the United States?"
},
])
{
"question"
:
"What is the capital of China?"
},
]
)
for
s
in
states
:
for
s
in
states
:
print
(
s
[
"answer"
])
print
(
s
[
"answer"
])
...
...
examples/usage/async_io.py
View file @
2e341cd4
...
@@ -2,7 +2,9 @@
...
@@ -2,7 +2,9 @@
Usage:
Usage:
python3 async_io.py
python3 async_io.py
"""
"""
import
asyncio
import
asyncio
from
sglang
import
Runtime
from
sglang
import
Runtime
...
@@ -14,7 +16,10 @@ async def generate(
...
@@ -14,7 +16,10 @@ async def generate(
tokenizer
=
engine
.
get_tokenizer
()
tokenizer
=
engine
.
get_tokenizer
()
messages
=
[
messages
=
[
{
"role"
:
"system"
,
"content"
:
"You will be given question answer tasks."
,},
{
"role"
:
"system"
,
"content"
:
"You will be given question answer tasks."
,
},
{
"role"
:
"user"
,
"content"
:
prompt
},
{
"role"
:
"user"
,
"content"
:
prompt
},
]
]
...
@@ -36,5 +41,5 @@ if __name__ == "__main__":
...
@@ -36,5 +41,5 @@ if __name__ == "__main__":
prompt
=
"Who is Alan Turing?"
prompt
=
"Who is Alan Turing?"
sampling_params
=
{
"max_new_tokens"
:
128
}
sampling_params
=
{
"max_new_tokens"
:
128
}
asyncio
.
run
(
generate
(
runtime
,
prompt
,
sampling_params
))
asyncio
.
run
(
generate
(
runtime
,
prompt
,
sampling_params
))
runtime
.
shutdown
()
runtime
.
shutdown
()
examples/usage/cot_decoding.py
View file @
2e341cd4
...
@@ -33,8 +33,7 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
...
@@ -33,8 +33,7 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
)
)
logprobs
=
step_0
.
get_meta_info
(
"get_top_k"
)[
"decode_top_logprobs"
][
0
]
logprobs
=
step_0
.
get_meta_info
(
"get_top_k"
)[
"decode_top_logprobs"
][
0
]
print
(
"Decoding step 0:"
,
print
(
"Decoding step 0:"
,
", "
.
join
(
pformat
(
token
[
2
])
for
token
in
logprobs
))
", "
.
join
(
pformat
(
token
[
2
])
for
token
in
logprobs
))
for
idx
,
(
f
,
token
)
in
enumerate
(
zip
(
forks
,
logprobs
)):
for
idx
,
(
f
,
token
)
in
enumerate
(
zip
(
forks
,
logprobs
)):
logprob
,
token_id
,
text
=
token
logprob
,
token_id
,
text
=
token
f
+=
text
f
+=
text
...
@@ -56,17 +55,9 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
...
@@ -56,17 +55,9 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
)
)
# calculate probability disparity between the top and secondary tokens
# calculate probability disparity between the top and secondary tokens
x1s
=
[
x1s
=
[
exp
(
xt
[
0
][
0
])
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]]
exp
(
xt
[
0
][
0
])
x2s
=
[
exp
(
xt
[
1
][
0
])
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]]
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]
tokens
=
[
xt
[
0
][
2
]
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]]
]
x2s
=
[
exp
(
xt
[
1
][
0
])
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]
]
tokens
=
[
xt
[
0
][
2
]
for
xt
in
f
.
get_meta_info
(
"answer"
)[
"decode_top_logprobs"
]
]
delta
=
(
sum
(
x1s
)
-
sum
(
x2s
))
/
len
(
x1s
)
delta
=
(
sum
(
x1s
)
-
sum
(
x2s
))
/
len
(
x1s
)
# extract the answer span (without the '<|end_of_text|>' token)
# extract the answer span (without the '<|end_of_text|>' token)
...
@@ -79,42 +70,45 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
...
@@ -79,42 +70,45 @@ def cot_decoding(s, question, get_top_k, is_chat_model, verbose):
top_logprobs_num
=
2
,
top_logprobs_num
=
2
,
return_text_in_logprobs
=
True
,
return_text_in_logprobs
=
True
,
)
)
answer
=
answer_forks
[
idx
][
'
answer_span
'
].
replace
(
'
\n
'
,
' '
).
strip
(
':'
)
answer
=
answer_forks
[
idx
][
"
answer_span
"
].
replace
(
"
\n
"
,
" "
).
strip
(
":"
)
print
(
print
(
f
"
{
YELLOW
}
Path #
{
idx
}
{
pformat
(
text
)
}
[
{
exp
(
logprob
):.
3
f
}
] (score=
{
delta
}
, answer=
{
answer
}
)
{
CLEAR
}
"
f
"
{
YELLOW
}
Path #
{
idx
}
{
pformat
(
text
)
}
[
{
exp
(
logprob
):.
3
f
}
] (score=
{
delta
}
, answer=
{
answer
}
)
{
CLEAR
}
"
)
)
generated_text
=
str
(
answer_forks
[
idx
])[
len
(
"ProgramState("
)
:
-
1
]
generated_text
=
str
(
answer_forks
[
idx
])[
len
(
"ProgramState("
)
:
-
1
]
print
(
f
"
{
BLUE
}{
pformat
(
generated_text
)
}{
CLEAR
}
"
)
print
(
f
"
{
BLUE
}{
pformat
(
generated_text
)
}{
CLEAR
}
"
)
if
verbose
:
if
verbose
:
answer_tokens
=
[
answer_tokens
=
[
xt
[
0
][
2
]
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
xt
[
0
][
2
]
"answer_span"
)[
"decode_top_logprobs"
]
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
"answer_span"
)[
"decode_top_logprobs"
]
]
]
answer_x1s
=
[
answer_x1s
=
[
exp
(
xt
[
0
][
0
])
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
exp
(
xt
[
0
][
0
])
"answer_span"
)[
"decode_top_logprobs"
]
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
"answer_span"
)[
"decode_top_logprobs"
]
]
]
answer_x2s
=
[
answer_x2s
=
[
exp
(
xt
[
1
][
0
])
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
exp
(
xt
[
1
][
0
])
"answer_span"
)[
"decode_top_logprobs"
]
for
xt
in
answer_forks
[
idx
].
get_meta_info
(
"answer_span"
)[
"decode_top_logprobs"
]
]
]
for
token
,
x1
,
x2
in
zip
(
tokens
,
x1s
,
x2s
):
for
token
,
x1
,
x2
in
zip
(
tokens
,
x1s
,
x2s
):
print
(
f
"
{
GREEN
}{
pformat
(
token
)
}{
CLEAR
}
(
{
x1
:.
3
f
}
-
{
x2
:.
3
f
}
)"
,
print
(
f
"
{
GREEN
}{
pformat
(
token
)
}{
CLEAR
}
(
{
x1
:.
3
f
}
-
{
x2
:.
3
f
}
)"
,
end
=
""
)
end
=
""
)
print
(
"
\n
==========="
)
print
(
"
\n
==========="
)
for
token
,
x1
,
x2
in
zip
(
answer_tokens
,
answer_x1s
,
answer_x2s
):
for
token
,
x1
,
x2
in
zip
(
answer_tokens
,
answer_x1s
,
answer_x2s
):
print
(
f
"
{
GREEN
}{
pformat
(
token
)
}{
CLEAR
}
(
{
x1
:.
3
f
}
-
{
x2
:.
3
f
}
)"
,
print
(
f
"
{
GREEN
}{
pformat
(
token
)
}{
CLEAR
}
(
{
x1
:.
3
f
}
-
{
x2
:.
3
f
}
)"
,
end
=
""
)
end
=
""
)
print
()
print
()
sgl
.
set_default_backend
(
sgl
.
RuntimeEndpoint
(
"http://localhost:30000"
))
sgl
.
set_default_backend
(
sgl
.
RuntimeEndpoint
(
"http://localhost:30000"
))
state
=
cot_decoding
.
run
(
state
=
cot_decoding
.
run
(
question
=
question
=
r
"Claire makes a 3 egg omelet every morning for breakfast. How many dozens of eggs will she eat in 4 weeks?"
,
r
"Claire makes a 3 egg omelet every morning for breakfast. How many dozens of eggs will she eat in 4 weeks?"
,
get_top_k
=
10
,
get_top_k
=
10
,
is_chat_model
=
True
,
is_chat_model
=
True
,
verbose
=
False
,
verbose
=
False
,
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment