Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
909abb58
Commit
909abb58
authored
Sep 04, 2025
by
maxiao
Browse files
adapt to sglang v0.5.2rc1 on dcu
parents
Changes
347
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1223 additions
and
0 deletions
+1223
-0
examples/frontend_language/quick_start/anthropic_example_chat.py
...s/frontend_language/quick_start/anthropic_example_chat.py
+73
-0
examples/frontend_language/quick_start/anthropic_example_complete.py
...ontend_language/quick_start/anthropic_example_complete.py
+68
-0
examples/frontend_language/quick_start/azure_openai_example_chat.py
...rontend_language/quick_start/azure_openai_example_chat.py
+83
-0
examples/frontend_language/quick_start/gemini_example_chat.py
...ples/frontend_language/quick_start/gemini_example_chat.py
+73
-0
examples/frontend_language/quick_start/gemini_example_complete.py
.../frontend_language/quick_start/gemini_example_complete.py
+68
-0
examples/frontend_language/quick_start/gemini_example_multimodal_chat.py
...nd_language/quick_start/gemini_example_multimodal_chat.py
+30
-0
examples/frontend_language/quick_start/images/cat.jpeg
examples/frontend_language/quick_start/images/cat.jpeg
+0
-0
examples/frontend_language/quick_start/images/dog.jpeg
examples/frontend_language/quick_start/images/dog.jpeg
+0
-0
examples/frontend_language/quick_start/local_example_chat.py
examples/frontend_language/quick_start/local_example_chat.py
+75
-0
examples/frontend_language/quick_start/local_example_complete.py
...s/frontend_language/quick_start/local_example_complete.py
+70
-0
examples/frontend_language/quick_start/local_example_llava_next.py
...frontend_language/quick_start/local_example_llava_next.py
+78
-0
examples/frontend_language/quick_start/openai_example_chat.py
...ples/frontend_language/quick_start/openai_example_chat.py
+74
-0
examples/frontend_language/quick_start/openai_example_complete.py
.../frontend_language/quick_start/openai_example_complete.py
+68
-0
examples/frontend_language/quick_start/openai_example_n.py
examples/frontend_language/quick_start/openai_example_n.py
+71
-0
examples/frontend_language/quick_start/openai_example_o1.py
examples/frontend_language/quick_start/openai_example_o1.py
+57
-0
examples/frontend_language/quick_start/openrouter_example_chat.py
.../frontend_language/quick_start/openrouter_example_chat.py
+81
-0
examples/frontend_language/quick_start/together_example_chat.py
...es/frontend_language/quick_start/together_example_chat.py
+81
-0
examples/frontend_language/quick_start/together_example_complete.py
...rontend_language/quick_start/together_example_complete.py
+76
-0
examples/frontend_language/usage/chinese_regex.py
examples/frontend_language/usage/chinese_regex.py
+53
-0
examples/frontend_language/usage/choices_logprob.py
examples/frontend_language/usage/choices_logprob.py
+44
-0
No files found.
Too many changes to show.
To preserve performance only
347 of 347+
files are displayed.
Plain diff
Email patch
examples/frontend_language/quick_start/anthropic_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export ANTHROPIC_API_KEY=sk-******
python3 anthropic_example_chat.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat: ask question_1, generate, then ask question_2."""
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    sgl.set_default_backend(sgl.Anthropic("claude-3-haiku-20240307"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/anthropic_example_complete.py
0 → 100644
View file @
909abb58
"""
Usage:
export ANTHROPIC_API_KEY=sk-******
python3 anthropic_example_complete.py
"""

import sglang as sgl


@sgl.function
def few_shot_qa(s, question):
    """Few-shot QA using Anthropic's Human/Assistant completion format."""
    # Few-shot examples; "\n\nHuman:" / "\n\nAssistant:" are the turn markers.
    s += """\n\nHuman: What is the capital of France?\n\nAssistant: Paris\n\nHuman: What is the capital of Germany?\n\nAssistant: Berlin\n\nHuman: What is the capital of Italy?\n\nAssistant: Rome"""
    s += "\n\nHuman: " + question + "\n"
    s += "\n\nAssistant:" + sgl.gen("answer", temperature=0)


def single():
    """Run one request and sanity-check the answer."""
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    """Run one request and stream the generated answer."""
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each answer."""
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    sgl.set_default_backend(sgl.Anthropic("claude-3-haiku-20240307"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/azure_openai_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export AZURE_OPENAI_API_KEY=sk-******
python3 azure_openai_example_chat.py
"""

import os

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat with a system prompt."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    # Azure OpenAI uses the same sgl.OpenAI backend with is_azure=True plus
    # the deployment-specific api_version and azure_endpoint.
    backend = sgl.OpenAI(
        model_name="azure-gpt-4",
        api_version="2023-07-01-preview",
        azure_endpoint="https://oai-arena-sweden.openai.azure.com/",
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        is_azure=True,
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/gemini_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export GCP_PROJECT_ID=******
python3 gemini_example_chat.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat: ask question_1, generate, then ask question_2."""
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    sgl.set_default_backend(sgl.VertexAI("gemini-pro"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/gemini_example_complete.py
0 → 100644
View file @
909abb58
"""
Usage:
export GCP_PROJECT_ID=******
python3 gemini_example_complete.py
"""

import sglang as sgl


@sgl.function
def few_shot_qa(s, question):
    """Few-shot QA: three Q/A examples, then the user's question."""
    s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    s += "Q: " + question + "\n"
    # stop="\n" keeps the completion to a single answer line.
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    """Run one request and sanity-check the answer."""
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    """Run one request and stream the generated answer."""
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each answer."""
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    sgl.set_default_backend(sgl.VertexAI("gemini-pro"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/gemini_example_multimodal_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export GCP_PROJECT_ID=******
python3 gemini_example_multimodal_chat.py
"""

import sglang as sgl


@sgl.function
def image_qa(s, image_file1, image_file2, question):
    """Ask one question about two images in a single user turn."""
    s += sgl.user(sgl.image(image_file1) + sgl.image(image_file2) + question)
    s += sgl.assistant(sgl.gen("answer", max_tokens=256))


if __name__ == "__main__":
    sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))

    state = image_qa.run(
        image_file1="./images/cat.jpeg",
        image_file2="./images/dog.jpeg",
        question="Describe difference of the two images in one sentence.",
        stream=True,
    )

    # Stream the answer token by token, then print the final value.
    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()

    print(state["answer"])
examples/frontend_language/quick_start/images/cat.jpeg
0 → 100644
View file @
909abb58
337 KB
examples/frontend_language/quick_start/images/dog.jpeg
0 → 100644
View file @
909abb58
407 KB
examples/frontend_language/quick_start/local_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
python3 local_example_chat.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat: ask question_1, generate, then ask question_2."""
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    # Launch a local in-process runtime instead of a remote API backend.
    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
    sgl.set_default_backend(runtime)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()

    runtime.shutdown()
examples/frontend_language/quick_start/local_example_complete.py
0 → 100644
View file @
909abb58
"""
Usage:
python3 local_example_complete.py
"""

import sglang as sgl


@sgl.function
def few_shot_qa(s, question):
    """Few-shot QA: three Q/A examples, then the user's question."""
    s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    s += "Q: " + question + "\n"
    # stop="\n" keeps the completion to a single answer line.
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    """Run one request and sanity-check the answer."""
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    """Run one request and stream the generated answer."""
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each answer."""
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    # Launch a local in-process runtime instead of a remote API backend.
    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
    sgl.set_default_backend(runtime)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()

    runtime.shutdown()
examples/frontend_language/quick_start/local_example_llava_next.py
0 → 100644
View file @
909abb58
"""
Usage: python3 local_example_llava_next.py
"""

import sglang as sgl
from sglang.lang.chat_template import get_chat_template


@sgl.function
def image_qa(s, image_path, question):
    """One-turn visual QA: an image plus a text question."""
    s += sgl.user(sgl.image(image_path) + question)
    s += sgl.assistant(sgl.gen("answer"))


def single():
    """Run one image question and print the answer."""
    state = image_qa.run(
        image_path="images/cat.jpeg", question="What is this?", max_new_tokens=128
    )
    print(state["answer"], "\n")


def stream():
    """Run one image question and stream the answer."""
    state = image_qa.run(
        image_path="images/cat.jpeg",
        question="What is this?",
        max_new_tokens=64,
        stream=True,
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two image questions as one batch."""
    states = image_qa.run_batch(
        [
            {"image_path": "images/cat.jpeg", "question": "What is this?"},
            {"image_path": "images/dog.jpeg", "question": "What is this?"},
        ],
        max_new_tokens=128,
    )

    for s in states:
        print(s["answer"], "\n")


if __name__ == "__main__":
    import multiprocessing as mp

    # The local runtime forks workers; "spawn" avoids CUDA fork issues.
    mp.set_start_method("spawn", force=True)

    runtime = sgl.Runtime(model_path="lmms-lab/llama3-llava-next-8b")
    runtime.endpoint.chat_template = get_chat_template("llama-3-instruct-llava")

    # Or you can use the 72B model
    # runtime = sgl.Runtime(model_path="lmms-lab/llava-next-72b", tp_size=8)
    # runtime.endpoint.chat_template = get_chat_template("chatml-llava")

    sgl.set_default_backend(runtime)
    print(f"chat template: {runtime.endpoint.chat_template.name}")

    # Or you can use API models
    # sgl.set_default_backend(sgl.OpenAI("gpt-4-vision-preview"))
    # sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()

    runtime.shutdown()
examples/frontend_language/quick_start/openai_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export OPENAI_API_KEY=sk-******
python3 openai_example_chat.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat with a system prompt."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/openai_example_complete.py
0 → 100644
View file @
909abb58
"""
Usage:
export OPENAI_API_KEY=sk-******
python3 openai_example_complete.py
"""

import sglang as sgl


@sgl.function
def few_shot_qa(s, question):
    """Few-shot QA: three Q/A examples, then the user's question."""
    s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    s += "Q: " + question + "\n"
    # stop="\n" keeps the completion to a single answer line.
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    """Run one request and sanity-check the answer."""
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    """Run one request and stream the generated answer."""
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each answer."""
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    # A completion (non-chat) model is required for this raw-text prompt.
    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/openai_example_n.py
0 → 100644
View file @
909abb58
"""
Usage:
export OPENAI_API_KEY=sk-******
python3 openai_example_n.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat demonstrating n>1 sampling.

    answer_1 is generated with n=2, so its value is a list of two
    completions; answer_2 uses the default n=1 and is a plain string.
    """
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=1024, n=2))
    s += sgl.user(question_2)
    s += sgl.assistant(
        sgl.gen(
            "answer_2",
            max_tokens=1024,
        )
    )


def single():
    """Run one request and verify the n=2 / n=1 result shapes."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])
    print("\n-- answer_2 --\n", state["answer_2"])

    # n=2 yields a list of two completions; n=1 yields a plain string.
    assert isinstance(state["answer_1"], list)
    assert len(state["answer_1"]) == 2
    assert isinstance(state["answer_2"], str)


def batch():
    """Run two requests as one batch and verify each result's shape."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())
        print("\n-- answer_1 --\n", s["answer_1"])
        print("\n-- answer_2 --\n", s["answer_2"])

        assert isinstance(s["answer_1"], list)
        assert len(s["answer_1"]) == 2
        assert isinstance(s["answer_2"], str)


if __name__ == "__main__":
    sgl.set_default_backend(sgl.OpenAI("o1"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/openai_example_o1.py
0 → 100644
View file @
909abb58
"""
Usage:
export OPENAI_API_KEY=sk-******
python3 openai_example_o1.py
"""

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat against an o1 reasoning model."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=100))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2"))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    sgl.set_default_backend(sgl.OpenAI("o1"))

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/openrouter_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export OPENROUTER_API_KEY=sk-******
python3 openrouter_example_chat.py
"""

import os

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat with a system prompt."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    # OpenRouter exposes an OpenAI-compatible API; only base_url differs.
    backend = sgl.OpenAI(
        model_name="google/gemma-7b-it:free",
        base_url="https://openrouter.ai/api/v1",
        api_key=os.environ.get("OPENROUTER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/together_example_chat.py
0 → 100644
View file @
909abb58
"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_chat.py
"""

import os

import sglang as sgl


@sgl.function
def multi_turn_question(s, question_1, question_2):
    """Two-turn chat with a system prompt."""
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    """Run one request and print the full transcript plus the first answer."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    """Run one request and print tokens as they are generated."""
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each transcript."""
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    # Together exposes an OpenAI-compatible API; only base_url differs.
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/quick_start/together_example_complete.py
0 → 100644
View file @
909abb58
"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_complete.py
"""

import os

import sglang as sgl


@sgl.function
def few_shot_qa(s, question):
    """Few-shot QA: three Q/A examples, then the user's question."""
    s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    s += "Q: " + question + "\n"
    # stop="\n" keeps the completion to a single answer line.
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    """Run one request and sanity-check the answer."""
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    """Run one request and stream the generated answer."""
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    """Run two requests as one batch and print each answer."""
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    # is_chat_model=False selects the raw-completion API for this prompt style.
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        is_chat_model=False,
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
examples/frontend_language/usage/chinese_regex.py
0 → 100644
View file @
909abb58
import sglang as sgl

# Regex constraining the generated output to a fixed JSON schema describing a
# Harry Potter character (field names and enum values are in Chinese).
character_regex = (
    r"""\{\n"""
    + r"""    "姓名": "[^"]{1,32}",\n"""
    + r"""    "学院": "(格兰芬多|赫奇帕奇|拉文克劳|斯莱特林)",\n"""
    + r"""    "血型": "(纯血|混血|麻瓜)",\n"""
    + r"""    "职业": "(学生|教师|傲罗|魔法部|食死徒|凤凰社成员)",\n"""
    + r"""    "魔杖": \{\n"""
    + r"""        "材质": "[^"]{1,32}",\n"""
    + r"""        "杖芯": "[^"]{1,32}",\n"""
    + r"""        "长度": [0-9]{1,2}\.[0-9]{0,2}\n"""
    + r"""    \},\n"""
    + r"""    "存活": "(存活|死亡)",\n"""
    + r"""    "守护神": "[^"]{1,32}",\n"""
    + r"""    "博格特": "[^"]{1,32}"\n"""
    + r"""\}"""
)


@sgl.function
def character_gen(s, name):
    """Generate a regex-constrained JSON profile for the named character."""
    s += name + " 是一名哈利波特系列小说中的角色。请填写以下关于这个角色的信息。"
    s += """\
这是一个例子
{
    "姓名": "哈利波特",
    "学院": "格兰芬多",
    "血型": "混血",
    "职业": "学生",
    "魔杖": {
        "材质": "冬青木",
        "杖芯": "凤凰尾羽",
        "长度": 11.0
    },
    "存活": "存活",
    "守护神": "麋鹿",
    "博格特": "摄魂怪"
}
"""
    s += f"现在请你填写{name}的信息:\n"
    # regex= forces the decoder to emit output matching character_regex.
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)


def main():
    """Connect to a local sglang server and run one constrained generation."""
    backend = sgl.RuntimeEndpoint("http://localhost:30000")
    sgl.set_default_backend(backend)

    ret = character_gen.run(name="赫敏格兰杰", temperature=0)
    print(ret.text())


if __name__ == "__main__":
    main()
examples/frontend_language/usage/choices_logprob.py
0 → 100644
View file @
909abb58
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python choices_logprob.py
"""

import sglang as sgl


@sgl.function
def tool_use(s, question):
    """Pick one of two tools via constrained choice decoding."""
    s += "To answer this question: " + question + ", "
    s += "I need to use a " + sgl.gen("tool", choices=["calculator", "search engine"])


def main():
    """Show the selected choice and its per-choice logprobs."""
    # Run one case
    question = "What is 5 + 5?"
    state = tool_use.run(question)
    print("questions:", question)
    print("choice:", state["tool"])
    meta_info = state.get_meta_info("tool")
    print("logprobs of choice 1", meta_info["input_token_logprobs"][0])
    print("logprobs of choice 2", meta_info["input_token_logprobs"][1])
    print("-" * 50)

    # Run a batch
    questions = [
        "What is 5 + 6?",
        "Who is Michael Jordan?",
    ]
    states = tool_use.run_batch([{"question": q} for q in questions])
    for question, state in zip(questions, states):
        print("questions:", question)
        print("choice:", state["tool"])
        meta_info = state.get_meta_info("tool")
        print("logprobs of choice 1", meta_info["input_token_logprobs"][0])
        print("logprobs of choice 2", meta_info["input_token_logprobs"][1])
        print("-" * 50)


if __name__ == "__main__":
    sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
    main()
Prev
1
…
10
11
12
13
14
15
16
17
18
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment