Commit bb824da4 (unverified), authored Feb 12, 2024 by Lianmin Zheng, committed via GitHub on Feb 12, 2024

Add Together and AzureOpenAI examples (#184)

Parent commit: 93121324
Showing 8 changed files with 262 additions and 15 deletions:

    examples/quick_start/anthropic_example_chat.py     (+1, -1)
    examples/quick_start/azure_openai_example_chat.py  (+76, -0)
    examples/quick_start/gemini_example_chat.py        (+1, -1)
    examples/quick_start/openai_example_chat.py        (+1, -1)
    examples/quick_start/srt_example_chat.py           (+1, -1)
    examples/quick_start/together_example_chat.py      (+74, -0)
    examples/quick_start/together_example_complete.py  (+74, -0)
    python/sglang/backend/openai.py                    (+34, -11)
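Taken together, the commit introduces two new ways to construct the existing sgl.OpenAI backend, which the new example files below exercise. A minimal sketch of both configurations (condensed from those files; extra keyword arguments are forwarded to the underlying openai client):

import os
import sglang as sgl

# Azure OpenAI: is_azure=True routes the extra kwargs into openai.AzureOpenAI.
azure_backend = sgl.OpenAI(
    model_name="azure-gpt-4",
    is_azure=True,
    api_version="2023-07-01-preview",
    azure_endpoint="https://oai-arena-sweden.openai.azure.com/",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)

# Together: an OpenAI-compatible endpoint selected via base_url.
together_backend = sgl.OpenAI(
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get("TOGETHER_API_KEY"),
)

sgl.set_default_backend(together_backend)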
examples/quick_start/anthropic_example_chat.py

@@ -23,7 +23,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
examples/quick_start/azure_openai_example_chat.py (new file, mode 100644)

"""
Usage:
export AZURE_OPENAI_API_KEY=sk-******
python3 azure_openai_example_chat.py
"""
import sglang as sgl
import os


@sgl.function
def multi_turn_question(s, question_1, question_2):
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="azure-gpt-4",
        api_version="2023-07-01-preview",
        azure_endpoint="https://oai-arena-sweden.openai.azure.com/",
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        is_azure=True,
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
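For reference, with is_azure=True the backend change further down this diff forwards the remaining keyword arguments verbatim to the Azure client, so the construction above amounts to roughly the following raw-client call (a sketch):

import os
import openai

# Equivalent to what sgl.OpenAI(..., is_azure=True) builds internally.
client = openai.AzureOpenAI(
    api_version="2023-07-01-preview",
    azure_endpoint="https://oai-arena-sweden.openai.azure.com/",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)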
examples/quick_start/gemini_example_chat.py

@@ -23,7 +23,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
examples/quick_start/openai_example_chat.py

@@ -24,7 +24,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
examples/quick_start/srt_example_chat.py

@@ -22,7 +22,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
examples/quick_start/together_example_chat.py (new file, mode 100644)

"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_chat.py
"""
import sglang as sgl
import os


@sgl.function
def multi_turn_question(s, question_1, question_2):
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
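No Together-specific client is involved here: https://api.together.xyz/v1 is an OpenAI-compatible endpoint, which is why the example only overrides base_url and api_key. The same request can be issued with the plain openai client (a sketch, assuming the openai>=1.0 client API):

import os
import openai

# Point the stock OpenAI client at Together's OpenAI-compatible endpoint.
client = openai.OpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get("TOGETHER_API_KEY"),
)
resp = client.chat.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=32,
)
print(resp.choices[0].message.content)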
examples/quick_start/together_example_complete.py (new file, mode 100644)

"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_complete.py
"""
import sglang as sgl
import os


@sgl.function
def few_shot_qa(s, question):
    s += (
        """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    )
    s += "Q: " + question + "\n"
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    state = few_shot_qa.run(
        question="What is the capital of the United States?",
        stream=True,
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        is_chat_model=False,
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
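is_chat_model=False is the other new constructor flag this file exercises: it forces the backend onto the completions code path, so few_shot_qa is sent as one flat text prompt instead of a list of chat messages. The prompt built for single() therefore looks roughly like this (a sketch; the last answer line is what sgl.gen fills in, stopping at the newline):

The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
Q: What is the capital of the United States?
A: Washington, D.C.    (generated)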
python/sglang/backend/openai.py

@@ -4,7 +4,7 @@ from typing import Callable, List, Optional, Union
 import numpy as np
 
 from sglang.backend.base_backend import BaseBackend
-from sglang.lang.chat_template import get_chat_template
+from sglang.lang.chat_template import get_chat_template_by_model_path, ChatTemplate
 from sglang.lang.interpreter import StreamExecutor
 from sglang.lang.ir import SglSamplingParams
@@ -41,23 +41,39 @@ INSTRUCT_MODEL_NAMES = [
 class OpenAI(BaseBackend):
-    def __init__(self, model_name, *args, **kwargs):
+    def __init__(
+        self,
+        model_name: str,
+        is_chat_model: Optional[bool] = None,
+        chat_template: Optional[ChatTemplate] = None,
+        is_azure: bool = False,
+        *args,
+        **kwargs,
+    ):
         super().__init__()
 
         if isinstance(openai, Exception):
             raise openai
 
-        self.client = openai.OpenAI(*args, **kwargs)
+        if is_azure:
+            self.client = openai.AzureOpenAI(*args, **kwargs)
+        else:
+            self.client = openai.OpenAI(*args, **kwargs)
+
         self.model_name = model_name
-        self.tokenizer = tiktoken.encoding_for_model(model_name)
+
+        try:
+            self.tokenizer = tiktoken.encoding_for_model(model_name)
+        except KeyError:
+            self.tokenizer = tiktoken.get_encoding("cl100k_base")
         self.logit_bias_int = create_logit_bias_int(self.tokenizer)
 
-        if model_name in INSTRUCT_MODEL_NAMES:
-            self.is_chat_model = False
-        else:
-            self.is_chat_model = True
+        self.chat_template = chat_template or get_chat_template_by_model_path(model_name)
+
+        if is_chat_model is not None:
+            self.is_chat_model = is_chat_model
+        else:
+            if model_name in INSTRUCT_MODEL_NAMES:
+                self.is_chat_model = False
+            else:
+                self.is_chat_model = True
 
-        self.chat_template = get_chat_template("default")
+        self.chat_begin_str = self.chat_template.role_prefix_and_suffix["assistant"][0]
 
     def get_chat_template(self):
         return self.chat_template
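The new try/except KeyError around the tokenizer is what lets non-OpenAI model names (such as mistralai/Mixtral-8x7B-Instruct-v0.1) pass through this constructor: tiktoken can only map known OpenAI models to an encoding. A standalone sketch of the fallback behavior:

import tiktoken

def load_encoding(model_name: str):
    # tiktoken raises KeyError for model names it cannot map to an encoding;
    # fall back to cl100k_base, as the constructor above now does.
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("cl100k_base")

print(load_encoding("gpt-4").name)                                 # cl100k_base
print(load_encoding("mistralai/Mixtral-8x7B-Instruct-v0.1").name)  # cl100k_base (fallback)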
@@ -69,7 +85,7 @@ class OpenAI(BaseBackend):
     ):
         if sampling_params.dtype is None:
             if self.is_chat_model:
-                if not s.text_.endswith("ASSISTANT:"):
+                if not s.text_.endswith(self.chat_begin_str):
                     raise RuntimeError(
                         "This use case is not supported. "
                         "For OpenAI chat models, sgl.gen must be right after sgl.assistant"
@@ -122,7 +138,11 @@ class OpenAI(BaseBackend):
     ):
         if sampling_params.dtype is None:
             if self.is_chat_model:
-                assert s.text_.endswith("ASSISTANT:")
+                if not s.text_.endswith(self.chat_begin_str):
+                    raise RuntimeError(
+                        "This use case is not supported. "
+                        "For OpenAI chat models, sgl.gen must be right after sgl.assistant"
+                    )
                 prompt = s.messages_
             else:
                 prompt = s.text_
@@ -241,7 +261,10 @@ def openai_completion_stream(client, retries=3, is_chat=None, prompt=None, **kwargs):
             messages=prompt, stream=True, **kwargs
         )
         for ret in generator:
-            content = ret.choices[0].delta.content
+            try:
+                content = ret.choices[0].delta.content
+            except IndexError:
+                content = None
             yield content or "", {}
     else:
         generator = client.completions.create(
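The except IndexError guard handles stream chunks whose choices list is empty; Azure endpoints in particular can emit such chunks (for example, a leading content-filter result before any tokens arrive), which would otherwise crash on ret.choices[0]. A hypothetical helper capturing the same idea:

def delta_text(chunk) -> str:
    # Some endpoints stream chunks with an empty `choices` list; treat them
    # as producing no text instead of raising IndexError.
    try:
        return chunk.choices[0].delta.content or ""
    except IndexError:
        return ""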