change / sglang · Commits · bb824da4

Commit bb824da4 (unverified)
Add Together and AzureOpenAI examples (#184)

Authored Feb 12, 2024 by Lianmin Zheng; committed via GitHub on Feb 12, 2024.
Parent: 93121324

Showing 8 changed files with 262 additions and 15 deletions (+262, -15):
- examples/quick_start/anthropic_example_chat.py (+1, -1)
- examples/quick_start/azure_openai_example_chat.py (+76, -0)
- examples/quick_start/gemini_example_chat.py (+1, -1)
- examples/quick_start/openai_example_chat.py (+1, -1)
- examples/quick_start/srt_example_chat.py (+1, -1)
- examples/quick_start/together_example_chat.py (+74, -0)
- examples/quick_start/together_example_complete.py (+74, -0)
- python/sglang/backend/openai.py (+34, -11)
examples/quick_start/anthropic_example_chat.py

```diff
@@ -23,7 +23,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
```
examples/quick_start/azure_openai_example_chat.py (new file, mode 100644)

```python
"""
Usage:
export AZURE_OPENAI_API_KEY=sk-******
python3 azure_openai_example_chat.py
"""
import sglang as sgl
import os


@sgl.function
def multi_turn_question(s, question_1, question_2):
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="azure-gpt-4",
        api_version="2023-07-01-preview",
        azure_endpoint="https://oai-arena-sweden.openai.azure.com/",
        api_key=os.environ["AZURE_OPENAI_API_KEY"],
        is_azure=True,
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
```
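The endpoint and deployment name in this example are specific to the author's Azure resource. As a sketch (the resource URL and deployment name below are placeholders, not values from this commit), pointing the script at your own deployment only means swapping the constructor arguments:

```python
import os
import sglang as sgl

# Placeholder values -- substitute your own Azure OpenAI resource.
backend = sgl.OpenAI(
    model_name="my-gpt-4-deployment",  # your Azure deployment name (placeholder)
    api_version="2023-07-01-preview",
    azure_endpoint="https://<your-resource>.openai.azure.com/",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
    is_azure=True,  # routes construction to openai.AzureOpenAI (see backend diff below)
)
sgl.set_default_backend(backend)
```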
examples/quick_start/gemini_example_chat.py

```diff
@@ -23,7 +23,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
```
examples/quick_start/openai_example_chat.py

```diff
@@ -24,7 +24,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
```
examples/quick_start/srt_example_chat.py

```diff
@@ -22,7 +22,7 @@ def single():
     for m in state.messages():
         print(m["role"], ":", m["content"])
 
-    print("answer_1", state["answer_1"])
+    print("\n-- answer_1 --\n", state["answer_1"])
 
 
 def stream():
```
examples/quick_start/together_example_chat.py (new file, mode 100644)

```python
"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_chat.py
"""
import sglang as sgl
import os


@sgl.function
def multi_turn_question(s, question_1, question_2):
    s += sgl.system("You are a helpful assistant.")
    s += sgl.user(question_1)
    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
    s += sgl.user(question_2)
    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


def single():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
    )

    for m in state.messages():
        print(m["role"], ":", m["content"])

    print("\n-- answer_1 --\n", state["answer_1"])


def stream():
    state = multi_turn_question.run(
        question_1="What is the capital of the United States?",
        question_2="List two local attractions.",
        stream=True,
    )

    for out in state.text_iter():
        print(out, end="", flush=True)
    print()


def batch():
    states = multi_turn_question.run_batch(
        [
            {
                "question_1": "What is the capital of the United States?",
                "question_2": "List two local attractions.",
            },
            {
                "question_1": "What is the capital of France?",
                "question_2": "What is the population of this city?",
            },
        ]
    )

    for s in states:
        print(s.messages())


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
```
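The Together backend is simply the OpenAI client pointed at a different base_url, and the constructor changes in python/sglang/backend/openai.py (shown below) add is_chat_model and chat_template overrides for models the backend cannot auto-detect by name. A minimal sketch of those knobs; the "chatml" template name here is an assumption for illustration, not something this commit sets:

```python
import os
import sglang as sgl
from sglang.lang.chat_template import get_chat_template

# Sketch: explicitly override chat detection and template for a hosted model.
backend = sgl.OpenAI(
    model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
    is_chat_model=True,                          # skip name-based detection
    chat_template=get_chat_template("chatml"),   # assumed template name
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get("TOGETHER_API_KEY"),
)
sgl.set_default_backend(backend)
```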
examples/quick_start/together_example_complete.py (new file, mode 100644)

```python
"""
Usage:
export TOGETHER_API_KEY=sk-******
python3 together_example_complete.py
"""
import sglang as sgl
import os


@sgl.function
def few_shot_qa(s, question):
    s += (
"""The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    )
    s += "Q: " + question + "\n"
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


def single():
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())


def stream():
    state = few_shot_qa.run(
        question="What is the capital of the United States?",
        stream=True,
    )

    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()


def batch():
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )

    for s in states:
        print(s["answer"])


if __name__ == "__main__":
    backend = sgl.OpenAI(
        model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
        is_chat_model=False,
        base_url="https://api.together.xyz/v1",
        api_key=os.environ.get("TOGETHER_API_KEY"),
    )
    sgl.set_default_backend(backend)

    # Run a single request
    print("\n========== single ==========\n")
    single()

    # Stream output
    print("\n========== stream ==========\n")
    stream()

    # Run a batch of requests
    print("\n========== batch ==========\n")
    batch()
```
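With is_chat_model=False, the assembled few-shot text goes to the completions endpoint rather than chat completions, and stop="\n" cuts generation at the end of the first answer line. Roughly, and only as an illustration of the kind of request few_shot_qa ends up issuing (prompt abbreviated; parameter names from the openai-python v1 client):

```python
import os
import openai

client = openai.OpenAI(
    base_url="https://api.together.xyz/v1",
    api_key=os.environ.get("TOGETHER_API_KEY"),
)

# Approximation of the underlying request for the few-shot program above.
resp = client.completions.create(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    prompt="The following are questions with answers.\n...\nQ: What is the capital of the United States?\nA:",
    temperature=0,
    stop="\n",
)
print(resp.choices[0].text)
```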
python/sglang/backend/openai.py

```diff
@@ -4,7 +4,7 @@ from typing import Callable, List, Optional, Union
 import numpy as np
 
 from sglang.backend.base_backend import BaseBackend
-from sglang.lang.chat_template import get_chat_template
+from sglang.lang.chat_template import get_chat_template_by_model_path, ChatTemplate
 from sglang.lang.interpreter import StreamExecutor
 from sglang.lang.ir import SglSamplingParams
```

```diff
@@ -41,23 +41,39 @@ INSTRUCT_MODEL_NAMES = [
 class OpenAI(BaseBackend):
-    def __init__(self, model_name, *args, **kwargs):
+    def __init__(
+        self,
+        model_name: str,
+        is_chat_model: Optional[bool] = None,
+        chat_template: Optional[ChatTemplate] = None,
+        is_azure: bool = False,
+        *args,
+        **kwargs,
+    ):
         super().__init__()
 
         if isinstance(openai, Exception):
             raise openai
 
-        self.client = openai.OpenAI(*args, **kwargs)
+        if is_azure:
+            self.client = openai.AzureOpenAI(*args, **kwargs)
+        else:
+            self.client = openai.OpenAI(*args, **kwargs)
+
         self.model_name = model_name
-        self.tokenizer = tiktoken.encoding_for_model(model_name)
+        try:
+            self.tokenizer = tiktoken.encoding_for_model(model_name)
+        except KeyError:
+            self.tokenizer = tiktoken.get_encoding("cl100k_base")
         self.logit_bias_int = create_logit_bias_int(self.tokenizer)
 
-        if model_name in INSTRUCT_MODEL_NAMES:
-            self.is_chat_model = False
+        self.chat_template = chat_template or get_chat_template_by_model_path(model_name)
+
+        if is_chat_model is not None:
+            self.is_chat_model = is_chat_model
         else:
-            self.is_chat_model = True
+            if model_name in INSTRUCT_MODEL_NAMES:
+                self.is_chat_model = False
+            else:
+                self.is_chat_model = True
 
-        self.chat_template = get_chat_template("default")
+        self.chat_begin_str = self.chat_template.role_prefix_and_suffix["assistant"][0]
 
     def get_chat_template(self):
         return self.chat_template
```

```diff
@@ -69,7 +85,7 @@ class OpenAI(BaseBackend):
     ):
         if sampling_params.dtype is None:
             if self.is_chat_model:
-                if not s.text_.endswith("ASSISTANT:"):
+                if not s.text_.endswith(self.chat_begin_str):
                     raise RuntimeError(
                         "This use case is not supported. "
                         "For OpenAI chat models, sgl.gen must be right after sgl.assistant"
```

```diff
@@ -122,7 +138,11 @@ class OpenAI(BaseBackend):
     ):
         if sampling_params.dtype is None:
             if self.is_chat_model:
-                assert s.text_.endswith("ASSISTANT:")
+                if not s.text_.endswith(self.chat_begin_str):
+                    raise RuntimeError(
+                        "This use case is not supported. "
+                        "For OpenAI chat models, sgl.gen must be right after sgl.assistant"
+                    )
                 prompt = s.messages_
             else:
                 prompt = s.text_
```

```diff
@@ -241,7 +261,10 @@ def openai_completion_stream(client, retries=3, is_chat=None, prompt=None, **kwargs):
             messages=prompt, stream=True, **kwargs
         )
         for ret in generator:
-            content = ret.choices[0].delta.content
+            try:
+                content = ret.choices[0].delta.content
+            except IndexError:
+                content = None
             yield content or "", {}
     else:
         generator = client.completions.create(
```
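The tokenizer change is what allows arbitrary model names at all: tiktoken.encoding_for_model raises KeyError for names it does not recognize, such as Together-hosted models. A minimal standalone sketch of the fallback:

```python
import tiktoken

def encoding_for(model_name: str):
    # tiktoken only knows OpenAI model names; anything else
    # (e.g. "mistralai/Mixtral-8x7B-Instruct-v0.1") raises KeyError,
    # so fall back to the generic cl100k_base encoding, as this commit does.
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("cl100k_base")

print(encoding_for("gpt-4").name)                                 # "cl100k_base" via model lookup
print(encoding_for("mistralai/Mixtral-8x7B-Instruct-v0.1").name)  # "cl100k_base" via fallback
```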