Commit 87260b7b (unverified)
Litellm Backend (#502)
Authored Jun 08, 2024 by 胡译文; committed by GitHub on Jun 07, 2024
Parent: 651a23ee
Showing 5 changed files with 136 additions and 2 deletions:

  python/pyproject.toml               +3   -2
  python/sglang/__init__.py           +2   -0
  python/sglang/backend/litellm.py    +89  -0
  python/sglang/lang/ir.py            +15  -0
  test/lang/test_litellm_backend.py   +27  -0
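
Taken together, the changes register LiteLLM as a first-class sglang backend. As a rough orientation before the per-file diffs, a minimal usage sketch follows; the @sgl.function frontend and the hello program are illustrative and not part of this commit, and the example assumes an OPENAI_API_KEY is set so LiteLLM can route gpt-3.5-turbo to OpenAI:

import sglang as sgl

@sgl.function
def hello(s):
    # A hypothetical single-turn chat program.
    s += sgl.user("Say hello in one short sentence.")
    s += sgl.assistant(sgl.gen("answer", max_tokens=32))

# Route all generation through the new backend, as the test file below also does.
sgl.set_default_backend(sgl.LiteLLM("gpt-3.5-turbo"))

state = hello.run()
print(state["answer"])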
python/pyproject.toml

@@ -23,7 +23,8 @@ srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
        "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
 openai = ["openai>=1.0", "numpy", "tiktoken"]
 anthropic = ["anthropic>=0.20.0", "numpy"]
-all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]"]
+litellm = ["litellm>=1.0.0"]
+all = ["sglang[srt]", "sglang[openai]", "sglang[anthropic]", "sglang[litellm]"]
 
 [project.urls]
 "Homepage" = "https://github.com/sgl-project/sglang"
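
With the new optional-dependency group in place, the backend's requirement can be pulled in as an extra, e.g. pip install "sglang[litellm]", or implicitly through sglang[all], which now includes it.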
python/sglang/__init__.py

@@ -27,6 +27,7 @@ from sglang.backend.anthropic import Anthropic
 from sglang.backend.openai import OpenAI
 from sglang.backend.runtime_endpoint import RuntimeEndpoint
 from sglang.backend.vertexai import VertexAI
+from sglang.backend.litellm import LiteLLM
 
 # Global Configurations
 from sglang.global_config import global_config
@@ -35,6 +36,7 @@ from sglang.global_config import global_config
 __all__ = [
     "global_config",
     "Anthropic",
+    "LiteLLM",
     "OpenAI",
     "RuntimeEndpoint",
     "VertexAI",
python/sglang/backend/litellm.py (new file, mode 100644)

from typing import Mapping, Optional

from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template_by_model_path
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglSamplingParams

try:
    import litellm
except ImportError as e:
    # Defer the missing-dependency error until the backend is constructed.
    litellm = e


class LiteLLM(BaseBackend):
    def __init__(
        self,
        model_name,
        chat_template=None,
        api_key=None,
        organization: Optional[str] = None,
        base_url: Optional[str] = None,
        timeout: Optional[float] = 600,
        max_retries: Optional[int] = litellm.num_retries,
        default_headers: Optional[Mapping[str, str]] = None,
    ):
        super().__init__()

        if isinstance(litellm, Exception):
            raise litellm

        self.model_name = model_name

        self.chat_template = chat_template or get_chat_template_by_model_path(
            model_name
        )

        # Keyword arguments forwarded to every litellm.completion call.
        self.client_params = {
            "api_key": api_key,
            "organization": organization,
            "base_url": base_url,
            "timeout": timeout,
            "max_retries": max_retries,
            "default_headers": default_headers,
        }

    def get_chat_template(self):
        return self.chat_template

    def generate(
        self,
        s: StreamExecutor,
        sampling_params: SglSamplingParams,
    ):
        # Fall back to a single user message when there is no chat history.
        if s.messages_:
            messages = s.messages_
        else:
            messages = [{"role": "user", "content": s.text_}]

        ret = litellm.completion(
            model=self.model_name,
            messages=messages,
            **self.client_params,
            **sampling_params.to_litellm_kwargs(),
        )
        comp = ret.choices[0].message.content

        return comp, {}

    def generate_stream(
        self,
        s: StreamExecutor,
        sampling_params: SglSamplingParams,
    ):
        if s.messages_:
            messages = s.messages_
        else:
            messages = [{"role": "user", "content": s.text_}]

        ret = litellm.completion(
            model=self.model_name,
            messages=messages,
            stream=True,
            **self.client_params,
            **sampling_params.to_litellm_kwargs(),
        )
        for chunk in ret:
            text = chunk.choices[0].delta.content
            if text is not None:
                yield text, {}
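
The backend stays provider-agnostic because litellm.completion mirrors the OpenAI chat-completions interface while routing each request to whichever provider the model string names. A minimal standalone sketch of that call, assuming litellm is installed and an OPENAI_API_KEY is available for this particular model:

import litellm

# LiteLLM dispatches on the model string; "gpt-3.5-turbo" goes to OpenAI.
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hi in five words."}],
    max_tokens=16,
)

# Responses use the OpenAI schema, matching ret.choices[0].message.content above.
print(resp.choices[0].message.content)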
python/sglang/lang/ir.py

@@ -82,6 +82,21 @@ class SglSamplingParams:
             "top_k": self.top_k,
         }
 
+    def to_litellm_kwargs(self):
+        if self.regex is not None:
+            warnings.warn(
+                "Regular expression is not supported in the LiteLLM backend."
+            )
+        return {
+            "max_tokens": self.max_new_tokens,
+            "stop": self.stop or None,
+            "temperature": self.temperature,
+            "top_p": self.top_p,
+            "top_k": self.top_k,
+            "frequency_penalty": self.frequency_penalty,
+            "presence_penalty": self.presence_penalty,
+        }
+
     def to_srt_kwargs(self):
         return {
             "max_new_tokens": self.max_new_tokens,
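
The one notable rename in this mapping is max_new_tokens → max_tokens, since LiteLLM follows OpenAI's parameter names; "self.stop or None" also normalizes an empty stop list to None. A quick hypothetical check of the mapping, assuming SglSamplingParams is a dataclass that accepts these fields as keyword arguments:

from sglang.lang.ir import SglSamplingParams

params = SglSamplingParams(max_new_tokens=128, temperature=0.7, stop=[])
kwargs = params.to_litellm_kwargs()

assert kwargs["max_tokens"] == 128   # renamed from max_new_tokens
assert kwargs["stop"] is None        # empty stop list normalized to None
assert kwargs["temperature"] == 0.7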
test/lang/test_litellm_backend.py (new file, mode 100644)

import unittest

from sglang import LiteLLM, set_default_backend
from sglang.test.test_programs import test_mt_bench, test_stream


class TestLiteLLMBackend(unittest.TestCase):
    # Construct the backend once and share it across test methods.
    backend = None

    def setUp(self):
        cls = type(self)
        if cls.backend is None:
            cls.backend = LiteLLM("gpt-3.5-turbo")
            set_default_backend(cls.backend)

    def test_mt_bench(self):
        test_mt_bench()

    def test_stream(self):
        test_stream()


if __name__ == "__main__":
    unittest.main(warnings="ignore")
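
Running the file directly (python test/lang/test_litellm_backend.py) exercises the shared MT-bench and streaming test programs against the new backend. Since the backend here defaults to gpt-3.5-turbo, an OPENAI_API_KEY must be set in the environment for LiteLLM to route the requests.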