Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9acc6e35
Unverified
Commit
9acc6e35
authored
Apr 22, 2024
by
Liangsheng Yin
Committed by
GitHub
Apr 22, 2024
Browse files
add `.isort.cfg` (#378)
parent
cf9d8efd
Changes
57
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
71 additions
and
60 deletions
+71
-60
python/sglang/srt/managers/router/infer_batch.py
python/sglang/srt/managers/router/infer_batch.py
+1
-0
python/sglang/srt/managers/router/manager.py
python/sglang/srt/managers/router/manager.py
+1
-0
python/sglang/srt/managers/router/model_rpc.py
python/sglang/srt/managers/router/model_rpc.py
+2
-1
python/sglang/srt/managers/router/model_runner.py
python/sglang/srt/managers/router/model_runner.py
+5
-4
python/sglang/srt/managers/router/radix_cache.py
python/sglang/srt/managers/router/radix_cache.py
+0
-2
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+1
-0
python/sglang/srt/models/commandr.py
python/sglang/srt/models/commandr.py
+5
-4
python/sglang/srt/models/dbrx.py
python/sglang/srt/models/dbrx.py
+5
-4
python/sglang/srt/models/gemma.py
python/sglang/srt/models/gemma.py
+4
-3
python/sglang/srt/models/llama2.py
python/sglang/srt/models/llama2.py
+5
-4
python/sglang/srt/models/llava.py
python/sglang/srt/models/llava.py
+9
-8
python/sglang/srt/models/mixtral.py
python/sglang/srt/models/mixtral.py
+5
-4
python/sglang/srt/models/qwen.py
python/sglang/srt/models/qwen.py
+5
-4
python/sglang/srt/models/qwen2.py
python/sglang/srt/models/qwen2.py
+5
-4
python/sglang/srt/models/stablelm.py
python/sglang/srt/models/stablelm.py
+4
-3
python/sglang/srt/models/yivl.py
python/sglang/srt/models/yivl.py
+6
-7
python/sglang/srt/server.py
python/sglang/srt/server.py
+7
-6
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+0
-1
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+1
-0
test/lang/run_all.py
test/lang/run_all.py
+0
-1
No files found.
python/sglang/srt/managers/router/infer_batch.py
View file @
9acc6e35
...
...
@@ -4,6 +4,7 @@ from typing import List
import
numpy
as
np
import
torch
from
sglang.srt.managers.router.radix_cache
import
RadixCache
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
...
...
python/sglang/srt/managers/router/manager.py
View file @
9acc6e35
...
...
@@ -4,6 +4,7 @@ import logging
import
uvloop
import
zmq
import
zmq.asyncio
from
sglang.srt.backend_config
import
GLOBAL_BACKEND_CONFIG
from
sglang.srt.managers.router.model_rpc
import
ModelRpcClient
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
...
...
python/sglang/srt/managers/router/model_rpc.py
View file @
9acc6e35
...
...
@@ -10,6 +10,8 @@ import rpyc
import
torch
from
rpyc.utils.classic
import
obtain
from
rpyc.utils.server
import
ThreadedServer
from
vllm.logger
import
_default_handler
as
vllm_default_handler
from
sglang.srt.constrained.fsm_cache
import
FSMCache
from
sglang.srt.constrained.jump_forward
import
JumpForwardCache
from
sglang.srt.hf_transformers_utils
import
get_processor
,
get_tokenizer
...
...
@@ -30,7 +32,6 @@ from sglang.srt.utils import (
is_multimodal_model
,
set_random_seed
,
)
from
vllm.logger
import
_default_handler
as
vllm_default_handler
logger
=
logging
.
getLogger
(
"model_rpc"
)
...
...
python/sglang/srt/managers/router/model_runner.py
View file @
9acc6e35
...
...
@@ -9,16 +9,17 @@ from typing import List
import
numpy
as
np
import
torch
from
sglang.srt.managers.router.infer_batch
import
Batch
,
ForwardMode
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
from
sglang.srt.utils
import
is_multimodal_model
from
sglang.utils
import
get_available_gpu_memory
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.marlin
import
MarlinConfig
from
vllm.model_executor.model_loader
import
_set_default_torch_dtype
from
vllm.model_executor.parallel_utils.parallel_state
import
initialize_model_parallel
from
sglang.srt.managers.router.infer_batch
import
Batch
,
ForwardMode
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
from
sglang.srt.utils
import
is_multimodal_model
from
sglang.utils
import
get_available_gpu_memory
QUANTIZATION_CONFIG_MAPPING
=
{
"awq"
:
AWQConfig
,
"gptq"
:
GPTQConfig
,
...
...
python/sglang/srt/managers/router/radix_cache.py
View file @
9acc6e35
import
heapq
import
time
from
collections
import
defaultdict
from
dataclasses
import
dataclass
from
typing
import
Tuple
import
torch
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
9acc6e35
...
...
@@ -10,6 +10,7 @@ import transformers
import
uvloop
import
zmq
import
zmq.asyncio
from
sglang.srt.hf_transformers_utils
import
(
get_config
,
get_context_length
,
...
...
python/sglang/srt/models/commandr.py
View file @
9acc6e35
...
...
@@ -20,13 +20,10 @@
# This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model."""
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
import
torch
import
torch.utils.checkpoint
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch.nn.parameter
import
Parameter
from
transformers
import
PretrainedConfig
...
...
@@ -49,6 +46,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
@
torch
.
compile
def
layer_norm_func
(
hidden_states
,
weight
,
variance_epsilon
):
...
...
python/sglang/srt/models/dbrx.py
View file @
9acc6e35
...
...
@@ -5,10 +5,6 @@ from typing import Optional
import
torch
import
torch.nn
as
nn
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.models.dbrx_config
import
DbrxConfig
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.layers.linear
import
(
LinearMethodBase
,
...
...
@@ -35,6 +31,11 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.models.dbrx_config
import
DbrxConfig
class
DbrxRouter
(
nn
.
Module
):
"""A Router implementation for DBRX that returns logits for each expert
...
...
python/sglang/srt/models/gemma.py
View file @
9acc6e35
...
...
@@ -4,9 +4,6 @@
from
typing
import
Optional
,
Tuple
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.config
import
LoRAConfig
...
...
@@ -28,6 +25,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
GemmaMLP
(
nn
.
Module
):
def
__init__
(
...
...
python/sglang/srt/models/llama2.py
View file @
9acc6e35
# Adapted from
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
"""Inference-only LLaMA model compatible with HuggingFace weights."""
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
,
Tuple
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
transformers
import
LlamaConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
LlamaMLP
(
nn
.
Module
):
def
__init__
(
...
...
python/sglang/srt/models/llava.py
View file @
9acc6e35
...
...
@@ -4,6 +4,15 @@ from typing import List, Optional
import
numpy
as
np
import
torch
from
torch
import
nn
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
transformers.models.llava.modeling_llava
import
LlavaMultiModalProjector
from
vllm.model_executor.layers.linear
import
LinearMethodBase
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
hf_model_weights_iterator
,
)
from
sglang.srt.managers.router.infer_batch
import
ForwardMode
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.mm_utils
import
(
...
...
@@ -12,14 +21,6 @@ from sglang.srt.mm_utils import (
unpad_image_shape
,
)
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
torch
import
nn
from
transformers
import
CLIPVisionModel
,
LlamaConfig
,
LlavaConfig
from
transformers.models.llava.modeling_llava
import
LlavaMultiModalProjector
from
vllm.model_executor.layers.linear
import
LinearMethodBase
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
hf_model_weights_iterator
,
)
class
LlavaLlamaForCausalLM
(
nn
.
Module
):
...
...
python/sglang/srt/models/mixtral.py
View file @
9acc6e35
# Adapted from
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
"""Inference-only Mixtral model."""
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
Optional
import
numpy
as
np
import
torch
import
torch.nn.functional
as
F
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
transformers
import
MixtralConfig
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
@@ -35,6 +32,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
MixtralMLP
(
nn
.
Module
):
def
__init__
(
...
...
python/sglang/srt/models/qwen.py
View file @
9acc6e35
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
...
@@ -27,6 +24,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
QWenMLP
(
nn
.
Module
):
def
__init__
(
...
...
python/sglang/srt/models/qwen2.py
View file @
9acc6e35
# Adapted from llama2.py
# Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
,
Tuple
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
...
@@ -29,6 +26,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
Qwen2Config
=
None
...
...
python/sglang/srt/models/stablelm.py
View file @
9acc6e35
...
...
@@ -5,9 +5,6 @@ model compatible with HuggingFace weights."""
from
typing
import
Optional
,
Tuple
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
StablelmMLP
(
nn
.
Module
):
def
__init__
(
...
...
python/sglang/srt/models/yivl.py
View file @
9acc6e35
"""Inference-only Yi-VL model."""
import
os
from
typing
import
List
,
Optional
from
typing
import
Optional
import
torch
import
torch.nn
as
nn
from
sglang.srt.models.llava
import
(
LlavaLlamaForCausalLM
,
clip_vision_embed_forward
,
monkey_path_clip_vision_embed_forward
,
)
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
hf_model_weights_iterator
,
)
from
sglang.srt.models.llava
import
(
LlavaLlamaForCausalLM
,
monkey_path_clip_vision_embed_forward
,
)
class
YiVLForCausalLM
(
LlavaLlamaForCausalLM
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
...
...
python/sglang/srt/server.py
View file @
9acc6e35
...
...
@@ -10,9 +10,6 @@ import threading
import
time
from
typing
import
List
,
Optional
,
Union
# Fix a Python bug
setattr
(
threading
,
"_register_atexit"
,
lambda
*
args
,
**
kwargs
:
None
)
import
aiohttp
import
psutil
import
pydantic
...
...
@@ -22,6 +19,9 @@ import uvloop
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
fastapi.responses
import
Response
,
StreamingResponse
from
pydantic
import
BaseModel
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.srt.constrained
import
disable_cache
from
sglang.srt.conversation
import
(
...
...
@@ -54,8 +54,9 @@ from sglang.srt.managers.router.manager import start_router_process
from
sglang.srt.managers.tokenizer_manager
import
TokenizerManager
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.utils
import
enable_show_time_cost
,
handle_port_init
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
# Fix a Python bug
setattr
(
threading
,
"_register_atexit"
,
lambda
*
args
,
**
kwargs
:
None
)
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
...
...
@@ -618,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
try
:
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
,
headers
=
headers
)
break
except
requests
.
exceptions
.
RequestException
as
e
:
except
requests
.
exceptions
.
RequestException
:
pass
else
:
if
pipe_finish_writer
is
not
None
:
...
...
python/sglang/srt/utils.py
View file @
9acc6e35
...
...
@@ -157,7 +157,6 @@ def get_exception_traceback():
def
get_int_token_logit_bias
(
tokenizer
,
vocab_size
):
from
transformers
import
LlamaTokenizer
,
LlamaTokenizerFast
# a bug when model's vocab size > tokenizer.vocab_size
vocab_size
=
tokenizer
.
vocab_size
...
...
python/sglang/test/test_utils.py
View file @
9acc6e35
...
...
@@ -2,6 +2,7 @@
import
numpy
as
np
import
requests
from
sglang.backend.openai
import
OpenAI
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.global_config
import
global_config
...
...
test/lang/run_all.py
View file @
9acc6e35
import
argparse
import
glob
import
multiprocessing
import
os
import
time
import
unittest
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment