Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9acc6e35
Unverified
Commit
9acc6e35
authored
Apr 22, 2024
by
Liangsheng Yin
Committed by
GitHub
Apr 22, 2024
Browse files
add `.isort.cfg` (#378)
parent
cf9d8efd
Changes
57
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
71 additions
and
60 deletions
+71
-60
python/sglang/srt/managers/router/infer_batch.py
python/sglang/srt/managers/router/infer_batch.py
+1
-0
python/sglang/srt/managers/router/manager.py
python/sglang/srt/managers/router/manager.py
+1
-0
python/sglang/srt/managers/router/model_rpc.py
python/sglang/srt/managers/router/model_rpc.py
+2
-1
python/sglang/srt/managers/router/model_runner.py
python/sglang/srt/managers/router/model_runner.py
+5
-4
python/sglang/srt/managers/router/radix_cache.py
python/sglang/srt/managers/router/radix_cache.py
+0
-2
python/sglang/srt/managers/tokenizer_manager.py
python/sglang/srt/managers/tokenizer_manager.py
+1
-0
python/sglang/srt/models/commandr.py
python/sglang/srt/models/commandr.py
+5
-4
python/sglang/srt/models/dbrx.py
python/sglang/srt/models/dbrx.py
+5
-4
python/sglang/srt/models/gemma.py
python/sglang/srt/models/gemma.py
+4
-3
python/sglang/srt/models/llama2.py
python/sglang/srt/models/llama2.py
+5
-4
python/sglang/srt/models/llava.py
python/sglang/srt/models/llava.py
+9
-8
python/sglang/srt/models/mixtral.py
python/sglang/srt/models/mixtral.py
+5
-4
python/sglang/srt/models/qwen.py
python/sglang/srt/models/qwen.py
+5
-4
python/sglang/srt/models/qwen2.py
python/sglang/srt/models/qwen2.py
+5
-4
python/sglang/srt/models/stablelm.py
python/sglang/srt/models/stablelm.py
+4
-3
python/sglang/srt/models/yivl.py
python/sglang/srt/models/yivl.py
+6
-7
python/sglang/srt/server.py
python/sglang/srt/server.py
+7
-6
python/sglang/srt/utils.py
python/sglang/srt/utils.py
+0
-1
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+1
-0
test/lang/run_all.py
test/lang/run_all.py
+0
-1
No files found.
python/sglang/srt/managers/router/infer_batch.py
View file @
9acc6e35
...
@@ -4,6 +4,7 @@ from typing import List
...
@@ -4,6 +4,7 @@ from typing import List
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
sglang.srt.managers.router.radix_cache
import
RadixCache
from
sglang.srt.managers.router.radix_cache
import
RadixCache
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
...
...
python/sglang/srt/managers/router/manager.py
View file @
9acc6e35
...
@@ -4,6 +4,7 @@ import logging
...
@@ -4,6 +4,7 @@ import logging
import
uvloop
import
uvloop
import
zmq
import
zmq
import
zmq.asyncio
import
zmq.asyncio
from
sglang.srt.backend_config
import
GLOBAL_BACKEND_CONFIG
from
sglang.srt.backend_config
import
GLOBAL_BACKEND_CONFIG
from
sglang.srt.managers.router.model_rpc
import
ModelRpcClient
from
sglang.srt.managers.router.model_rpc
import
ModelRpcClient
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
...
...
python/sglang/srt/managers/router/model_rpc.py
View file @
9acc6e35
...
@@ -10,6 +10,8 @@ import rpyc
...
@@ -10,6 +10,8 @@ import rpyc
import
torch
import
torch
from
rpyc.utils.classic
import
obtain
from
rpyc.utils.classic
import
obtain
from
rpyc.utils.server
import
ThreadedServer
from
rpyc.utils.server
import
ThreadedServer
from
vllm.logger
import
_default_handler
as
vllm_default_handler
from
sglang.srt.constrained.fsm_cache
import
FSMCache
from
sglang.srt.constrained.fsm_cache
import
FSMCache
from
sglang.srt.constrained.jump_forward
import
JumpForwardCache
from
sglang.srt.constrained.jump_forward
import
JumpForwardCache
from
sglang.srt.hf_transformers_utils
import
get_processor
,
get_tokenizer
from
sglang.srt.hf_transformers_utils
import
get_processor
,
get_tokenizer
...
@@ -30,7 +32,6 @@ from sglang.srt.utils import (
...
@@ -30,7 +32,6 @@ from sglang.srt.utils import (
is_multimodal_model
,
is_multimodal_model
,
set_random_seed
,
set_random_seed
,
)
)
from
vllm.logger
import
_default_handler
as
vllm_default_handler
logger
=
logging
.
getLogger
(
"model_rpc"
)
logger
=
logging
.
getLogger
(
"model_rpc"
)
...
...
python/sglang/srt/managers/router/model_runner.py
View file @
9acc6e35
...
@@ -9,16 +9,17 @@ from typing import List
...
@@ -9,16 +9,17 @@ from typing import List
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
sglang.srt.managers.router.infer_batch
import
Batch
,
ForwardMode
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
from
sglang.srt.utils
import
is_multimodal_model
from
sglang.utils
import
get_available_gpu_memory
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.awq
import
AWQConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.gptq
import
GPTQConfig
from
vllm.model_executor.layers.quantization.marlin
import
MarlinConfig
from
vllm.model_executor.layers.quantization.marlin
import
MarlinConfig
from
vllm.model_executor.model_loader
import
_set_default_torch_dtype
from
vllm.model_executor.model_loader
import
_set_default_torch_dtype
from
vllm.model_executor.parallel_utils.parallel_state
import
initialize_model_parallel
from
vllm.model_executor.parallel_utils.parallel_state
import
initialize_model_parallel
from
sglang.srt.managers.router.infer_batch
import
Batch
,
ForwardMode
from
sglang.srt.memory_pool
import
ReqToTokenPool
,
TokenToKVPool
from
sglang.srt.utils
import
is_multimodal_model
from
sglang.utils
import
get_available_gpu_memory
QUANTIZATION_CONFIG_MAPPING
=
{
QUANTIZATION_CONFIG_MAPPING
=
{
"awq"
:
AWQConfig
,
"awq"
:
AWQConfig
,
"gptq"
:
GPTQConfig
,
"gptq"
:
GPTQConfig
,
...
...
python/sglang/srt/managers/router/radix_cache.py
View file @
9acc6e35
import
heapq
import
heapq
import
time
import
time
from
collections
import
defaultdict
from
collections
import
defaultdict
from
dataclasses
import
dataclass
from
typing
import
Tuple
import
torch
import
torch
...
...
python/sglang/srt/managers/tokenizer_manager.py
View file @
9acc6e35
...
@@ -10,6 +10,7 @@ import transformers
...
@@ -10,6 +10,7 @@ import transformers
import
uvloop
import
uvloop
import
zmq
import
zmq
import
zmq.asyncio
import
zmq.asyncio
from
sglang.srt.hf_transformers_utils
import
(
from
sglang.srt.hf_transformers_utils
import
(
get_config
,
get_config
,
get_context_length
,
get_context_length
,
...
...
python/sglang/srt/models/commandr.py
View file @
9acc6e35
...
@@ -20,13 +20,10 @@
...
@@ -20,13 +20,10 @@
# This file is based on the LLama model definition file in transformers
# This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model."""
"""PyTorch Cohere model."""
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
import
torch
import
torch
import
torch.utils.checkpoint
import
torch.utils.checkpoint
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
torch.nn.parameter
import
Parameter
from
torch.nn.parameter
import
Parameter
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
...
@@ -49,6 +46,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -49,6 +46,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
@
torch
.
compile
@
torch
.
compile
def
layer_norm_func
(
hidden_states
,
weight
,
variance_epsilon
):
def
layer_norm_func
(
hidden_states
,
weight
,
variance_epsilon
):
...
...
python/sglang/srt/models/dbrx.py
View file @
9acc6e35
...
@@ -5,10 +5,6 @@ from typing import Optional
...
@@ -5,10 +5,6 @@ from typing import Optional
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.models.dbrx_config
import
DbrxConfig
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.layers.fused_moe
import
fused_moe
from
vllm.model_executor.layers.linear
import
(
from
vllm.model_executor.layers.linear
import
(
LinearMethodBase
,
LinearMethodBase
,
...
@@ -35,6 +31,11 @@ from vllm.model_executor.weight_utils import (
...
@@ -35,6 +31,11 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.models.dbrx_config
import
DbrxConfig
class
DbrxRouter
(
nn
.
Module
):
class
DbrxRouter
(
nn
.
Module
):
"""A Router implementation for DBRX that returns logits for each expert
"""A Router implementation for DBRX that returns logits for each expert
...
...
python/sglang/srt/models/gemma.py
View file @
9acc6e35
...
@@ -4,9 +4,6 @@
...
@@ -4,9 +4,6 @@
from
typing
import
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
import
torch
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.config
import
LoRAConfig
from
vllm.config
import
LoRAConfig
...
@@ -28,6 +25,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -28,6 +25,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
GemmaMLP
(
nn
.
Module
):
class
GemmaMLP
(
nn
.
Module
):
def
__init__
(
def
__init__
(
...
...
python/sglang/srt/models/llama2.py
View file @
9acc6e35
# Adapted from
# Adapted from
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
# https://github.com/vllm-project/vllm/blob/671af2b1c0b3ed6d856d37c21a561cc429a10701/vllm/model_executor/models/llama.py#L1
"""Inference-only LLaMA model compatible with HuggingFace weights."""
"""Inference-only LLaMA model compatible with HuggingFace weights."""
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
,
Tuple
import
torch
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
transformers
import
LlamaConfig
from
transformers
import
LlamaConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
LlamaMLP
(
nn
.
Module
):
class
LlamaMLP
(
nn
.
Module
):
def
__init__
(
def
__init__
(
...
...
python/sglang/srt/models/llava.py
View file @
9acc6e35
...
@@ -4,6 +4,15 @@ from typing import List, Optional
...
@@ -4,6 +4,15 @@ from typing import List, Optional
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
torch
import
nn
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
transformers.models.llava.modeling_llava
import
LlavaMultiModalProjector
from
vllm.model_executor.layers.linear
import
LinearMethodBase
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
hf_model_weights_iterator
,
)
from
sglang.srt.managers.router.infer_batch
import
ForwardMode
from
sglang.srt.managers.router.infer_batch
import
ForwardMode
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
sglang.srt.mm_utils
import
(
from
sglang.srt.mm_utils
import
(
...
@@ -12,14 +21,6 @@ from sglang.srt.mm_utils import (
...
@@ -12,14 +21,6 @@ from sglang.srt.mm_utils import (
unpad_image_shape
,
unpad_image_shape
,
)
)
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
sglang.srt.models.llama2
import
LlamaForCausalLM
from
torch
import
nn
from
transformers
import
CLIPVisionModel
,
LlamaConfig
,
LlavaConfig
from
transformers.models.llava.modeling_llava
import
LlavaMultiModalProjector
from
vllm.model_executor.layers.linear
import
LinearMethodBase
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
hf_model_weights_iterator
,
)
class
LlavaLlamaForCausalLM
(
nn
.
Module
):
class
LlavaLlamaForCausalLM
(
nn
.
Module
):
...
...
python/sglang/srt/models/mixtral.py
View file @
9acc6e35
# Adapted from
# Adapted from
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
# https://github.com/vllm-project/vllm/blob/d0215a58e78572d91dadafe9d832a2db89b09a13/vllm/model_executor/models/mixtral.py#L1
"""Inference-only Mixtral model."""
"""Inference-only Mixtral model."""
from
typing
import
List
,
Optional
,
Tuple
from
typing
import
Optional
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
transformers
import
MixtralConfig
from
transformers
import
MixtralConfig
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
@@ -35,6 +32,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -35,6 +32,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
MixtralMLP
(
nn
.
Module
):
class
MixtralMLP
(
nn
.
Module
):
def
__init__
(
def
__init__
(
...
...
python/sglang/srt/models/qwen.py
View file @
9acc6e35
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
import
torch
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
@@ -27,6 +24,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -27,6 +24,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
QWenMLP
(
nn
.
Module
):
class
QWenMLP
(
nn
.
Module
):
def
__init__
(
def
__init__
(
...
...
python/sglang/srt/models/qwen2.py
View file @
9acc6e35
# Adapted from llama2.py
# Adapted from llama2.py
# Modify details for the adaptation of Qwen2 model.
# Modify details for the adaptation of Qwen2 model.
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
from
typing
import
Any
,
Dict
,
List
,
Optional
,
Tuple
from
typing
import
Any
,
Dict
,
Optional
,
Tuple
import
torch
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.layernorm
import
RMSNorm
from
vllm.model_executor.layers.layernorm
import
RMSNorm
...
@@ -29,6 +26,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -29,6 +26,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
Qwen2Config
=
None
Qwen2Config
=
None
...
...
python/sglang/srt/models/stablelm.py
View file @
9acc6e35
...
@@ -5,9 +5,6 @@ model compatible with HuggingFace weights."""
...
@@ -5,9 +5,6 @@ model compatible with HuggingFace weights."""
from
typing
import
Optional
,
Tuple
from
typing
import
Optional
,
Tuple
import
torch
import
torch
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
from
torch
import
nn
from
torch
import
nn
from
transformers
import
PretrainedConfig
from
transformers
import
PretrainedConfig
from
vllm.model_executor.layers.activation
import
SiluAndMul
from
vllm.model_executor.layers.activation
import
SiluAndMul
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
...
@@ -30,6 +27,10 @@ from vllm.model_executor.weight_utils import (
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.layers.logits_processor
import
LogitsProcessor
from
sglang.srt.layers.radix_attention
import
RadixAttention
from
sglang.srt.managers.router.model_runner
import
InputMetadata
class
StablelmMLP
(
nn
.
Module
):
class
StablelmMLP
(
nn
.
Module
):
def
__init__
(
def
__init__
(
...
...
python/sglang/srt/models/yivl.py
View file @
9acc6e35
"""Inference-only Yi-VL model."""
"""Inference-only Yi-VL model."""
import
os
from
typing
import
Optional
from
typing
import
List
,
Optional
import
torch
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
from
sglang.srt.models.llava
import
(
LlavaLlamaForCausalLM
,
clip_vision_embed_forward
,
monkey_path_clip_vision_embed_forward
,
)
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
transformers
import
CLIPVisionModel
,
LlavaConfig
from
vllm.model_executor.weight_utils
import
(
from
vllm.model_executor.weight_utils
import
(
default_weight_loader
,
default_weight_loader
,
hf_model_weights_iterator
,
hf_model_weights_iterator
,
)
)
from
sglang.srt.models.llava
import
(
LlavaLlamaForCausalLM
,
monkey_path_clip_vision_embed_forward
,
)
class
YiVLForCausalLM
(
LlavaLlamaForCausalLM
):
class
YiVLForCausalLM
(
LlavaLlamaForCausalLM
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
def
__init__
(
self
,
*
args
,
**
kwargs
):
...
...
python/sglang/srt/server.py
View file @
9acc6e35
...
@@ -10,9 +10,6 @@ import threading
...
@@ -10,9 +10,6 @@ import threading
import
time
import
time
from
typing
import
List
,
Optional
,
Union
from
typing
import
List
,
Optional
,
Union
# Fix a Python bug
setattr
(
threading
,
"_register_atexit"
,
lambda
*
args
,
**
kwargs
:
None
)
import
aiohttp
import
aiohttp
import
psutil
import
psutil
import
pydantic
import
pydantic
...
@@ -22,6 +19,9 @@ import uvloop
...
@@ -22,6 +19,9 @@ import uvloop
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
fastapi
import
FastAPI
,
HTTPException
,
Request
from
fastapi.responses
import
Response
,
StreamingResponse
from
fastapi.responses
import
Response
,
StreamingResponse
from
pydantic
import
BaseModel
from
pydantic
import
BaseModel
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.srt.constrained
import
disable_cache
from
sglang.srt.constrained
import
disable_cache
from
sglang.srt.conversation
import
(
from
sglang.srt.conversation
import
(
...
@@ -54,8 +54,9 @@ from sglang.srt.managers.router.manager import start_router_process
...
@@ -54,8 +54,9 @@ from sglang.srt.managers.router.manager import start_router_process
from
sglang.srt.managers.tokenizer_manager
import
TokenizerManager
from
sglang.srt.managers.tokenizer_manager
import
TokenizerManager
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.server_args
import
PortArgs
,
ServerArgs
from
sglang.srt.utils
import
enable_show_time_cost
,
handle_port_init
from
sglang.srt.utils
import
enable_show_time_cost
,
handle_port_init
from
starlette.middleware.base
import
BaseHTTPMiddleware
from
starlette.responses
import
JSONResponse
# Fix a Python bug
setattr
(
threading
,
"_register_atexit"
,
lambda
*
args
,
**
kwargs
:
None
)
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
asyncio
.
set_event_loop_policy
(
uvloop
.
EventLoopPolicy
())
...
@@ -618,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
...
@@ -618,7 +619,7 @@ def launch_server(server_args, pipe_finish_writer):
try
:
try
:
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
,
headers
=
headers
)
requests
.
get
(
url
+
"/get_model_info"
,
timeout
=
5
,
headers
=
headers
)
break
break
except
requests
.
exceptions
.
RequestException
as
e
:
except
requests
.
exceptions
.
RequestException
:
pass
pass
else
:
else
:
if
pipe_finish_writer
is
not
None
:
if
pipe_finish_writer
is
not
None
:
...
...
python/sglang/srt/utils.py
View file @
9acc6e35
...
@@ -157,7 +157,6 @@ def get_exception_traceback():
...
@@ -157,7 +157,6 @@ def get_exception_traceback():
def
get_int_token_logit_bias
(
tokenizer
,
vocab_size
):
def
get_int_token_logit_bias
(
tokenizer
,
vocab_size
):
from
transformers
import
LlamaTokenizer
,
LlamaTokenizerFast
# a bug when model's vocab size > tokenizer.vocab_size
# a bug when model's vocab size > tokenizer.vocab_size
vocab_size
=
tokenizer
.
vocab_size
vocab_size
=
tokenizer
.
vocab_size
...
...
python/sglang/test/test_utils.py
View file @
9acc6e35
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
import
numpy
as
np
import
numpy
as
np
import
requests
import
requests
from
sglang.backend.openai
import
OpenAI
from
sglang.backend.openai
import
OpenAI
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.backend.runtime_endpoint
import
RuntimeEndpoint
from
sglang.global_config
import
global_config
from
sglang.global_config
import
global_config
...
...
test/lang/run_all.py
View file @
9acc6e35
import
argparse
import
argparse
import
glob
import
glob
import
multiprocessing
import
multiprocessing
import
os
import
time
import
time
import
unittest
import
unittest
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment