Commit 11616fc6 in sglang (unverified)

Minor fix in compiler & format (#545)

Authored by sglang on Jun 29, 2024; committed via GitHub on Jun 29, 2024.
Parent commit: 9ce89bc1
Showing 12 changed files with 28 additions and 33 deletions.
benchmark/latency_throughput/bench_serving.py         +0 -1
benchmark/line_retrieval/gen_data.py                  +3 -3
python/sglang/backend/litellm.py                      +0 -1
python/sglang/lang/compiler.py                        +2 -2
python/sglang/launch_server_llavavid.py               +0 -1
python/sglang/srt/managers/controller/infer_batch.py  +4 -3
python/sglang/srt/managers/controller/tp_worker.py    +8 -7
python/sglang/srt/managers/io_struct.py               +0 -1
python/sglang/srt/managers/tokenizer_manager.py       +8 -8
python/sglang/srt/models/chatglm.py                   +0 -2
python/sglang/srt/models/grok.py                      +0 -1
python/sglang/srt/models/llama2.py                    +3 -3
benchmark/latency_throughput/bench_serving.py

@@ -38,7 +38,6 @@ def sample_requests(
     num_requests: int,
     tokenizer: AutoTokenizer,
 ) -> List[Tuple[str, int, int]]:
     def load_dataset():
         with open(dataset_path, encoding="utf-8") as f:
             dataset = json.load(f)
...
benchmark/line_retrieval/gen_data.py

@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
     )
     for i in redirect_indices:
         target_idx = np.random.choice(min(i * 2 + 100, num_lines))
-        lines[i] = (
-            f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
-        )
+        lines[
+            i
+        ] = f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
         redirects[i] = target_idx

     # Build links and find sources
...
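Note on the context line above: np.random.choice with a scalar argument n samples uniformly from range(n), so each redirect target lands strictly below min(i * 2 + 100, num_lines). A minimal sketch with illustrative values (not taken from the benchmark):

import numpy as np

# choice(160) draws an integer uniformly from [0, 160)
i, num_lines = 30, 1000
target_idx = np.random.choice(min(i * 2 + 100, num_lines))
assert 0 <= target_idx < 160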
python/sglang/backend/litellm.py

@@ -13,7 +13,6 @@ except ImportError as e:
 class LiteLLM(BaseBackend):
     def __init__(
         self,
         model_name,
...
python/sglang/lang/compiler.py

@@ -4,7 +4,7 @@ from queue import Queue
 from typing import List, Union

 from sglang.global_config import global_config
-from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
+from sglang.lang.interpreter import ProgramState, StreamExecutor, cache_program
 from sglang.lang.ir import (
     SglArgument,
     SglConstantText,

@@ -184,7 +184,7 @@ class CompiledFunction:
         # Extract prefix by tracing and cache it
         if len(batch_kwargs) > 1:
-            pin_program(self.function, backend)
+            cache_program(self.function, backend)

         # Run all programs
         if num_threads == "auto":
...
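The functional change in this file swaps a reference to the removed pin_program helper for cache_program, which traces the function once and caches its shared prompt prefix before a batch run. A rough usage sketch, assuming the sglang frontend API of that era (the decorator, compile(), and run_batch() are not shown in this diff, so treat their exact signatures as illustrative):

import sglang as sgl

@sgl.function
def qa(s, question):
    s += "Q: " + question + "\n"
    s += "A: " + sgl.gen("answer", max_tokens=32)

# run_batch with more than one kwargs dict hits the len(batch_kwargs) > 1
# branch above, which now calls cache_program instead of pin_program
compiled = qa.compile()
states = compiled.run_batch(
    [{"question": "What is 1 + 1?"}, {"question": "Name a prime."}],
    num_threads="auto",
)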
python/sglang/launch_server_llavavid.py

@@ -6,7 +6,6 @@ import multiprocessing as mp
 from sglang.srt.server import ServerArgs, launch_server

 if __name__ == "__main__":
     model_overide_args = {}
     model_overide_args["mm_spatial_pool_stride"] = 2
...
python/sglang/srt/managers/controller/infer_batch.py

@@ -498,9 +498,10 @@ class Batch:
                 req.output_ids = cur_output_ids
                 continue

-            jump_forward_str, next_state = (
-                req.jump_forward_map.jump_forward_symbol(cur_state)
-            )
+            (
+                jump_forward_str,
+                next_state,
+            ) = req.jump_forward_map.jump_forward_symbol(cur_state)

             # Make the incrementally decoded text part of jump_forward_str
             # so that the UTF-8 will not corrupt
...
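The rewrite above only changes how the assignment is wrapped; a parenthesized tuple target unpacks exactly like a bare one. A runnable sketch with a toy stand-in for jump_forward_symbol:

def jump_forward_symbol(cur_state):
    # toy stand-in for JumpForwardMap.jump_forward_symbol, not the real one
    return "abc", cur_state + 1

(
    jump_forward_str,
    next_state,
) = jump_forward_symbol(0)
assert (jump_forward_str, next_state) == ("abc", 1)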
python/sglang/srt/managers/controller/tp_worker.py

@@ -283,13 +283,14 @@ class ModelTpServer:
                 (recv_req.image_hash >> 64) % self.model_config.vocab_size,
             ]
             req.image_size = recv_req.image_size
-            req.origin_input_ids, req.image_offset = (
-                self.model_runner.model.pad_input_ids(
+            (
+                req.origin_input_ids,
+                req.image_offset,
+            ) = self.model_runner.model.pad_input_ids(
                 req.origin_input_ids_unpadded,
                 req.pad_value,
                 req.pixel_values.shape,
                 req.image_size,
-                )
             )
             req.sampling_params = recv_req.sampling_params
             req.return_logprob = recv_req.return_logprob
...
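For context, the pad values above are derived from the image hash so that the padded token ids stay inside the vocabulary. A minimal sketch of that arithmetic (the hash value and vocabulary size are illustrative, not from this diff):

# shifting selects a different 64-bit slice of the hash; the modulo maps it
# into [0, vocab_size)
vocab_size = 32000
image_hash = 0x0123_4567_89AB_CDEF_FEDC_BA98_7654_3210
pad_token = (image_hash >> 64) % vocab_size
assert 0 <= pad_token < vocab_size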
python/sglang/srt/managers/io_struct.py

@@ -35,7 +35,6 @@ class GenerateReqInput:
     stream: bool = False

     def post_init(self):
         if (self.text is None and self.input_ids is None) or (
             self.text is not None and self.input_ids is not None
         ):
...
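The condition shown above enforces that exactly one of text and input_ids is supplied. A small sketch of the same check in isolation (the error message is illustrative, not taken from this diff):

def check(text, input_ids):
    # reject "neither" and "both"; accept exactly one
    if (text is None and input_ids is None) or (
        text is not None and input_ids is not None
    ):
        raise ValueError("Provide either text or input_ids, not both or neither.")

check("hello", None)    # ok
check(None, [1, 2, 3])  # ok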
python/sglang/srt/managers/tokenizer_manager.py

@@ -334,15 +334,15 @@ class TokenizerManager:
                 ret["meta_info"]["decode_token_logprobs"], return_text_in_logprobs
             )
             if top_logprobs_num > 0:
-                ret["meta_info"]["prefill_top_logprobs"] = (
-                    self.detokenize_top_logprobs_tokens(
-                        ret["meta_info"]["prefill_top_logprobs"], return_text_in_logprobs
-                    )
-                )
-                ret["meta_info"]["decode_top_logprobs"] = (
-                    self.detokenize_top_logprobs_tokens(
-                        ret["meta_info"]["decode_top_logprobs"], return_text_in_logprobs
-                    )
-                )
+                ret["meta_info"][
+                    "prefill_top_logprobs"
+                ] = self.detokenize_top_logprobs_tokens(
+                    ret["meta_info"]["prefill_top_logprobs"], return_text_in_logprobs
+                )
+                ret["meta_info"][
+                    "decode_top_logprobs"
+                ] = self.detokenize_top_logprobs_tokens(
+                    ret["meta_info"]["decode_top_logprobs"], return_text_in_logprobs
+                )
         return ret
...
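The reshaped calls above feed per-position (logprob, token_id) pairs through detokenize_top_logprobs_tokens so callers can get decoded text back. A hypothetical stand-in showing the shape of that transformation (not the real sglang implementation):

def detokenize_top_logprobs_tokens(top_logprobs, return_text_in_logprobs):
    toy_vocab = {7: "hello", 9: "world"}  # toy "tokenizer"
    if not return_text_in_logprobs:
        return top_logprobs
    return [
        [(logprob, token_id, toy_vocab.get(token_id, "<unk>"))
         for logprob, token_id in position]
        for position in top_logprobs
    ]

meta_info = {"prefill_top_logprobs": [[(-0.1, 7), (-2.3, 9)]]}
meta_info["prefill_top_logprobs"] = detokenize_top_logprobs_tokens(
    meta_info["prefill_top_logprobs"], True
)
assert meta_info["prefill_top_logprobs"][0][0] == (-0.1, 7, "hello")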
python/sglang/srt/models/chatglm.py

@@ -36,7 +36,6 @@ LoraConfig = None
 class GLMAttention(nn.Module):
     def __init__(
         self,
         config,
...

@@ -294,7 +293,6 @@ class GLMTransformer(nn.Module):
 class ChatGLMModel(nn.Module):
     def __init__(
         self,
         config,
...
python/sglang/srt/models/grok.py

@@ -521,7 +521,6 @@ class Grok1DecoderLayer(nn.Module):
         hidden_states: torch.Tensor,
         input_metadata: InputMetadata,
     ) -> torch.Tensor:
         hidden_states = (
             self.post_attn_norm(
                 self.self_attn(
...
python/sglang/srt/models/llama2.py

@@ -160,9 +160,9 @@ class LlamaDecoderLayer(nn.Module):
         if rope_scaling is not None and getattr(
             config, "original_max_position_embeddings", None
         ):
-            rope_scaling["original_max_position_embeddings"] = (
-                config.original_max_position_embeddings
-            )
+            rope_scaling[
+                "original_max_position_embeddings"
+            ] = config.original_max_position_embeddings
         max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,
...
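The guarded copy above only fires when the config actually defines original_max_position_embeddings, as some rope-scaled checkpoints do. A minimal sketch, with Cfg standing in for the HF model config and illustrative rope_scaling contents:

class Cfg:
    # stand-in for a HF config that defines the original context length
    original_max_position_embeddings = 4096

config = Cfg()
rope_scaling = {"type": "yarn", "factor": 4.0}
if rope_scaling is not None and getattr(
    config, "original_max_position_embeddings", None
):
    rope_scaling[
        "original_max_position_embeddings"
    ] = config.original_max_position_embeddings
assert rope_scaling["original_max_position_embeddings"] == 4096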