Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
26f0bedc
Unverified
Commit
26f0bedc
authored
Feb 05, 2024
by
Liangsheng Yin
Committed by
GitHub
Feb 05, 2024
Browse files
jump-forward rename (#144)
parent
82fa69b3
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
70 additions
and
70 deletions
+70
-70
benchmark/json_jump_forward/README.md
benchmark/json_jump_forward/README.md
+0
-0
benchmark/json_jump_forward/bench_other.py
benchmark/json_jump_forward/bench_other.py
+1
-1
benchmark/json_jump_forward/bench_sglang.py
benchmark/json_jump_forward/bench_sglang.py
+1
-1
benchmark/json_jump_forward/build_dataset.py
benchmark/json_jump_forward/build_dataset.py
+0
-0
benchmark/json_jump_forward/dataset.txt
benchmark/json_jump_forward/dataset.txt
+0
-0
python/sglang/srt/constrained/jump_forward.py
python/sglang/srt/constrained/jump_forward.py
+21
-21
python/sglang/srt/managers/detokenizer_manager.py
python/sglang/srt/managers/detokenizer_manager.py
+1
-1
python/sglang/srt/managers/io_struct.py
python/sglang/srt/managers/io_struct.py
+1
-1
python/sglang/srt/managers/router/infer_batch.py
python/sglang/srt/managers/router/infer_batch.py
+22
-22
python/sglang/srt/managers/router/model_rpc.py
python/sglang/srt/managers/router/model_rpc.py
+16
-16
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+3
-3
test/srt/test_jump_forward.py
test/srt/test_jump_forward.py
+4
-4
No files found.
benchmark/json_
fast
_forward/README.md
→
benchmark/json_
jump
_forward/README.md
View file @
26f0bedc
File moved
benchmark/json_
fast
_forward/bench_other.py
→
benchmark/json_
jump
_forward/bench_other.py
View file @
26f0bedc
...
@@ -219,7 +219,7 @@ def main(args):
...
@@ -219,7 +219,7 @@ def main(args):
with
open
(
args
.
result_file
,
"a"
)
as
fout
:
with
open
(
args
.
result_file
,
"a"
)
as
fout
:
value
=
{
value
=
{
"task"
:
"json_
fast
_forward"
,
"task"
:
"json_
jump
_forward"
,
"backend"
:
args
.
backend
,
"backend"
:
args
.
backend
,
"latency"
:
round
(
latency
,
3
),
"latency"
:
round
(
latency
,
3
),
"num_jsons"
:
args
.
num_jsons
,
"num_jsons"
:
args
.
num_jsons
,
...
...
benchmark/json_
fast
_forward/bench_sglang.py
→
benchmark/json_
jump
_forward/bench_sglang.py
View file @
26f0bedc
...
@@ -122,7 +122,7 @@ def main(args):
...
@@ -122,7 +122,7 @@ def main(args):
with
open
(
args
.
result_file
,
"a"
)
as
fout
:
with
open
(
args
.
result_file
,
"a"
)
as
fout
:
value
=
{
value
=
{
"task"
:
"json_
fast
_forward"
,
"task"
:
"json_
jump
_forward"
,
"backend"
:
args
.
backend
,
"backend"
:
args
.
backend
,
"latency"
:
round
(
latency
,
3
),
"latency"
:
round
(
latency
,
3
),
"num_jsons"
:
args
.
num_jsons
,
"num_jsons"
:
args
.
num_jsons
,
...
...
benchmark/json_
fast
_forward/build_dataset.py
→
benchmark/json_
jump
_forward/build_dataset.py
View file @
26f0bedc
File moved
benchmark/json_
fast
_forward/dataset.txt
→
benchmark/json_
jump
_forward/dataset.txt
View file @
26f0bedc
File moved
python/sglang/srt/constrained/
fast
_forward.py
→
python/sglang/srt/constrained/
jump
_forward.py
View file @
26f0bedc
...
@@ -6,10 +6,10 @@ from sglang.srt.constrained.regex import FSMInfo, make_deterministic_fsm
...
@@ -6,10 +6,10 @@ from sglang.srt.constrained.regex import FSMInfo, make_deterministic_fsm
IP_REGEX
=
r
"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
IP_REGEX
=
r
"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
class
Fast
ForwardMap
:
class
Jump
ForwardMap
:
def
__init__
(
self
,
regex_string
):
def
__init__
(
self
,
regex_string
):
@
disk_cache
()
@
disk_cache
()
def
_init_state_to_
fast
_forward
(
regex_string
):
def
_init_state_to_
jump
_forward
(
regex_string
):
regex_pattern
=
interegular
.
parse_pattern
(
regex_string
)
regex_pattern
=
interegular
.
parse_pattern
(
regex_string
)
regex_fsm
,
_
=
make_deterministic_fsm
(
regex_pattern
.
to_fsm
().
reduce
())
regex_fsm
,
_
=
make_deterministic_fsm
(
regex_pattern
.
to_fsm
().
reduce
())
...
@@ -22,54 +22,54 @@ class FastForwardMap:
...
@@ -22,54 +22,54 @@ class FastForwardMap:
transitions
=
fsm_info
.
transitions
transitions
=
fsm_info
.
transitions
dirty_states
=
set
()
dirty_states
=
set
()
state_to_
fast
_forward
=
{}
state_to_
jump
_forward
=
{}
for
(
state
,
id_
),
next_state
in
transitions
.
items
():
for
(
state
,
id_
),
next_state
in
transitions
.
items
():
if
state
in
dirty_states
:
if
state
in
dirty_states
:
continue
continue
if
state
in
state_to_
fast
_forward
:
if
state
in
state_to_
jump
_forward
:
dirty_states
.
add
(
state
)
dirty_states
.
add
(
state
)
del
state_to_
fast
_forward
[
state
]
del
state_to_
jump
_forward
[
state
]
continue
continue
if
len
(
id_to_symbol
[
id_
])
>
1
:
if
len
(
id_to_symbol
[
id_
])
>
1
:
dirty_states
.
add
(
state
)
dirty_states
.
add
(
state
)
continue
continue
state_to_
fast
_forward
[
state
]
=
(
id_to_symbol
[
id_
][
0
],
next_state
)
state_to_
jump
_forward
[
state
]
=
(
id_to_symbol
[
id_
][
0
],
next_state
)
return
state_to_
fast
_forward
return
state_to_
jump
_forward
self
.
state_to_
fast
_forward
=
_init_state_to_
fast
_forward
(
regex_string
)
self
.
state_to_
jump
_forward
=
_init_state_to_
jump
_forward
(
regex_string
)
def
valid_states
(
self
):
def
valid_states
(
self
):
return
self
.
state_to_
fast
_forward
.
keys
()
return
self
.
state_to_
jump
_forward
.
keys
()
def
fast
_forward
(
self
,
state
):
def
jump
_forward
(
self
,
state
):
if
state
not
in
self
.
state_to_
fast
_forward
:
if
state
not
in
self
.
state_to_
jump
_forward
:
return
None
return
None
fast
_forward_str
=
""
jump
_forward_str
=
""
next_state
=
None
next_state
=
None
while
state
in
self
.
state_to_
fast
_forward
:
while
state
in
self
.
state_to_
jump
_forward
:
symbol
,
next_state
=
self
.
state_to_
fast
_forward
[
state
]
symbol
,
next_state
=
self
.
state_to_
jump
_forward
[
state
]
fast
_forward_str
+=
symbol
jump
_forward_str
+=
symbol
state
=
next_state
state
=
next_state
return
fast
_forward_str
,
next_state
return
jump
_forward_str
,
next_state
class
Fast
ForwardCache
(
BaseCache
):
class
Jump
ForwardCache
(
BaseCache
):
def
__init__
(
self
):
def
__init__
(
self
):
super
().
__init__
()
super
().
__init__
()
def
init_value
(
self
,
regex
):
def
init_value
(
self
,
regex
):
return
Fast
ForwardMap
(
regex
)
return
Jump
ForwardMap
(
regex
)
def
test_main
():
def
test_main
():
regex_string
=
r
"The google's DNS sever address is "
+
IP_REGEX
regex_string
=
r
"The google's DNS sever address is "
+
IP_REGEX
fast
_forward_map
=
Fast
ForwardMap
(
regex_string
)
jump
_forward_map
=
Jump
ForwardMap
(
regex_string
)
for
state
in
fast
_forward_map
.
valid_states
():
for
state
in
jump
_forward_map
.
valid_states
():
print
(
state
,
f
'"
{
fast
_forward_map
.
fast
_forward
(
state
)
}
"'
)
print
(
state
,
f
'"
{
jump
_forward_map
.
jump
_forward
(
state
)
}
"'
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/sglang/srt/managers/detokenizer_manager.py
View file @
26f0bedc
...
@@ -61,7 +61,7 @@ class DetokenizerManager:
...
@@ -61,7 +61,7 @@ class DetokenizerManager:
output_strs
[
i
]
=
" "
+
output_strs
[
i
]
output_strs
[
i
]
=
" "
+
output_strs
[
i
]
output_strs
[
i
]
=
(
output_strs
[
i
]
=
(
recv_obj
.
output_and_
fast
_forward_strs
[
i
]
+
output_strs
[
i
]
recv_obj
.
output_and_
jump
_forward_strs
[
i
]
+
output_strs
[
i
]
)
)
self
.
send_to_tokenizer
.
send_pyobj
(
self
.
send_to_tokenizer
.
send_pyobj
(
...
...
python/sglang/srt/managers/io_struct.py
View file @
26f0bedc
...
@@ -81,7 +81,7 @@ class TokenizedGenerateReqInput:
...
@@ -81,7 +81,7 @@ class TokenizedGenerateReqInput:
class
BatchTokenIDOut
:
class
BatchTokenIDOut
:
rids
:
List
[
str
]
rids
:
List
[
str
]
output_tokens
:
List
[
List
[
int
]]
output_tokens
:
List
[
List
[
int
]]
output_and_
fast
_forward_strs
:
List
[
str
]
output_and_
jump
_forward_strs
:
List
[
str
]
hit_stop_str
:
List
[
Optional
[
str
]]
hit_stop_str
:
List
[
Optional
[
str
]]
skip_special_tokens
:
List
[
bool
]
skip_special_tokens
:
List
[
bool
]
meta_info
:
List
[
Dict
]
meta_info
:
List
[
Dict
]
...
...
python/sglang/srt/managers/router/infer_batch.py
View file @
26f0bedc
...
@@ -53,13 +53,13 @@ class Req:
...
@@ -53,13 +53,13 @@ class Req:
# For constrained decoding
# For constrained decoding
self
.
regex_fsm
=
None
self
.
regex_fsm
=
None
self
.
regex_fsm_state
=
0
self
.
regex_fsm_state
=
0
self
.
fast
_forward_map
=
None
self
.
jump
_forward_map
=
None
self
.
output_and_
fast
_forward_str
=
""
self
.
output_and_
jump
_forward_str
=
""
def
max_new_tokens
(
self
):
def
max_new_tokens
(
self
):
return
self
.
sampling_params
.
max_new_tokens
return
self
.
sampling_params
.
max_new_tokens
def
fast
_forward_and_retokenize
(
self
,
fast
_forward_str
,
next_state
):
def
jump
_forward_and_retokenize
(
self
,
jump
_forward_str
,
next_state
):
old_output_str
=
self
.
tokenizer
.
decode
(
self
.
output_ids
)
old_output_str
=
self
.
tokenizer
.
decode
(
self
.
output_ids
)
# FIXME: This logic does not really solve the problem of determining whether
# FIXME: This logic does not really solve the problem of determining whether
# there should be a leading space.
# there should be a leading space.
...
@@ -71,35 +71,35 @@ class Req:
...
@@ -71,35 +71,35 @@ class Req:
old_output_str
=
" "
+
old_output_str
old_output_str
=
" "
+
old_output_str
new_input_string
=
(
new_input_string
=
(
self
.
input_text
self
.
input_text
+
self
.
output_and_
fast
_forward_str
+
self
.
output_and_
jump
_forward_str
+
old_output_str
+
old_output_str
+
fast
_forward_str
+
jump
_forward_str
)
)
new_input_ids
=
self
.
tokenizer
.
encode
(
new_input_string
)
new_input_ids
=
self
.
tokenizer
.
encode
(
new_input_string
)
if
self
.
pixel_values
is
not
None
:
if
self
.
pixel_values
is
not
None
:
# NOTE: This is a hack because the old input_ids contains the image padding
# NOTE: This is a hack because the old input_ids contains the image padding
fast
_forward_tokens_len
=
len
(
self
.
tokenizer
.
encode
(
fast
_forward_str
))
jump
_forward_tokens_len
=
len
(
self
.
tokenizer
.
encode
(
jump
_forward_str
))
else
:
else
:
fast
_forward_tokens_len
=
(
jump
_forward_tokens_len
=
(
len
(
new_input_ids
)
-
len
(
self
.
input_ids
)
-
len
(
self
.
output_ids
)
len
(
new_input_ids
)
-
len
(
self
.
input_ids
)
-
len
(
self
.
output_ids
)
)
)
# print("=" * 100)
# print("=" * 100)
# print(f"Catch
fast
forward:\n{
fast
_forward_str}")
# print(f"Catch
jump
forward:\n{
jump
_forward_str}")
# print(self.tokenizer.convert_ids_to_tokens(self.input_ids))
# print(self.tokenizer.convert_ids_to_tokens(self.input_ids))
# print(self.tokenizer.convert_ids_to_tokens(new_input_ids))
# print(self.tokenizer.convert_ids_to_tokens(new_input_ids))
self
.
input_ids
=
new_input_ids
self
.
input_ids
=
new_input_ids
self
.
output_ids
=
[]
self
.
output_ids
=
[]
self
.
sampling_params
.
max_new_tokens
=
max
(
self
.
sampling_params
.
max_new_tokens
=
max
(
self
.
sampling_params
.
max_new_tokens
-
fast
_forward_tokens_len
,
0
self
.
sampling_params
.
max_new_tokens
-
jump
_forward_tokens_len
,
0
)
)
self
.
regex_fsm_state
=
next_state
self
.
regex_fsm_state
=
next_state
self
.
output_and_
fast
_forward_str
=
(
self
.
output_and_
jump
_forward_str
=
(
self
.
output_and_
fast
_forward_str
+
old_output_str
+
fast
_forward_str
self
.
output_and_
jump
_forward_str
+
old_output_str
+
jump
_forward_str
)
)
# print(f"Output and
fast
forward str:\n{self.output_and_
fast
_forward_str}")
# print(f"Output and
jump
forward str:\n{self.output_and_
jump
_forward_str}")
# print("*" * 100)
# print("*" * 100)
def
check_finished
(
self
):
def
check_finished
(
self
):
...
@@ -327,18 +327,18 @@ class Batch:
...
@@ -327,18 +327,18 @@ class Batch:
return
retracted_reqs
return
retracted_reqs
def
check_for_
fast
_forward
(
self
):
def
check_for_
jump
_forward
(
self
):
fast
_forward_reqs
=
[]
jump
_forward_reqs
=
[]
filter_indices
=
[
i
for
i
in
range
(
len
(
self
.
reqs
))]
filter_indices
=
[
i
for
i
in
range
(
len
(
self
.
reqs
))]
req_pool_indices_cpu
=
None
req_pool_indices_cpu
=
None
for
i
,
req
in
enumerate
(
self
.
reqs
):
for
i
,
req
in
enumerate
(
self
.
reqs
):
if
req
.
fast
_forward_map
is
not
None
:
if
req
.
jump
_forward_map
is
not
None
:
res
=
req
.
fast
_forward_map
.
fast
_forward
(
req
.
regex_fsm_state
)
res
=
req
.
jump
_forward_map
.
jump
_forward
(
req
.
regex_fsm_state
)
if
res
is
not
None
:
if
res
is
not
None
:
fast
_forward_str
,
next_state
=
res
jump
_forward_str
,
next_state
=
res
if
len
(
fast
_forward_str
)
<=
1
:
if
len
(
jump
_forward_str
)
<=
1
:
continue
continue
# insert the old request into tree_cache
# insert the old request into tree_cache
...
@@ -356,16 +356,16 @@ class Batch:
...
@@ -356,16 +356,16 @@ class Batch:
self
.
req_to_token_pool
.
free
(
req_pool_idx
)
self
.
req_to_token_pool
.
free
(
req_pool_idx
)
self
.
tree_cache
.
dec_ref_counter
(
req
.
last_node
)
self
.
tree_cache
.
dec_ref_counter
(
req
.
last_node
)
#
fast
forward
#
jump-
forward
req
.
fast
_forward_and_retokenize
(
fast
_forward_str
,
next_state
)
req
.
jump
_forward_and_retokenize
(
jump
_forward_str
,
next_state
)
fast
_forward_reqs
.
append
(
req
)
jump
_forward_reqs
.
append
(
req
)
filter_indices
.
remove
(
i
)
filter_indices
.
remove
(
i
)
if
len
(
filter_indices
)
<
len
(
self
.
reqs
):
if
len
(
filter_indices
)
<
len
(
self
.
reqs
):
self
.
filter_batch
(
filter_indices
)
self
.
filter_batch
(
filter_indices
)
return
fast
_forward_reqs
return
jump
_forward_reqs
def
prepare_for_decode
(
self
,
input_ids
=
None
):
def
prepare_for_decode
(
self
,
input_ids
=
None
):
if
input_ids
is
None
:
if
input_ids
is
None
:
...
...
python/sglang/srt/managers/router/model_rpc.py
View file @
26f0bedc
...
@@ -11,7 +11,7 @@ import rpyc
...
@@ -11,7 +11,7 @@ import rpyc
import
torch
import
torch
from
rpyc.utils.classic
import
obtain
from
rpyc.utils.classic
import
obtain
from
rpyc.utils.server
import
ThreadedServer
from
rpyc.utils.server
import
ThreadedServer
from
sglang.srt.constrained.
fast
_forward
import
Fast
ForwardCache
from
sglang.srt.constrained.
jump
_forward
import
Jump
ForwardCache
from
sglang.srt.constrained.fsm_cache
import
FSMCache
from
sglang.srt.constrained.fsm_cache
import
FSMCache
from
sglang.srt.hf_transformers_utils
import
get_processor
,
get_tokenizer
from
sglang.srt.hf_transformers_utils
import
get_processor
,
get_tokenizer
from
sglang.srt.managers.io_struct
import
(
from
sglang.srt.managers.io_struct
import
(
...
@@ -49,7 +49,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -49,7 +49,7 @@ class ModelRpcServer(rpyc.Service):
self
.
tp_rank
=
tp_rank
self
.
tp_rank
=
tp_rank
self
.
tp_size
=
server_args
.
tp_size
self
.
tp_size
=
server_args
.
tp_size
self
.
schedule_heuristic
=
server_args
.
schedule_heuristic
self
.
schedule_heuristic
=
server_args
.
schedule_heuristic
self
.
no_regex_
fast
_forward
=
server_args
.
no_regex_
fast
_forward
self
.
no_regex_
jump
_forward
=
server_args
.
no_regex_
jump
_forward
# Init model and tokenizer
# Init model and tokenizer
self
.
model_config
=
ModelConfig
(
self
.
model_config
=
ModelConfig
(
...
@@ -127,7 +127,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -127,7 +127,7 @@ class ModelRpcServer(rpyc.Service):
"trust_remote_code"
:
server_args
.
trust_remote_code
,
"trust_remote_code"
:
server_args
.
trust_remote_code
,
},
},
)
)
self
.
fast
_forward_cache
=
Fast
ForwardCache
()
self
.
jump
_forward_cache
=
Jump
ForwardCache
()
# Init new token estimation
# Init new token estimation
self
.
new_token_ratio
=
min
(
0.4
*
server_args
.
schedule_conservativeness
,
1.0
)
self
.
new_token_ratio
=
min
(
0.4
*
server_args
.
schedule_conservativeness
,
1.0
)
...
@@ -254,8 +254,8 @@ class ModelRpcServer(rpyc.Service):
...
@@ -254,8 +254,8 @@ class ModelRpcServer(rpyc.Service):
# Init regex fsm
# Init regex fsm
if
req
.
sampling_params
.
regex
is
not
None
:
if
req
.
sampling_params
.
regex
is
not
None
:
req
.
regex_fsm
=
self
.
regex_fsm_cache
.
query
(
req
.
sampling_params
.
regex
)
req
.
regex_fsm
=
self
.
regex_fsm_cache
.
query
(
req
.
sampling_params
.
regex
)
if
not
self
.
no_regex_
fast
_forward
:
if
not
self
.
no_regex_
jump
_forward
:
req
.
fast
_forward_map
=
self
.
fast
_forward_cache
.
query
(
req
.
jump
_forward_map
=
self
.
jump
_forward_cache
.
query
(
req
.
sampling_params
.
regex
req
.
sampling_params
.
regex
)
)
...
@@ -369,8 +369,8 @@ class ModelRpcServer(rpyc.Service):
...
@@ -369,8 +369,8 @@ class ModelRpcServer(rpyc.Service):
logger
.
debug
(
logger
.
debug
(
f
"fsm_cache_hit_rate:
{
100.0
*
self
.
regex_fsm_cache
.
get_cache_hit_rate
():.
2
f
}
%. "
f
"fsm_cache_hit_rate:
{
100.0
*
self
.
regex_fsm_cache
.
get_cache_hit_rate
():.
2
f
}
%. "
f
"fsm_cache_avg_init_time:
{
self
.
regex_fsm_cache
.
get_avg_init_time
():.
2
f
}
s. "
f
"fsm_cache_avg_init_time:
{
self
.
regex_fsm_cache
.
get_avg_init_time
():.
2
f
}
s. "
f
"ff_cache_hit_rate:
{
100.0
*
self
.
fast
_forward_cache
.
get_cache_hit_rate
():.
2
f
}
%. "
f
"ff_cache_hit_rate:
{
100.0
*
self
.
jump
_forward_cache
.
get_cache_hit_rate
():.
2
f
}
%. "
f
"ff_cache_avg_init_time:
{
self
.
fast
_forward_cache
.
get_avg_init_time
():.
2
f
}
s. "
f
"ff_cache_avg_init_time:
{
self
.
jump
_forward_cache
.
get_avg_init_time
():.
2
f
}
s. "
)
)
new_batch
=
Batch
.
init_new
(
new_batch
=
Batch
.
init_new
(
...
@@ -437,12 +437,12 @@ class ModelRpcServer(rpyc.Service):
...
@@ -437,12 +437,12 @@ class ModelRpcServer(rpyc.Service):
self
.
min_new_token_ratio
,
self
.
min_new_token_ratio
,
)
)
if
not
self
.
no_regex_
fast
_forward
:
if
not
self
.
no_regex_
jump
_forward
:
# check for
fast
forward
# check for
jump-
forward
fast
_forward_reqs
=
batch
.
check_for_
fast
_forward
()
jump
_forward_reqs
=
batch
.
check_for_
jump
_forward
()
# check for image
fast
forward
# check for image
jump-
forward
for
req
in
fast
_forward_reqs
:
for
req
in
jump
_forward_reqs
:
if
req
.
pixel_values
is
not
None
:
if
req
.
pixel_values
is
not
None
:
(
(
req
.
input_ids
,
req
.
input_ids
,
...
@@ -454,7 +454,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -454,7 +454,7 @@ class ModelRpcServer(rpyc.Service):
req
.
image_size
,
req
.
image_size
,
)
)
self
.
forward_queue
.
extend
(
fast
_forward_reqs
)
self
.
forward_queue
.
extend
(
jump
_forward_reqs
)
if
batch
.
is_empty
():
if
batch
.
is_empty
():
return
return
...
@@ -478,7 +478,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -478,7 +478,7 @@ class ModelRpcServer(rpyc.Service):
def
handle_finished_requests
(
self
,
batch
:
Batch
):
def
handle_finished_requests
(
self
,
batch
:
Batch
):
output_rids
=
[]
output_rids
=
[]
output_tokens
=
[]
output_tokens
=
[]
output_and_
fast
_forward_strs
=
[]
output_and_
jump
_forward_strs
=
[]
output_hit_stop_str
=
[]
output_hit_stop_str
=
[]
output_skip_special_tokens
=
[]
output_skip_special_tokens
=
[]
output_meta_info
=
[]
output_meta_info
=
[]
...
@@ -502,7 +502,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -502,7 +502,7 @@ class ModelRpcServer(rpyc.Service):
):
):
output_rids
.
append
(
req
.
rid
)
output_rids
.
append
(
req
.
rid
)
output_tokens
.
append
(
req
.
output_ids
)
output_tokens
.
append
(
req
.
output_ids
)
output_and_
fast
_forward_strs
.
append
(
req
.
output_and_
fast
_forward_str
)
output_and_
jump
_forward_strs
.
append
(
req
.
output_and_
jump
_forward_str
)
output_hit_stop_str
.
append
(
req
.
hit_stop_str
)
output_hit_stop_str
.
append
(
req
.
hit_stop_str
)
output_skip_special_tokens
.
append
(
output_skip_special_tokens
.
append
(
req
.
sampling_params
.
skip_special_tokens
req
.
sampling_params
.
skip_special_tokens
...
@@ -523,7 +523,7 @@ class ModelRpcServer(rpyc.Service):
...
@@ -523,7 +523,7 @@ class ModelRpcServer(rpyc.Service):
BatchTokenIDOut
(
BatchTokenIDOut
(
output_rids
,
output_rids
,
output_tokens
,
output_tokens
,
output_and_
fast
_forward_strs
,
output_and_
jump
_forward_strs
,
output_hit_stop_str
,
output_hit_stop_str
,
output_skip_special_tokens
,
output_skip_special_tokens
,
output_meta_info
,
output_meta_info
,
...
...
python/sglang/srt/server_args.py
View file @
26f0bedc
...
@@ -25,7 +25,7 @@ class ServerArgs:
...
@@ -25,7 +25,7 @@ class ServerArgs:
disable_log_stats
:
bool
=
False
disable_log_stats
:
bool
=
False
log_stats_interval
:
int
=
10
log_stats_interval
:
int
=
10
log_level
:
str
=
"info"
log_level
:
str
=
"info"
no_regex_
fast
_forward
:
bool
=
False
no_regex_
jump
_forward
:
bool
=
False
def
__post_init__
(
self
):
def
__post_init__
(
self
):
if
self
.
tokenizer_path
is
None
:
if
self
.
tokenizer_path
is
None
:
...
@@ -172,9 +172,9 @@ class ServerArgs:
...
@@ -172,9 +172,9 @@ class ServerArgs:
help
=
"Log stats interval in second."
,
help
=
"Log stats interval in second."
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--no-regex-
fast
-forward"
,
"--no-regex-
jump
-forward"
,
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Disable regex
fast
forward"
,
help
=
"Disable regex
jump-
forward"
,
)
)
@
classmethod
@
classmethod
...
...
test/srt/test_
fast
_forward.py
→
test/srt/test_
jump
_forward.py
View file @
26f0bedc
...
@@ -12,7 +12,7 @@ import sglang as sgl
...
@@ -12,7 +12,7 @@ import sglang as sgl
IP_REGEX
=
r
"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
IP_REGEX
=
r
"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
ip_
fast
_forward
=
(
ip_
jump
_forward
=
(
r
"The google's DNS sever address is "
r
"The google's DNS sever address is "
+
IP_REGEX
+
IP_REGEX
+
r
" and "
+
r
" and "
...
@@ -32,11 +32,11 @@ def regex_gen(s):
...
@@ -32,11 +32,11 @@ def regex_gen(s):
"answer"
,
"answer"
,
max_tokens
=
128
,
max_tokens
=
128
,
temperature
=
0
,
temperature
=
0
,
regex
=
ip_
fast
_forward
,
regex
=
ip_
jump
_forward
,
)
)
# fmt: on
# fmt: on
json_
fast
_forward
=
(
json_
jump
_forward
=
(
r
"""The information about Hogwarts is in the following JSON format\.\n"""
r
"""The information about Hogwarts is in the following JSON format\.\n"""
+
r
"""\n\{\n"""
+
r
"""\n\{\n"""
+
r
""" "name": "[\w\d\s]*",\n"""
+
r
""" "name": "[\w\d\s]*",\n"""
...
@@ -54,7 +54,7 @@ def json_gen(s):
...
@@ -54,7 +54,7 @@ def json_gen(s):
"json"
,
"json"
,
max_tokens
=
128
,
max_tokens
=
128
,
temperature
=
0
,
temperature
=
0
,
regex
=
json_
fast
_forward
,
regex
=
json_
jump
_forward
,
)
)
# fmt: on
# fmt: on
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment