Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
75b31a2a
Unverified
Commit
75b31a2a
authored
Jun 30, 2024
by
Ying Sheng
Committed by
GitHub
Jun 30, 2024
Browse files
Update run_batch interface and max_prefill_tokens (#574)
parent
11616fc6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
19 additions
and
14 deletions
+19
-14
docs/test_process.md
docs/test_process.md
+2
-7
python/sglang/lang/ir.py
python/sglang/lang/ir.py
+13
-1
python/sglang/srt/managers/controller/tp_worker.py
python/sglang/srt/managers/controller/tp_worker.py
+4
-6
No files found.
docs/test_process.md
View file @
75b31a2a
## SRT Unit Tests
### Low-level API
### Latency Alignment
```
cd sglang/test/srt/model
python3 test_llama_low_api.py
python3 test_llama_extend.py
python3 test_llava_low_api.py
python3 bench_llama_low_api.py
python -m sglang.bench_latency --model-path meta-llama/Llama-2-7b-chat-hf --mem-fraction-static 0.8 --batch 32 --input-len 512 --output-len 256
```
### High-level API
...
...
python/sglang/lang/ir.py
View file @
75b31a2a
...
...
@@ -120,6 +120,7 @@ class SglFunction:
argspec
=
inspect
.
getfullargspec
(
func
)
assert
argspec
.
args
[
0
]
==
"s"
,
'The first argument must be "s"'
self
.
arg_names
=
argspec
.
args
[
1
:]
self
.
arg_defaults
=
argspec
.
defaults
if
argspec
.
defaults
is
not
None
else
[]
def
bind
(
self
,
**
kwargs
):
assert
all
(
key
in
self
.
arg_names
for
key
in
kwargs
)
...
...
@@ -178,7 +179,18 @@ class SglFunction:
assert
isinstance
(
batch_kwargs
,
(
list
,
tuple
))
if
len
(
batch_kwargs
)
==
0
:
return
[]
assert
isinstance
(
batch_kwargs
[
0
],
dict
)
if
not
isinstance
(
batch_kwargs
[
0
],
dict
):
num_programs
=
len
(
batch_kwargs
)
# change the list of argument values to dict of arg_name -> arg_value
batch_kwargs
=
[
{
self
.
arg_names
[
i
]:
v
for
i
,
v
in
enumerate
(
arg_values
)}
for
arg_values
in
batch_kwargs
if
isinstance
(
arg_values
,
(
list
,
tuple
))
and
len
(
self
.
arg_names
)
-
len
(
self
.
arg_defaults
)
<=
len
(
arg_values
)
<=
len
(
self
.
arg_names
)
]
# Ensure to raise an exception if the number of arguments mismatch
if
len
(
batch_kwargs
)
!=
num_programs
:
raise
Exception
(
"Given arguments mismatch the SGL function signature"
)
default_sampling_para
=
SglSamplingParams
(
max_new_tokens
=
max_new_tokens
,
...
...
python/sglang/srt/managers/controller/tp_worker.py
View file @
75b31a2a
...
...
@@ -98,10 +98,7 @@ class ModelTpServer:
)
self
.
max_total_num_tokens
=
self
.
model_runner
.
max_total_num_tokens
self
.
max_prefill_tokens
=
(
max
(
self
.
model_config
.
context_len
,
min
(
self
.
max_total_num_tokens
//
6
,
32768
),
)
4096
if
server_args
.
max_prefill_tokens
is
None
else
server_args
.
max_prefill_tokens
)
...
...
@@ -371,8 +368,9 @@ class ModelTpServer:
if
(
req
.
extend_input_len
+
req
.
max_new_tokens
()
+
new_batch_total_tokens
<
available_size
and
req
.
extend_input_len
+
new_batch_input_tokens
<
self
.
max_prefill_tokens
and
(
req
.
extend_input_len
+
new_batch_input_tokens
<=
self
.
max_prefill_tokens
or
len
(
can_run_list
)
==
0
)
):
delta
=
self
.
tree_cache
.
inc_lock_ref
(
req
.
last_node
)
available_size
+=
delta
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment