Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
c77c1e05
Unverified
Commit
c77c1e05
authored
Nov 07, 2024
by
Chayenne
Committed by
GitHub
Nov 08, 2024
Browse files
fix black in pre-commit (#1940)
parent
dca87ec3
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
25 additions
and
15 deletions
+25
-15
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+1
-1
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+14
-2
python/sglang/utils.py
python/sglang/utils.py
+1
-0
rust/test_bindings.py
rust/test_bindings.py
+1
-1
scripts/playground/reference_hf.py
scripts/playground/reference_hf.py
+2
-8
test/srt/models/test_generation_models.py
test/srt/models/test_generation_models.py
+1
-1
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+1
-0
test/srt/test_skip_tokenizer_init.py
test/srt/test_skip_tokenizer_init.py
+1
-0
test/srt/test_srt_engine.py
test/srt/test_srt_engine.py
+3
-2
No files found.
python/sglang/srt/server_args.py
View file @
c77c1e05
...
...
@@ -448,7 +448,7 @@ class ServerArgs:
"--decode-log-interval"
,
type
=
int
,
default
=
ServerArgs
.
decode_log_interval
,
help
=
"The log interval of decode batch"
help
=
"The log interval of decode batch"
,
)
# Data parallelism
...
...
python/sglang/test/test_utils.py
View file @
c77c1e05
...
...
@@ -742,7 +742,13 @@ def run_mmlu_test(
finally
:
pass
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
)
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
,
)
def
run_mulit_request_test
(
...
...
@@ -775,4 +781,10 @@ def run_mulit_request_test(
with
ThreadPoolExecutor
(
2
)
as
executor
:
list
(
executor
.
map
(
run_one
,
list
(
range
(
4
))))
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
)
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
,
)
python/sglang/utils.py
View file @
c77c1e05
...
...
@@ -349,6 +349,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
def
terminate_process
(
process
):
from
sglang.srt.utils
import
kill_child_process
kill_child_process
(
process
.
pid
,
include_self
=
True
)
...
...
rust/test_bindings.py
View file @
c77c1e05
...
...
@@ -11,7 +11,7 @@ router = router.Router(
"http://localhost:30000"
,
"http://localhost:30002"
,
],
policy
=
"random"
policy
=
"random"
,
)
# Start the router - this will block and run the server
...
...
scripts/playground/reference_hf.py
View file @
c77c1e05
...
...
@@ -104,15 +104,9 @@ if __name__ == "__main__":
default
=
"TinyLlama/TinyLlama-1.1B-Chat-v0.4"
,
# default="meta-llama/Llama-2-7b-chat-hf",
)
parser
.
add_argument
(
"--max-new-tokens"
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
"--max-new-tokens"
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
"--dtype"
,
type
=
str
,
default
=
"float16"
)
parser
.
add_argument
(
"--dtype"
,
type
=
str
,
default
=
"float16"
)
args
=
parser
.
parse_args
()
...
...
test/srt/models/test_generation_models.py
View file @
c77c1e05
...
...
@@ -56,7 +56,7 @@ ALL_OTHER_MODELS = [
ModelCase
(
"HuggingFaceTB/SmolLM-135M-Instruct"
,
skip_long_prompt
=
True
),
ModelCase
(
"allenai/OLMo-1B-0724-hf"
,
decode_tolerance
=
8e-2
,
skip_long_prompt
=
True
),
ModelCase
(
"THUDM/glm-4-9b-chat"
),
ModelCase
(
"openai-community/gpt2"
)
ModelCase
(
"openai-community/gpt2"
)
,
]
TORCH_DTYPES
=
[
torch
.
float16
]
...
...
test/srt/test_openai_server.py
View file @
c77c1e05
...
...
@@ -3,6 +3,7 @@ python3 -m unittest test_openai_server.TestOpenAIServer.test_batch
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion
"""
import
json
import
time
import
unittest
...
...
test/srt/test_skip_tokenizer_init.py
View file @
c77c1e05
"""
python3 -m unittest test_skip_tokenizer_init.TestSkipTokenizerInit.test_parallel_sample
"""
import
json
import
unittest
...
...
test/srt/test_srt_engine.py
View file @
c77c1e05
...
...
@@ -110,7 +110,6 @@ class TestSRTEngine(unittest.TestCase):
def
test_5_prompt_input_ids_consistency
(
self
):
prompt
=
"The capital of UK is"
model_path
=
DEFAULT_MODEL_NAME_FOR_TEST
engine
=
sgl
.
Engine
(
model_path
=
model_path
,
random_seed
=
42
,
log_level
=
"error"
)
sampling_params
=
{
"temperature"
:
0
,
"max_new_tokens"
:
8
}
...
...
@@ -118,7 +117,9 @@ class TestSRTEngine(unittest.TestCase):
tokenizer
=
get_tokenizer
(
model_path
)
token_ids
=
tokenizer
.
encode
(
prompt
)
out2
=
engine
.
generate
(
input_ids
=
token_ids
,
sampling_params
=
sampling_params
)[
"text"
]
out2
=
engine
.
generate
(
input_ids
=
token_ids
,
sampling_params
=
sampling_params
)[
"text"
]
engine
.
shutdown
()
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment