Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
c77c1e05
"Src/NLP/GPT2/tokenization.h" did not exist on "6f5d13e8f5ac918b319c3918a3dc17d786c3e3a9"
Unverified
Commit
c77c1e05
authored
Nov 07, 2024
by
Chayenne
Committed by
GitHub
Nov 08, 2024
Browse files
fix black in pre-commit (#1940)
parent
dca87ec3
Changes
29
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
25 additions
and
15 deletions
+25
-15
python/sglang/srt/server_args.py
python/sglang/srt/server_args.py
+1
-1
python/sglang/test/test_utils.py
python/sglang/test/test_utils.py
+14
-2
python/sglang/utils.py
python/sglang/utils.py
+1
-0
rust/test_bindings.py
rust/test_bindings.py
+1
-1
scripts/playground/reference_hf.py
scripts/playground/reference_hf.py
+2
-8
test/srt/models/test_generation_models.py
test/srt/models/test_generation_models.py
+1
-1
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+1
-0
test/srt/test_skip_tokenizer_init.py
test/srt/test_skip_tokenizer_init.py
+1
-0
test/srt/test_srt_engine.py
test/srt/test_srt_engine.py
+3
-2
No files found.
python/sglang/srt/server_args.py
View file @
c77c1e05
...
...
@@ -448,7 +448,7 @@ class ServerArgs:
"--decode-log-interval"
,
type
=
int
,
default
=
ServerArgs
.
decode_log_interval
,
help
=
"The log interval of decode batch"
help
=
"The log interval of decode batch"
,
)
# Data parallelism
...
...
python/sglang/test/test_utils.py
View file @
c77c1e05
...
...
@@ -742,7 +742,13 @@ def run_mmlu_test(
finally
:
pass
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
)
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
,
)
def
run_mulit_request_test
(
...
...
@@ -775,4 +781,10 @@ def run_mulit_request_test(
with
ThreadPoolExecutor
(
2
)
as
executor
:
list
(
executor
.
map
(
run_one
,
list
(
range
(
4
))))
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
)
run_and_check_memory_leak
(
workload_func
,
disable_radix_cache
,
enable_mixed_chunk
,
enable_overlap
,
chunked_prefill_size
,
)
python/sglang/utils.py
View file @
c77c1e05
...
...
@@ -349,6 +349,7 @@ def wait_for_server(base_url: str, timeout: int = None) -> None:
def
terminate_process
(
process
):
from
sglang.srt.utils
import
kill_child_process
kill_child_process
(
process
.
pid
,
include_self
=
True
)
...
...
rust/test_bindings.py
View file @
c77c1e05
...
...
@@ -11,7 +11,7 @@ router = router.Router(
"http://localhost:30000"
,
"http://localhost:30002"
,
],
policy
=
"random"
policy
=
"random"
,
)
# Start the router - this will block and run the server
...
...
scripts/playground/reference_hf.py
View file @
c77c1e05
...
...
@@ -104,15 +104,9 @@ if __name__ == "__main__":
default
=
"TinyLlama/TinyLlama-1.1B-Chat-v0.4"
,
# default="meta-llama/Llama-2-7b-chat-hf",
)
parser
.
add_argument
(
"--max-new-tokens"
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
"--max-new-tokens"
,
type
=
int
,
default
=
16
)
parser
.
add_argument
(
"--dtype"
,
type
=
str
,
default
=
"float16"
)
parser
.
add_argument
(
"--dtype"
,
type
=
str
,
default
=
"float16"
)
args
=
parser
.
parse_args
()
...
...
test/srt/models/test_generation_models.py
View file @
c77c1e05
...
...
@@ -56,7 +56,7 @@ ALL_OTHER_MODELS = [
ModelCase
(
"HuggingFaceTB/SmolLM-135M-Instruct"
,
skip_long_prompt
=
True
),
ModelCase
(
"allenai/OLMo-1B-0724-hf"
,
decode_tolerance
=
8e-2
,
skip_long_prompt
=
True
),
ModelCase
(
"THUDM/glm-4-9b-chat"
),
ModelCase
(
"openai-community/gpt2"
)
ModelCase
(
"openai-community/gpt2"
)
,
]
TORCH_DTYPES
=
[
torch
.
float16
]
...
...
test/srt/test_openai_server.py
View file @
c77c1e05
...
...
@@ -3,6 +3,7 @@ python3 -m unittest test_openai_server.TestOpenAIServer.test_batch
python3 -m unittest test_openai_server.TestOpenAIServer.test_completion
"""
import
json
import
time
import
unittest
...
...
test/srt/test_skip_tokenizer_init.py
View file @
c77c1e05
"""
python3 -m unittest test_skip_tokenizer_init.TestSkipTokenizerInit.test_parallel_sample
"""
import
json
import
unittest
...
...
test/srt/test_srt_engine.py
View file @
c77c1e05
...
...
@@ -110,7 +110,6 @@ class TestSRTEngine(unittest.TestCase):
def
test_5_prompt_input_ids_consistency
(
self
):
prompt
=
"The capital of UK is"
model_path
=
DEFAULT_MODEL_NAME_FOR_TEST
engine
=
sgl
.
Engine
(
model_path
=
model_path
,
random_seed
=
42
,
log_level
=
"error"
)
sampling_params
=
{
"temperature"
:
0
,
"max_new_tokens"
:
8
}
...
...
@@ -118,7 +117,9 @@ class TestSRTEngine(unittest.TestCase):
tokenizer
=
get_tokenizer
(
model_path
)
token_ids
=
tokenizer
.
encode
(
prompt
)
out2
=
engine
.
generate
(
input_ids
=
token_ids
,
sampling_params
=
sampling_params
)[
"text"
]
out2
=
engine
.
generate
(
input_ids
=
token_ids
,
sampling_params
=
sampling_params
)[
"text"
]
engine
.
shutdown
()
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment