Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
6e09cf6a
Unverified
Commit
6e09cf6a
authored
May 12, 2024
by
Lianmin Zheng
Committed by
GitHub
May 12, 2024
Browse files
Misc fixes (#432)
parent
72bb3443
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
23 additions
and
5 deletions
+23
-5
docs/release_process.md
docs/release_process.md
+8
-0
docs/test_process.md
docs/test_process.md
+6
-0
playground/load_tokenizer.py
playground/load_tokenizer.py
+2
-1
python/sglang/srt/managers/router/model_rpc.py
python/sglang/srt/managers/router/model_rpc.py
+4
-1
python/sglang/srt/managers/router/model_runner.py
python/sglang/srt/managers/router/model_runner.py
+2
-2
test/srt/test_openai_server.py
test/srt/test_openai_server.py
+1
-1
No files found.
docs/release_process.md
0 → 100644
View file @
6e09cf6a
```
pip install build twine
```
```
cd python
bash upload_pypi.sh
```
\ No newline at end of file
docs/test_process.md
View file @
6e09cf6a
...
...
@@ -81,3 +81,9 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
cd test/lang
python3 run_all.py
```
## OpenAI API server
```
cd test/srt
python test_openai_server.py
```
\ No newline at end of file
playground/load_tokenizer.py
View file @
6e09cf6a
"""Interactive playground: load a Hugging Face tokenizer and drop into a REPL.

Usage:
    python load_tokenizer.py [model_name]

Defaults to "meta-llama/Meta-Llama-3-8B-Instruct" when no argument is given,
matching the previous hard-coded behavior.
"""
import code
import sys

import transformers

# Other models to try, e.g.: "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"

# Generalization: allow overriding the model name from the command line
# instead of editing the file; the old default is kept for compatibility.
name = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_NAME

# NOTE: from_pretrained may download tokenizer files on first use.
t = transformers.AutoTokenizer.from_pretrained(name)

# Drop into an interactive shell with `t` and `name` bound for exploration.
code.interact(local=locals())
python/sglang/srt/managers/router/model_rpc.py
View file @
6e09cf6a
...
...
@@ -426,7 +426,9 @@ class ModelRpcServer:
# Only transfer the selected logprobs of the next token to CPU to reduce overhead.
if
last_logprobs
is
not
None
:
last_token_logprobs
=
(
last_logprobs
[
torch
.
arange
(
len
(
batch
.
reqs
)),
next_token_ids
].
tolist
()
last_logprobs
[
torch
.
arange
(
len
(
batch
.
reqs
),
device
=
next_token_ids
.
device
),
next_token_ids
].
tolist
()
)
next_token_ids
=
next_token_ids
.
tolist
()
...
...
@@ -587,6 +589,7 @@ class ModelRpcServer:
-
req
.
prompt_tokens
,
"completion_tokens_wo_jump_forward"
:
req
.
completion_tokens_wo_jump_forward
,
"finish_reason"
:
str
(
req
.
finish_reason
),
# FIXME: convert to the correct string
"hit_stop_str"
:
req
.
hit_stop_str
,
}
if
req
.
return_logprob
:
(
...
...
python/sglang/srt/managers/router/model_runner.py
View file @
6e09cf6a
...
...
@@ -110,8 +110,8 @@ class InputMetadata:
self
.
kv_last_page_len
=
torch
.
ones
(
(
self
.
batch_size
,),
dtype
=
torch
.
int32
,
device
=
"cuda"
)
req_pool_indices_cpu
=
self
.
req_pool_indices
.
cpu
().
tolist
()
seq_lens_cpu
=
self
.
seq_lens
.
tolist
()
req_pool_indices_cpu
=
self
.
req_pool_indices
.
cpu
().
numpy
()
seq_lens_cpu
=
self
.
seq_lens
.
cpu
().
numpy
()
self
.
kv_indices
=
torch
.
cat
(
[
self
.
req_to_token_pool
.
req_to_token
[
...
...
test/srt/test_openai_server.py
View file @
6e09cf6a
...
...
@@ -163,7 +163,7 @@ def test_regex(args):
regex
=
(
r
"""\{\n"""
+
r
""" "name": "[\w]+",\n"""
+
r
""" "population": [\
w\d\s
]+\n"""
+
r
""" "population": [\
d
]+\n"""
+
r
"""\}"""
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment