Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
f30abd09
"docs/vscode:/vscode.git/clone" did not exist on "f00cd6efbd00b0273f58c393a617415b5d1d410e"
Unverified
Commit
f30abd09
authored
Jan 19, 2024
by
Lianmin Zheng
Committed by
GitHub
Jan 19, 2024
Browse files
Improve error message & Add vicuna template (#57)
parent
40ab1f01
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
17 additions
and
0 deletions
+17
-0
python/sglang/srt/conversation.py
python/sglang/srt/conversation.py
+12
-0
python/sglang/srt/managers/router/model_runner.py
python/sglang/srt/managers/router/model_runner.py
+5
-0
No files found.
python/sglang/srt/conversation.py
View file @
f30abd09
...
@@ -388,3 +388,15 @@ register_conv_template(
...
@@ -388,3 +388,15 @@ register_conv_template(
stop_str
=
[
"<|endoftext|>"
,
"<|im_end|>"
],
stop_str
=
[
"<|endoftext|>"
,
"<|im_end|>"
],
)
)
)
)
register_conv_template
(
Conversation
(
name
=
"vicuna_v1.1"
,
system_message
=
"A chat between a curious user and an artificial intelligence assistant. "
"The assistant gives helpful, detailed, and polite answers to the user's questions."
,
roles
=
(
"USER"
,
"ASSISTANT"
),
sep_style
=
SeparatorStyle
.
ADD_COLON_TWO
,
sep
=
" "
,
sep2
=
"</s>"
,
)
)
python/sglang/srt/managers/router/model_runner.py
View file @
f30abd09
...
@@ -297,6 +297,11 @@ class ModelRunner:
...
@@ -297,6 +297,11 @@ class ModelRunner:
def
init_memory_pool
(
self
,
total_gpu_memory
):
def
init_memory_pool
(
self
,
total_gpu_memory
):
self
.
max_total_num_token
=
self
.
profile_max_num_token
(
total_gpu_memory
)
self
.
max_total_num_token
=
self
.
profile_max_num_token
(
total_gpu_memory
)
if
self
.
max_total_num_token
<=
0
:
raise
RuntimeError
(
"Not enought memory. "
"Please try to increase --mem-fraction-static."
)
self
.
req_to_token_pool
=
ReqToTokenPool
(
self
.
req_to_token_pool
=
ReqToTokenPool
(
int
(
self
.
max_total_num_token
/
self
.
model_config
.
context_len
*
256
),
int
(
self
.
max_total_num_token
/
self
.
model_config
.
context_len
*
256
),
self
.
model_config
.
context_len
+
8
,
self
.
model_config
.
context_len
+
8
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment