Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
a385ee27
Unverified
Commit
a385ee27
authored
Jun 25, 2024
by
Lianmin Zheng
Committed by
GitHub
Jun 25, 2024
Browse files
Warmup cublas (#566)
parent
eb1ae6ae
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
17 additions
and
4 deletions
+17
-4
benchmark/gsm8k/README.md
benchmark/gsm8k/README.md
+1
-1
benchmark/gsm8k/download_data.sh
benchmark/gsm8k/download_data.sh
+2
-0
benchmark/mmlu/README.md
benchmark/mmlu/README.md
+1
-2
benchmark/mmlu/download_data.sh
benchmark/mmlu/download_data.sh
+2
-0
python/sglang/srt/managers/controller/model_runner.py
python/sglang/srt/managers/controller/model_runner.py
+10
-0
python/sglang/srt/managers/controller/tp_worker.py
python/sglang/srt/managers/controller/tp_worker.py
+1
-1
No files found.
benchmark/gsm8k/README.md
View file @
a385ee27
## Download data
## Download data
```
```
wget https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
bash download_data.sh
```
```
## Run benchmark
## Run benchmark
...
...
benchmark/gsm8k/download_data.sh
0 → 100644
View file @
a385ee27
wget https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/train.jsonl
wget https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
\ No newline at end of file
benchmark/mmlu/README.md
View file @
a385ee27
## Download data
## Download data
```
```
wget https://people.eecs.berkeley.edu/~hendrycks/data.tar
bash download_data.sh
tar xf data.tar
```
```
## Run benchmark
## Run benchmark
...
...
benchmark/mmlu/download_data.sh
0 → 100644
View file @
a385ee27
wget https://people.eecs.berkeley.edu/~hendrycks/data.tar
tar xf data.tar
\ No newline at end of file
python/sglang/srt/managers/controller/model_runner.py
View file @
a385ee27
...
@@ -270,6 +270,7 @@ class ModelRunner:
...
@@ -270,6 +270,7 @@ class ModelRunner:
# Load the model and create memory pool
# Load the model and create memory pool
self
.
load_model
()
self
.
load_model
()
self
.
init_memory_pool
(
total_gpu_memory
)
self
.
init_memory_pool
(
total_gpu_memory
)
self
.
init_cublas
()
self
.
init_flash_infer
()
self
.
init_flash_infer
()
def
load_model
(
self
):
def
load_model
(
self
):
...
@@ -346,6 +347,15 @@ class ModelRunner:
...
@@ -346,6 +347,15 @@ class ModelRunner:
f
"avail mem=
{
get_available_gpu_memory
(
self
.
gpu_id
):.
2
f
}
GB"
f
"avail mem=
{
get_available_gpu_memory
(
self
.
gpu_id
):.
2
f
}
GB"
)
)
def
init_cublas
(
self
):
"""We need to run a small matmul to init cublas. Otherwise, it will raise some errors later."""
dtype
=
torch
.
float16
device
=
"cuda"
a
=
torch
.
ones
((
16
,
16
),
dtype
=
dtype
,
device
=
device
)
b
=
torch
.
ones
((
16
,
16
),
dtype
=
dtype
,
device
=
device
)
c
=
a
@
b
return
c
def
init_flash_infer
(
self
):
def
init_flash_infer
(
self
):
if
global_server_args_dict
.
get
(
"enable_flashinfer"
,
False
):
if
global_server_args_dict
.
get
(
"enable_flashinfer"
,
False
):
from
flashinfer
import
(
from
flashinfer
import
(
...
...
python/sglang/srt/managers/controller/tp_worker.py
View file @
a385ee27
...
@@ -410,7 +410,7 @@ class ModelTpServer:
...
@@ -410,7 +410,7 @@ class ModelTpServer:
self
.
tree_cache_metrics
[
"hit"
]
/
self
.
tree_cache_metrics
[
"total"
]
self
.
tree_cache_metrics
[
"hit"
]
/
self
.
tree_cache_metrics
[
"total"
]
)
)
logger
.
info
(
logger
.
info
(
f
"[gpu_id=
{
self
.
gpu_id
}
] Prefil batch. "
f
"[gpu_id=
{
self
.
gpu_id
}
] Prefill batch. "
f
"#new-seq:
{
len
(
can_run_list
)
}
, "
f
"#new-seq:
{
len
(
can_run_list
)
}
, "
f
"#new-token:
{
new_batch_input_tokens
}
, "
f
"#new-token:
{
new_batch_input_tokens
}
, "
f
"#cached-token:
{
hit_tokens
}
, "
f
"#cached-token:
{
hit_tokens
}
, "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment