Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7581e4cb
"vllm/vscode:/vscode.git/clone" did not exist on "445a2a4d1a3a383a1a36da8acb7800bb85edabeb"
Commit
7581e4cb
authored
Feb 18, 2025
by
王敏
Browse files
[fix]修复0.7.2版本benchmark_moe因新增rocm参数报错问题
parent
249fca2a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
51 deletions
+32
-51
benchmarks/kernels/benchmark_moe.py
benchmarks/kernels/benchmark_moe.py
+32
-51
No files found.
benchmarks/kernels/benchmark_moe.py
View file @
7581e4cb
...
...
@@ -183,7 +183,8 @@ def benchmark_config(
def
get_rocm_tuning_space
(
use_fp16
,
nn_moe
:
Optional
[
bool
]
=
False
):
block_mn_range
=
[
16
,
32
,
64
,
128
,
256
]
block_m_range
=
[
16
,
32
,
64
,
128
,
256
]
block_n_range
=
[
32
,
64
,
128
,
256
]
block_k_range
=
[
16
,
32
,
64
,
128
,
256
]
if
not
use_fp16
:
block_k_range
.
remove
(
16
)
# BLOCK_K=16 not supported for fp8
...
...
@@ -195,8 +196,8 @@ def get_rocm_tuning_space(use_fp16, nn_moe: Optional[bool] = False):
kpack_range
=
[
1
,
2
]
if
use_fp16
else
[]
param_ranges
=
{
"BLOCK_SIZE_M"
:
block_mn_range
,
"BLOCK_SIZE_N"
:
block_mn_range
,
"BLOCK_SIZE_M"
:
block_m_range
,
"BLOCK_SIZE_N"
:
block_n_range
,
"BLOCK_SIZE_K"
:
block_k_range
,
"GROUP_SIZE_M"
:
group_m_range
,
"num_warps"
:
num_warps_range
,
...
...
@@ -204,11 +205,12 @@ def get_rocm_tuning_space(use_fp16, nn_moe: Optional[bool] = False):
"waves_per_eu"
:
waves_per_eu_range
,
}
if
nn_moe
:
param_ranges
[
"num_ldmatrixes"
]
=
1
if
use_fp16
:
param_ranges
[
"matrix_instr_nonkdim"
]
=
matrix_instr_nonkdim_range
param_ranges
[
"kpack"
]
=
kpack_range
param_ranges
[
"num_ldmatrixes"
]
=
[
1
]
# DCU currently does not support the following parameters
# if use_fp16:
# param_ranges["matrix_instr_nonkdim"] = matrix_instr_nonkdim_range
# param_ranges["kpack"] = kpack_range
return
param_ranges
...
...
@@ -277,10 +279,11 @@ def prune_rocm_configs(M, N, K, configs, is_fp16=True):
BLOCK_SIZE_K
=
config
.
get
(
"BLOCK_SIZE_K"
)
num_warps
=
config
.
get
(
"num_warps"
)
if
is_fp16
:
matrix_instr_nonkdim
=
config
.
get
(
"matrix_instr_nonkdim"
)
if
matrix_instr_nonkdim
>
mfma
:
continue
# DCU currently does not support matrix_instr_nonkdim param
# if is_fp16:
# matrix_instr_nonkdim = config.get("matrix_instr_nonkdim")
# if matrix_instr_nonkdim > mfma:
# continue
if
mfma
==
4
and
BLOCK_SIZE_K
<
64
:
continue
# some layouts could not work properly in case
...
...
@@ -289,16 +292,18 @@ def prune_rocm_configs(M, N, K, configs, is_fp16=True):
continue
SPLIT_K
=
config
.
get
(
"SPLIT_K"
,
1
)
GROUP_M
=
config
.
get
(
"GROUP_SIZE_M"
)
if
is_fp16
:
if
(
matrix_instr_nonkdim
>
BLOCK_SIZE_M
or
matrix_instr_nonkdim
>
BLOCK_SIZE_N
):
continue
if
(
matrix_instr_nonkdim
>=
M
and
matrix_instr_nonkdim
!=
BLOCK_SIZE_M
):
continue
if
(
matrix_instr_nonkdim
>=
N
and
matrix_instr_nonkdim
!=
BLOCK_SIZE_N
):
continue
# DCU currently does not support matrix_instr_nonkdim param
# if is_fp16:
# if (matrix_instr_nonkdim > BLOCK_SIZE_M
# or matrix_instr_nonkdim > BLOCK_SIZE_N):
# continue
# if (matrix_instr_nonkdim >= M
# and matrix_instr_nonkdim != BLOCK_SIZE_M):
# continue
# if (matrix_instr_nonkdim >= N
# and matrix_instr_nonkdim != BLOCK_SIZE_N):
# continue
# Skip BLOCK_SIZE that is too large compared to M/N
# unless BLOCK_SIZE is already small enough
if
M
*
2
<
BLOCK_SIZE_M
and
BLOCK_SIZE_M
!=
16
:
...
...
@@ -451,9 +456,8 @@ class BenchmarkWorker:
def
sort_config
(
config
:
BenchmarkConfig
)
->
BenchmarkConfig
:
if
"num_ldmatrixes"
not
in
config
:
return
{
return
{
"BLOCK_SIZE_M"
:
config
[
"BLOCK_SIZE_M"
],
"BLOCK_SIZE_N"
:
...
...
@@ -467,31 +471,8 @@ def sort_config(config: BenchmarkConfig) -> BenchmarkConfig:
"num_stages"
:
config
[
"num_stages"
],
**
({
"waves_per_eu"
:
config
[
"waves_per_eu"
]
}
if
"waves_per_eu"
in
config
else
{}),
**
({
"matrix_instr_nonkdim"
:
config
[
"matrix_instr_nonkdim"
]
}
if
"matrix_instr_nonkdim"
in
config
else
{}),
**
({
"kpack"
:
config
[
"kpack"
]
}
if
"kpack"
in
config
else
{}),
}
else
:
return
{
"BLOCK_SIZE_M"
:
config
[
"BLOCK_SIZE_M"
],
"BLOCK_SIZE_N"
:
config
[
"BLOCK_SIZE_N"
],
"BLOCK_SIZE_K"
:
config
[
"BLOCK_SIZE_K"
],
"GROUP_SIZE_M"
:
config
[
"GROUP_SIZE_M"
],
"num_warps"
:
config
[
"num_warps"
],
"num_stages"
:
config
[
"num_stages"
],
"num_ldmatrixes"
:
config
[
"num_ldmatrixes"
],
"num_ldmatrixes"
:
config
[
"num_ldmatrixes"
]
}
if
"num_ldmatrixes"
in
config
else
{}),
**
({
"waves_per_eu"
:
config
[
"waves_per_eu"
]
}
if
"waves_per_eu"
in
config
else
{}),
...
...
@@ -643,7 +624,7 @@ if __name__ == "__main__":
parser
.
add_argument
(
"--tune"
,
action
=
"store_true"
)
parser
.
add_argument
(
"--nn-moe"
,
action
=
'store_true'
,
default
=
False
)
parser
.
add_argument
(
"--trust-remote-code"
,
action
=
"store_true"
)
parser
.
add_argument
(
"--moe-ep-size"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--moe-ep-size"
,
"-ep"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--num-gpus"
,
type
=
int
,
default
=
1
)
args
=
parser
.
parse_args
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment