Unverified commit 62f66be1, authored by Jee Jee Li and committed by GitHub
Browse files

[Bugfix] Fix Qwen3-coder moe tuned config (#24072)


Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
parent 81c53ef5
...@@ -678,7 +678,11 @@ def main(args: argparse.Namespace): ...@@ -678,7 +678,11 @@ def main(args: argparse.Namespace):
is_fp16 = not (use_fp8_w8a8 or use_int8_w8a16) is_fp16 = not (use_fp8_w8a8 or use_int8_w8a16)
search_space = get_configs_compute_bound(is_fp16, block_quant_shape) search_space = get_configs_compute_bound(is_fp16, block_quant_shape)
print(f"Start tuning over {len(search_space)} configurations...") print(f"Start tuning over {len(search_space)} configurations...")
if use_deep_gemm:
raise ValueError(
"Tuning with --use-deep-gemm is not supported as it only tunes Triton "
"kernels. Please remove the flag."
)
start = time.time() start = time.time()
configs = _distribute( configs = _distribute(
"tune", "tune",
......
...@@ -18,18 +18,18 @@ ...@@ -18,18 +18,18 @@
"4": { "4": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 1, "GROUP_SIZE_M": 32,
"num_warps": 4, "num_warps": 4,
"num_stages": 3 "num_stages": 3
}, },
"8": { "8": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 64,
"num_warps": 4, "num_warps": 4,
"num_stages": 4 "num_stages": 3
}, },
"16": { "16": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
...@@ -58,7 +58,7 @@ ...@@ -58,7 +58,7 @@
"48": { "48": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 64,
"num_warps": 4, "num_warps": 4,
"num_stages": 4 "num_stages": 4
...@@ -74,73 +74,73 @@ ...@@ -74,73 +74,73 @@
"96": { "96": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 16, "GROUP_SIZE_M": 32,
"num_warps": 4, "num_warps": 4,
"num_stages": 3 "num_stages": 4
}, },
"128": { "128": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1, "GROUP_SIZE_M": 64,
"num_warps": 4, "num_warps": 4,
"num_stages": 2 "num_stages": 4
}, },
"256": { "256": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 1,
"num_warps": 4, "num_warps": 4,
"num_stages": 3 "num_stages": 4
}, },
"512": { "512": {
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 64,
"num_warps": 8, "num_warps": 4,
"num_stages": 4 "num_stages": 4
}, },
"1024": { "1024": {
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 16, "GROUP_SIZE_M": 64,
"num_warps": 4, "num_warps": 4,
"num_stages": 4 "num_stages": 4
}, },
"1536": { "1536": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 1,
"num_warps": 8,
"num_stages": 4
},
"2048": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 16, "GROUP_SIZE_M": 16,
"num_warps": 8, "num_warps": 4,
"num_stages": 5 "num_stages": 3
}, },
"3072": { "2048": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 64,
"num_warps": 4, "num_warps": 4,
"num_stages": 4 "num_stages": 3
},
"3072": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 32,
"num_warps": 4,
"num_stages": 3
}, },
"4096": { "4096": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 64, "GROUP_SIZE_M": 16,
"num_warps": 8, "num_warps": 4,
"num_stages": 5 "num_stages": 3
} }
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment