Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
30e0b082
Commit
30e0b082
authored
Mar 24, 2025
by
zhuwenwen
Browse files
add mla tuning configs of k100-ai
parent
13b1dcfe
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
4002 additions
and
0 deletions
+4002
-0
vllm/attention/backends/configs/QH=16_KVH=1_QKD=576_VD=512_fp16_K100AI.json
...kends/configs/QH=16_KVH=1_QKD=576_VD=512_fp16_K100AI.json
+1334
-0
vllm/attention/backends/configs/QH=4_KVH=1_QKD=576_VD=512_fp16_K100AI.json
...ckends/configs/QH=4_KVH=1_QKD=576_VD=512_fp16_K100AI.json
+1334
-0
vllm/attention/backends/configs/QH=8_KVH=1_QKD=576_VD=512_fp16_K100AI.json
...ckends/configs/QH=8_KVH=1_QKD=576_VD=512_fp16_K100AI.json
+1334
-0
No files found.
vllm/attention/backends/configs/QH=16_KVH=1_QKD=576_VD=512_fp16_K100AI.json
0 → 100644
View file @
30e0b082
{
"1"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
2
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
32.48000144958496
},
"100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
46.720001846551895
},
"400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
95.35899758338928
},
"700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
134.8789930343628
},
"1000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
183.20000171661377
},
"1300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
232.15900361537933
},
"1600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
270.7189917564392
},
"1900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
319.8390007019043
},
"2200"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
368.47901344299316
},
"2500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
416.1590039730072
},
"2800"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
455.9989869594574
},
"3100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
502.87801027297974
},
"3400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
551.6780018806458
},
"3700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
588.4780287742615
},
"4000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
637.9179954528809
},
"4300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
684.3180060386658
},
"4600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
722.5580215454102
},
"4900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
769.2769765853882
},
"5000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
787.0380282402039
},
"5500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
855.5974960327148
},
"6000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
930.3969740867615
},
"6500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1007.5169801712036
},
"7000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1083.035945892334
},
"7500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1160.4759693145752
},
"8000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1223.196029663086
},
"8500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1304.636001586914
},
"9000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1379.5154094696045
},
"9500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1458.2350254058838
},
"10000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1534.7150564193726
},
"10500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1603.8349866867065
},
"11000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1674.875020980835
},
"11500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1748.3140230178833
},
"12000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1829.4340372085571
},
"12500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1902.2339582443237
},
"13000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1981.5934896469116
},
"13500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2050.473690032959
},
"14000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2124.7940063476562
},
"14500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2205.6729793548584
},
"15000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2276.473045349121
},
"15500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2357.2731018066406
},
"16000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2413.912057876587
},
"16500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2496.7122077941895
},
"17000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2578.2310962677
},
"17500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2654.952049255371
},
"18000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2733.431100845337
},
"18500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2802.1509647369385
},
"19000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2874.070882797241
},
"19500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2944.5509910583496
},
"20000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3025.5908966064453
},
"20500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3102.5500297546387
},
"21000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3181.8299293518066
},
"21500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3240.9496307373047
},
"22000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3319.988965988159
},
"22500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3399.5089530944824
},
"23000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3478.549003601074
},
"23500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3546.7889308929443
},
"24000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3604.30908203125
},
"24500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3695.6679821014404
},
"25000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3773.668050765991
},
"25500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3846.3079929351807
},
"26000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3921.267032623291
},
"26500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3994.7071075439453
},
"27000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4069.9071884155273
},
"27500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4145.586967468262
},
"28000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4219.986915588379
},
"28500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4295.1860427856445
},
"29000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4377.1071434021
},
"29500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4437.265872955322
},
"30000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4522.225379943848
},
"30500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4587.345123291016
},
"31000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4673.824787139893
},
"31500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4754.70495223999
},
"32000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4792.4652099609375
},
"32500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4895.984172821045
}
}
\ No newline at end of file
vllm/attention/backends/configs/QH=4_KVH=1_QKD=576_VD=512_fp16_K100AI.json
0 → 100644
View file @
30e0b082
{
"1"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
2
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
31.039999797940254
},
"100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
43.99999976158142
},
"400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
90.87999910116196
},
"700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
129.7599971294403
},
"1000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
176.95899307727814
},
"1300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
224.15900230407715
},
"1600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
261.9189918041229
},
"1900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
310.2389872074127
},
"2200"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
356.63801431655884
},
"2500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
403.1989872455597
},
"2800"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
441.75800681114197
},
"3100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
489.1180098056793
},
"3400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
535.7584953308105
},
"3700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
574.7179985046387
},
"4000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
620.9579706192017
},
"4300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
667.9970026016235
},
"4600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
705.7579755783081
},
"4900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
752.8769969940186
},
"5000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
772.4779844284058
},
"5500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
838.0780220031738
},
"6000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
913.5969877243042
},
"6500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
991.117000579834
},
"7000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1065.7559633255005
},
"7500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1142.6365375518799
},
"8000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1203.995943069458
},
"8500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1283.5159301757812
},
"9000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1360.2359294891357
},
"9500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1439.674973487854
},
"10000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1519.9949741363525
},
"10500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1596.155047416687
},
"11000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1653.7549495697021
},
"11500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1737.5949621200562
},
"12000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1812.3149871826172
},
"12500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1889.4339799880981
},
"13000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1967.034101486206
},
"13500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2027.8329849243164
},
"14000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2114.712953567505
},
"14500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2183.993101119995
},
"15000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2258.3930492401123
},
"15500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2337.592601776123
},
"16000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2388.1518840789795
},
"16500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2477.2720336914062
},
"17000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2552.63090133667
},
"17500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2631.831645965576
},
"18000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2701.2710571289062
},
"18500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2781.8310260772705
},
"19000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2847.7511405944824
},
"19500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2929.4309616088867
},
"20000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3000.7104873657227
},
"20500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3072.46994972229
},
"21000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3165.750026702881
},
"21500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3223.269462585449
},
"22000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3299.190044403076
},
"22500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3377.4290084838867
},
"23000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3457.2689533233643
},
"23500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3525.3491401672363
},
"24000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3569.4289207458496
},
"24500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3679.6679496765137
},
"25000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3744.868278503418
},
"25500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3823.50754737854
},
"26000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3897.9079723358154
},
"26500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3982.8670024871826
},
"27000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4039.1879081726074
},
"27500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4112.546920776367
},
"28000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4191.186904907227
},
"28500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4264.14680480957
},
"29000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4359.186172485352
},
"29500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4394.065856933594
},
"30000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4495.505809783936
},
"30500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4560.785293579102
},
"31000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4630.865573883057
},
"31500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4712.944984436035
},
"32000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4757.744789123535
},
"32500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4864.785194396973
}
}
\ No newline at end of file
vllm/attention/backends/configs/QH=8_KVH=1_QKD=576_VD=512_fp16_K100AI.json
0 → 100644
View file @
30e0b082
{
"1"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
2
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
32.32000023126602
},
"100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
44.79900002479553
},
"400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
92.96000003814697
},
"700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
132.31900334358215
},
"1000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
180.16000092029572
},
"1300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
228.31900417804718
},
"1600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
267.35949516296387
},
"1900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
315.99900126457214
},
"2200"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
363.6789917945862
},
"2500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
410.8784794807434
},
"2800"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
450.55800676345825
},
"3100"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
498.7179934978485
},
"3400"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
545.5989837646484
},
"3700"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
584.4780206680298
},
"4000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
631.9980025291443
},
"4300"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
676.3975024223328
},
"4600"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
717.1175479888916
},
"4900"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
764.9570107460022
},
"5000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
782.1570038795471
},
"5500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
847.3569750785828
},
"6000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
925.4369735717773
},
"6500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1001.3569593429565
},
"7000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1074.7170448303223
},
"7500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1148.076057434082
},
"8000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1210.716962814331
},
"8500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1292.4760580062866
},
"9000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1365.915060043335
},
"9500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1445.5955028533936
},
"10000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1520.3150510787964
},
"10500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1597.1139669418335
},
"11000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1663.0350351333618
},
"11500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1740.6350374221802
},
"12000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1814.2340183258057
},
"12500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1885.0340843200684
},
"13000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
1964.553952217102
},
"13500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2028.9530754089355
},
"14000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2108.793020248413
},
"14500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2182.873010635376
},
"15000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2257.272958755493
},
"15500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2341.752529144287
},
"16000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2396.4719772338867
},
"16500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2481.5919399261475
},
"17000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2564.4710063934326
},
"17500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2635.511875152588
},
"18000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2705.9109210968018
},
"18500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2782.551050186157
},
"19000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2849.9109745025635
},
"19500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2927.1910190582275
},
"20000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
2999.030113220215
},
"20500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3082.710027694702
},
"21000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3159.670114517212
},
"21500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3208.3096504211426
},
"22000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3299.509048461914
},
"22500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3373.589038848877
},
"23000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3443.82905960083
},
"23500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3526.068687438965
},
"24000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3572.3090171813965
},
"24500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3671.1881160736084
},
"25000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3740.5481338500977
},
"25500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3825.1874446868896
},
"26000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3895.3471183776855
},
"26500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
3965.2678966522217
},
"27000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4029.2677879333496
},
"27500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4103.026390075684
},
"28000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4190.866947174072
},
"28500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4271.986484527588
},
"29000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4334.706783294678
},
"29500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4397.904872894287
},
"30000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4479.825973510742
},
"30500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4569.265365600586
},
"31000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4626.384735107422
},
"31500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4720.785140991211
},
"32000"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4744.144916534424
},
"32500"
:
{
"kernel_kind"
:
"v1_2stages_tc"
,
"best_config"
:
{
"stage1"
:
{
"BLOCK_N"
:
16
,
"num_stages"
:
1
,
"num_warps"
:
4
,
"num_ldmatrixes"
:
0
},
"stage2"
:
{
"BLOCK_N"
:
64
,
"num_stages"
:
1
,
"num_warps"
:
8
,
"num_ldmatrixes"
:
0
}
},
"best_us"
:
4870.384216308594
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment