Commit 30e0b082 authored by zhuwenwen's avatar zhuwenwen
Browse files

add mla tuning configs of k100-ai

parent 13b1dcfe
{
"1": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 2,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 32.48000144958496
},
"100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 46.720001846551895
},
"400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 95.35899758338928
},
"700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 134.8789930343628
},
"1000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 183.20000171661377
},
"1300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 232.15900361537933
},
"1600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 270.7189917564392
},
"1900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 319.8390007019043
},
"2200": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 368.47901344299316
},
"2500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 416.1590039730072
},
"2800": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 455.9989869594574
},
"3100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 502.87801027297974
},
"3400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 551.6780018806458
},
"3700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 588.4780287742615
},
"4000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 637.9179954528809
},
"4300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 684.3180060386658
},
"4600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 722.5580215454102
},
"4900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 769.2769765853882
},
"5000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 787.0380282402039
},
"5500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 855.5974960327148
},
"6000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 930.3969740867615
},
"6500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1007.5169801712036
},
"7000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1083.035945892334
},
"7500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1160.4759693145752
},
"8000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1223.196029663086
},
"8500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1304.636001586914
},
"9000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1379.5154094696045
},
"9500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1458.2350254058838
},
"10000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1534.7150564193726
},
"10500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1603.8349866867065
},
"11000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1674.875020980835
},
"11500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1748.3140230178833
},
"12000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1829.4340372085571
},
"12500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1902.2339582443237
},
"13000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1981.5934896469116
},
"13500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2050.473690032959
},
"14000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2124.7940063476562
},
"14500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2205.6729793548584
},
"15000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2276.473045349121
},
"15500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2357.2731018066406
},
"16000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2413.912057876587
},
"16500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2496.7122077941895
},
"17000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2578.2310962677
},
"17500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2654.952049255371
},
"18000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2733.431100845337
},
"18500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2802.1509647369385
},
"19000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2874.070882797241
},
"19500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2944.5509910583496
},
"20000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3025.5908966064453
},
"20500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3102.5500297546387
},
"21000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3181.8299293518066
},
"21500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3240.9496307373047
},
"22000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3319.988965988159
},
"22500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3399.5089530944824
},
"23000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3478.549003601074
},
"23500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3546.7889308929443
},
"24000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3604.30908203125
},
"24500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3695.6679821014404
},
"25000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3773.668050765991
},
"25500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3846.3079929351807
},
"26000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3921.267032623291
},
"26500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3994.7071075439453
},
"27000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4069.9071884155273
},
"27500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4145.586967468262
},
"28000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4219.986915588379
},
"28500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4295.1860427856445
},
"29000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4377.1071434021
},
"29500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4437.265872955322
},
"30000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4522.225379943848
},
"30500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4587.345123291016
},
"31000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4673.824787139893
},
"31500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4754.70495223999
},
"32000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4792.4652099609375
},
"32500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4895.984172821045
}
}
\ No newline at end of file
{
"1": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 2,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
}
},
"best_us": 31.039999797940254
},
"100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 43.99999976158142
},
"400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 90.87999910116196
},
"700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 129.7599971294403
},
"1000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 176.95899307727814
},
"1300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 224.15900230407715
},
"1600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 261.9189918041229
},
"1900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 310.2389872074127
},
"2200": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 356.63801431655884
},
"2500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 403.1989872455597
},
"2800": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 441.75800681114197
},
"3100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 489.1180098056793
},
"3400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 535.7584953308105
},
"3700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 574.7179985046387
},
"4000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 620.9579706192017
},
"4300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 667.9970026016235
},
"4600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 705.7579755783081
},
"4900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 752.8769969940186
},
"5000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 772.4779844284058
},
"5500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 838.0780220031738
},
"6000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 913.5969877243042
},
"6500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 991.117000579834
},
"7000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1065.7559633255005
},
"7500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1142.6365375518799
},
"8000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1203.995943069458
},
"8500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1283.5159301757812
},
"9000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1360.2359294891357
},
"9500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1439.674973487854
},
"10000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1519.9949741363525
},
"10500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1596.155047416687
},
"11000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1653.7549495697021
},
"11500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1737.5949621200562
},
"12000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1812.3149871826172
},
"12500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1889.4339799880981
},
"13000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1967.034101486206
},
"13500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2027.8329849243164
},
"14000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2114.712953567505
},
"14500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2183.993101119995
},
"15000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2258.3930492401123
},
"15500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2337.592601776123
},
"16000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2388.1518840789795
},
"16500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2477.2720336914062
},
"17000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2552.63090133667
},
"17500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2631.831645965576
},
"18000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2701.2710571289062
},
"18500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2781.8310260772705
},
"19000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2847.7511405944824
},
"19500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2929.4309616088867
},
"20000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3000.7104873657227
},
"20500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3072.46994972229
},
"21000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3165.750026702881
},
"21500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3223.269462585449
},
"22000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3299.190044403076
},
"22500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3377.4290084838867
},
"23000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3457.2689533233643
},
"23500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3525.3491401672363
},
"24000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3569.4289207458496
},
"24500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3679.6679496765137
},
"25000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3744.868278503418
},
"25500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3823.50754737854
},
"26000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3897.9079723358154
},
"26500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3982.8670024871826
},
"27000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4039.1879081726074
},
"27500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4112.546920776367
},
"28000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4191.186904907227
},
"28500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4264.14680480957
},
"29000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4359.186172485352
},
"29500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4394.065856933594
},
"30000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4495.505809783936
},
"30500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4560.785293579102
},
"31000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4630.865573883057
},
"31500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4712.944984436035
},
"32000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4757.744789123535
},
"32500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4864.785194396973
}
}
\ No newline at end of file
{
"1": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 2,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
}
},
"best_us": 32.32000023126602
},
"100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 44.79900002479553
},
"400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 92.96000003814697
},
"700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 132.31900334358215
},
"1000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 180.16000092029572
},
"1300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 228.31900417804718
},
"1600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 267.35949516296387
},
"1900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 315.99900126457214
},
"2200": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 363.6789917945862
},
"2500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 410.8784794807434
},
"2800": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 450.55800676345825
},
"3100": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 498.7179934978485
},
"3400": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 545.5989837646484
},
"3700": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 584.4780206680298
},
"4000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 631.9980025291443
},
"4300": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 676.3975024223328
},
"4600": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 717.1175479888916
},
"4900": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 764.9570107460022
},
"5000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 782.1570038795471
},
"5500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 847.3569750785828
},
"6000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 925.4369735717773
},
"6500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1001.3569593429565
},
"7000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1074.7170448303223
},
"7500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1148.076057434082
},
"8000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1210.716962814331
},
"8500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1292.4760580062866
},
"9000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1365.915060043335
},
"9500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1445.5955028533936
},
"10000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1520.3150510787964
},
"10500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1597.1139669418335
},
"11000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1663.0350351333618
},
"11500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1740.6350374221802
},
"12000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1814.2340183258057
},
"12500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1885.0340843200684
},
"13000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 1964.553952217102
},
"13500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2028.9530754089355
},
"14000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2108.793020248413
},
"14500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2182.873010635376
},
"15000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2257.272958755493
},
"15500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2341.752529144287
},
"16000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2396.4719772338867
},
"16500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2481.5919399261475
},
"17000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2564.4710063934326
},
"17500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2635.511875152588
},
"18000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2705.9109210968018
},
"18500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2782.551050186157
},
"19000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2849.9109745025635
},
"19500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2927.1910190582275
},
"20000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 2999.030113220215
},
"20500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3082.710027694702
},
"21000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3159.670114517212
},
"21500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3208.3096504211426
},
"22000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3299.509048461914
},
"22500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3373.589038848877
},
"23000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3443.82905960083
},
"23500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3526.068687438965
},
"24000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3572.3090171813965
},
"24500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3671.1881160736084
},
"25000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3740.5481338500977
},
"25500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3825.1874446868896
},
"26000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3895.3471183776855
},
"26500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 3965.2678966522217
},
"27000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4029.2677879333496
},
"27500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4103.026390075684
},
"28000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4190.866947174072
},
"28500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4271.986484527588
},
"29000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4334.706783294678
},
"29500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4397.904872894287
},
"30000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4479.825973510742
},
"30500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4569.265365600586
},
"31000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4626.384735107422
},
"31500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4720.785140991211
},
"32000": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4744.144916534424
},
"32500": {
"kernel_kind": "v1_2stages_tc",
"best_config": {
"stage1": {
"BLOCK_N": 16,
"num_stages": 1,
"num_warps": 4,
"num_ldmatrixes": 0
},
"stage2": {
"BLOCK_N": 64,
"num_stages": 1,
"num_warps": 8,
"num_ldmatrixes": 0
}
},
"best_us": 4870.384216308594
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment