diff --git a/.gitmodules b/.gitmodules index ae843a1d5a6265204ff5662237975323981eb6aa..59f6988bc573e5d34b2226c41d8c31a156a6f8f8 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,11 +1,11 @@ [submodule "3rdparty/composable_kernel"] path = 3rdparty/composable_kernel - url = ../composable_kernel.git - branch = main + url = ../composable_kernel + branch = rel-5.7.1 [submodule "3rdparty/moe_c"] path = 3rdparty/moe_c - url = ../moe.git - branch = master + url = ../Moe + branch = W8A8 diff --git a/3rdparty/composable_kernel b/3rdparty/composable_kernel index 8d05eec5aa99d5fa0cc5f5ef372a2ce02036bb73..a3b6d4d4825e8cf1b29160b9aa5ff8dbea08c8ea 160000 --- a/3rdparty/composable_kernel +++ b/3rdparty/composable_kernel @@ -1 +1 @@ -Subproject commit 8d05eec5aa99d5fa0cc5f5ef372a2ce02036bb73 +Subproject commit a3b6d4d4825e8cf1b29160b9aa5ff8dbea08c8ea diff --git a/aiter/__init__.py b/aiter/__init__.py index fdee5193125d9fd2f51708eade2d09fadb20feee..8b975b5cefa11fd78a8572fead8da3340c75fe0f 100644 --- a/aiter/__init__.py +++ b/aiter/__init__.py @@ -71,6 +71,8 @@ from .ops.rope import * from .ops.topk import * # from .ops.mha import * from .ops.gradlib import * +from .ops.mhc import * +from .ops.grouped_gemm import * # from .ops.trans_ragged_layout import * # from . 
import mla from .utility import dtypes,fp4_utils diff --git a/aiter/configs/tuned_fmoe_asm.csv b/aiter/configs/tuned_fmoe_asm.csv index adb197ba8552eec421eba69d3e827750883ebbe2..f59a1370ecc775d9ea103935a910830d0b3a3186 100644 --- a/aiter/configs/tuned_fmoe_asm.csv +++ b/aiter/configs/tuned_fmoe_asm.csv @@ -141,6 +141,57 @@ gfx938,no_quant,torch.float16,32768,352,4096,129,9,0,0,asm,13001+23001,19058.723 gfx938,no_quant,torch.float16,40960,352,4096,129,9,0,0,asm,13001+23001,23722.1115 gfx938,no_quant,torch.float16,49152,352,4096,129,9,0,0,asm,13001+23001,28329.0767 gfx938,no_quant,torch.float16,65536,352,4096,129,9,0,0,asm,13001+23001,37562.0269 +gfx938,no_quant,torch.bfloat16,1,192,2048,128,8,0,0,asm,10006+20000,36.1297 +gfx938,no_quant,torch.bfloat16,2,192,2048,128,8,0,0,asm,10008+20000,47.5655 +gfx938,no_quant,torch.bfloat16,4,192,2048,128,8,0,0,asm,10011+20000,70.0834 +gfx938,no_quant,torch.bfloat16,8,192,2048,128,8,0,0,asm,10002+20000,107.3634 +gfx938,no_quant,torch.bfloat16,16,192,2048,128,8,0,0,asm,10008+20000,153.1485 +gfx938,no_quant,torch.bfloat16,32,192,2048,128,8,0,0,asm,10008+20000,194.9505 +gfx938,no_quant,torch.bfloat16,64,192,2048,128,8,0,0,asm,10002+20000,227.6915 +gfx938,no_quant,torch.bfloat16,128,192,2048,128,8,0,0,asm,10002+20000,217.1789 +gfx938,no_quant,torch.bfloat16,256,192,2048,128,8,0,0,asm,11004+21001,243.1747 +gfx938,no_quant,torch.bfloat16,512,192,2048,128,8,0,0,asm,12000+22001,297.8012 +gfx938,no_quant,torch.bfloat16,1024,192,2048,128,8,0,0,asm,12000+22001,411.3837 +gfx938,no_quant,torch.bfloat16,2048,192,2048,128,8,0,0,asm,13000+23001,604.4119 +gfx938,no_quant,torch.bfloat16,4096,192,2048,128,8,0,0,asm,13001+23001,955.5858 +gfx938,no_quant,torch.bfloat16,8192,192,2048,128,8,0,0,asm,13001+23001,1689.6308 +gfx938,no_quant,torch.bfloat16,16384,192,2048,128,8,0,0,asm,13001+23001,3207.5057 +gfx938,no_quant,torch.bfloat16,32768,192,2048,128,8,0,0,asm,13001+23001,6173.8209 
+gfx938,no_quant,torch.bfloat16,65536,192,2048,128,8,0,0,asm,13000+23001,12477.2685 +gfx938,no_quant,torch.bfloat16,1,384,2048,128,8,0,0,asm,10008+20001,47.3044 +gfx938,no_quant,torch.bfloat16,2,384,2048,128,8,0,0,asm,10011+20002,69.216 +gfx938,no_quant,torch.bfloat16,4,384,2048,128,8,0,0,asm,10002+20000,113.4772 +gfx938,no_quant,torch.bfloat16,8,384,2048,128,8,0,0,asm,10008+20000,191.1611 +gfx938,no_quant,torch.bfloat16,16,384,2048,128,8,0,0,asm,10011+20000,286.5208 +gfx938,no_quant,torch.bfloat16,32,384,2048,128,8,0,0,asm,10011+20000,357.0469 +gfx938,no_quant,torch.bfloat16,64,384,2048,128,8,0,0,asm,10002+20000,415.0259 +gfx938,no_quant,torch.bfloat16,128,384,2048,128,8,0,0,asm,10008+20000,412.5307 +gfx938,no_quant,torch.bfloat16,256,384,2048,128,8,0,0,asm,11004+21001,450.3575 +gfx938,no_quant,torch.bfloat16,512,384,2048,128,8,0,0,asm,12001+22001,491.7812 +gfx938,no_quant,torch.bfloat16,1024,384,2048,128,8,0,0,asm,13001+23001,597.3638 +gfx938,no_quant,torch.bfloat16,2048,384,2048,128,8,0,0,asm,13001+23001,767.0737 +gfx938,no_quant,torch.bfloat16,4096,384,2048,128,8,0,0,asm,13001+23001,1276.6764 +gfx938,no_quant,torch.bfloat16,8192,384,2048,128,8,0,0,asm,13001+23001,2246.1413 +gfx938,no_quant,torch.bfloat16,16384,384,2048,128,8,0,0,asm,13001+23001,4450.9009 +gfx938,no_quant,torch.bfloat16,32768,384,2048,128,8,0,0,asm,13001+23001,8575.6606 +gfx938,no_quant,torch.bfloat16,65536,384,2048,128,8,0,0,asm,13001+23001,16846.3634 +gfx938,no_quant,torch.bfloat16,1,768,2048,128,8,0,0,asm,10011+20000,68.8792 +gfx938,no_quant,torch.bfloat16,2,768,2048,128,8,0,0,asm,10008+20000,118.9591 +gfx938,no_quant,torch.bfloat16,4,768,2048,128,8,0,0,asm,10008+20001,210.1253 +gfx938,no_quant,torch.bfloat16,8,768,2048,128,8,0,0,asm,10011+20001,367.1356 +gfx938,no_quant,torch.bfloat16,16,768,2048,128,8,0,0,asm,10002+20001,572.9879 +gfx938,no_quant,torch.bfloat16,32,768,2048,128,8,0,0,asm,10002+20000,714.7725 +gfx938,no_quant,torch.bfloat16,64,768,2048,128,8,0,0,asm,10002+20000,817.7197 
+gfx938,no_quant,torch.bfloat16,128,768,2048,128,8,0,0,asm,10011+20000,813.9779 +gfx938,no_quant,torch.bfloat16,256,768,2048,128,8,0,0,asm,11005+21001,849.9948 +gfx938,no_quant,torch.bfloat16,512,768,2048,128,8,0,0,asm,12001+22001,906.7694 +gfx938,no_quant,torch.bfloat16,1024,768,2048,128,8,0,0,asm,13001+23002,1021.0857 +gfx938,no_quant,torch.bfloat16,2048,768,2048,128,8,0,0,asm,13001+23001,1381.4244 +gfx938,no_quant,torch.bfloat16,4096,768,2048,128,8,0,0,asm,13001+23001,2296.0597 +gfx938,no_quant,torch.bfloat16,8192,768,2048,128,8,0,0,asm,13001+23001,4099.5996 +gfx938,no_quant,torch.bfloat16,16384,768,2048,128,8,0,0,asm,13001+23001,7791.3597 +gfx938,no_quant,torch.bfloat16,32768,768,2048,128,8,0,0,asm,13001+23001,15124.7783 +gfx938,no_quant,torch.bfloat16,65536,768,2048,128,8,0,0,asm,13001+23001,29786.7389 gfx936,no_quant,torch.float16,1,256,3072,256,8,0,0,asm,10002+20000,55.456 gfx936,no_quant,torch.float16,2,256,3072,256,8,0,0,asm,10002+20000,86.6223 gfx936,no_quant,torch.float16,4,256,3072,256,8,0,0,asm,10002+20000,155.6412 @@ -241,3 +292,419 @@ gfx936,no_quant,torch.float16,12288,128,3072,256,8,0,0,asm,13001+23001,2854.5487 gfx936,no_quant,torch.float16,16384,128,3072,256,8,0,0,asm,13001+23001,3669.7898 gfx936,no_quant,torch.float16,24576,128,3072,256,8,0,0,asm,13001+23001,5322.7565 gfx936,no_quant,torch.float16,32768,128,3072,256,8,0,0,asm,13001+23001,7028.0263 +gfx936,no_quant,torch.bfloat16,1,384,2048,128,8,0,0,asm,10009+20001,53.9486 +gfx936,no_quant,torch.bfloat16,2,384,2048,128,8,0,0,asm,10001+20000,84.7023 +gfx936,no_quant,torch.bfloat16,4,384,2048,128,8,0,0,asm,10001+20000,144.3738 +gfx936,no_quant,torch.bfloat16,8,384,2048,128,8,0,0,asm,10001+20001,243.5566 +gfx936,no_quant,torch.bfloat16,16,384,2048,128,8,0,0,asm,10001+20001,376.5333 +gfx936,no_quant,torch.bfloat16,32,384,2048,128,8,0,0,asm,10001+20001,470.0152 +gfx936,no_quant,torch.bfloat16,48,384,2048,128,8,0,0,asm,10001+20001,531.1322 
+gfx936,no_quant,torch.bfloat16,64,384,2048,128,8,0,0,asm,10001+20001,553.4004 +gfx936,no_quant,torch.bfloat16,96,384,2048,128,8,0,0,asm,10001+20001,552.4121 +gfx936,no_quant,torch.bfloat16,128,384,2048,128,8,0,0,asm,10001+20001,561.5489 +gfx936,no_quant,torch.bfloat16,200,384,2048,128,8,0,0,asm,10001+20001,605.4647 +gfx936,no_quant,torch.bfloat16,256,384,2048,128,8,0,0,asm,11000+20002,622.9636 +gfx936,no_quant,torch.bfloat16,384,384,2048,128,8,0,0,asm,11006+20002,668.1256 +gfx936,no_quant,torch.bfloat16,460,384,2048,128,8,0,0,asm,11007+20002,671.6456 +gfx936,no_quant,torch.bfloat16,512,384,2048,128,8,0,0,asm,12004+22001,686.9045 +gfx936,no_quant,torch.bfloat16,798,384,2048,128,8,0,0,asm,12004+22001,732.6055 +gfx936,no_quant,torch.bfloat16,1024,384,2048,128,8,0,0,asm,13001+22001,769.1949 +gfx936,no_quant,torch.bfloat16,1280,384,2048,128,8,0,0,asm,13001+22001,801.9444 +gfx936,no_quant,torch.bfloat16,1440,384,2048,128,8,0,0,asm,13001+22001,841.9274 +gfx936,no_quant,torch.bfloat16,1560,384,2048,128,8,0,0,asm,13001+22001,832.7822 +gfx936,no_quant,torch.bfloat16,1880,384,2048,128,8,0,0,asm,13000+23002,849.4395 +gfx936,no_quant,torch.bfloat16,2000,384,2048,128,8,0,0,asm,13001+22001,886.6015 +gfx936,no_quant,torch.bfloat16,2200,384,2048,128,8,0,0,asm,13001+23001,923.3931 +gfx936,no_quant,torch.bfloat16,2400,384,2048,128,8,0,0,asm,13001+22001,983.6457 +gfx936,no_quant,torch.bfloat16,2800,384,2048,128,8,0,0,asm,13001+23001,1006.7361999999999 +gfx936,no_quant,torch.bfloat16,3200,384,2048,128,8,0,0,asm,13001+23001,1057.7929 +gfx936,no_quant,torch.bfloat16,3660,384,2048,128,8,0,0,asm,13001+23001,1118.3403 +gfx936,no_quant,torch.bfloat16,4096,384,2048,128,8,0,0,asm,13001+23001,1237.801 +gfx936,no_quant,torch.bfloat16,1,1024,4096,512,10,0,0,asm,10001+20000,266.4873 +gfx936,no_quant,torch.bfloat16,4,1024,4096,512,10,0,0,asm,10000+20000,927.0086 +gfx936,no_quant,torch.bfloat16,16,1024,4096,512,10,0,0,asm,13000+23002,3159.4357 
+gfx936,no_quant,torch.bfloat16,32,1024,4096,512,10,0,0,asm,13000+23002,5249.8556 +gfx936,no_quant,torch.bfloat16,64,1024,4096,512,10,0,0,asm,13000+23002,7941.6251 +gfx936,no_quant,torch.bfloat16,128,1024,4096,512,10,0,0,asm,13000+23002,10229.1505 +gfx936,no_quant,torch.bfloat16,256,1024,4096,512,10,0,0,asm,13000+23002,11070.6568 +gfx936,no_quant,torch.bfloat16,512,1024,4096,512,10,0,0,asm,13000+23002,11400.1216 +gfx936,no_quant,torch.bfloat16,1024,1024,4096,512,10,0,0,asm,13000+23002,11766.9253 +gfx936,no_quant,torch.bfloat16,2048,1024,4096,512,10,0,0,asm,13000+23002,12317.5013 +gfx936,no_quant,torch.bfloat16,4096,1024,4096,512,10,0,0,asm,13000+23002,13405.458 +gfx936,no_quant,torch.bfloat16,8192,1024,4096,512,10,0,0,asm,13001+23002,16444.1795 +gfx936,no_quant,torch.bfloat16,16384,1024,4096,512,10,0,0,asm,13001+23002,25116.8953 +gfx936,no_quant,torch.bfloat16,32768,1024,4096,512,10,0,0,asm,13001+23002,47344.0981 +gfx936,no_quant,torch.bfloat16,1,192,4096,128,8,0,0,asm,10002+20000,49.5607 +gfx936,no_quant,torch.bfloat16,2,192,4096,128,8,0,0,asm,10000+20000,86.7055 +gfx936,no_quant,torch.bfloat16,3,192,4096,128,8,0,0,asm,10001+20000,119.6654 +gfx936,no_quant,torch.bfloat16,4,192,4096,128,8,0,0,asm,10000+20000,145.4165 +gfx936,no_quant,torch.bfloat16,5,192,4096,128,8,0,0,asm,10011+20000,167.3363 +gfx936,no_quant,torch.bfloat16,6,192,4096,128,8,0,0,asm,10001+20000,198.0811 +gfx936,no_quant,torch.bfloat16,7,192,4096,128,8,0,0,asm,10001+20000,225.6682 +gfx936,no_quant,torch.bfloat16,8,192,4096,128,8,0,0,asm,10001+20000,253.22990000000001 +gfx936,no_quant,torch.bfloat16,9,192,4096,128,8,0,0,asm,10001+20000,276.1772 +gfx936,no_quant,torch.bfloat16,10,192,4096,128,8,0,0,asm,10001+20000,293.5413 +gfx936,no_quant,torch.bfloat16,11,192,4096,128,8,0,0,asm,10005+20000,309.1789 +gfx936,no_quant,torch.bfloat16,12,192,4096,128,8,0,0,asm,10001+20000,329.4145 +gfx936,no_quant,torch.bfloat16,13,192,4096,128,8,0,0,asm,10001+20000,348.7321 
+gfx936,no_quant,torch.bfloat16,14,192,4096,128,8,0,0,asm,10001+20000,358.8541 +gfx936,no_quant,torch.bfloat16,15,192,4096,128,8,0,0,asm,10001+20000,373.5655 +gfx936,no_quant,torch.bfloat16,16,192,4096,128,8,0,0,asm,10001+20000,391.4516 +gfx936,no_quant,torch.bfloat16,17,192,4096,128,8,0,0,asm,10001+20000,389.9161 +gfx936,no_quant,torch.bfloat16,18,192,4096,128,8,0,0,asm,10001+20000,411.9958 +gfx936,no_quant,torch.bfloat16,20,192,4096,128,8,0,0,asm,10000+20000,433.6464 +gfx936,no_quant,torch.bfloat16,24,192,4096,128,8,0,0,asm,10000+20000,455.8606 +gfx936,no_quant,torch.bfloat16,28,192,4096,128,8,0,0,asm,10000+20000,472.2339 +gfx936,no_quant,torch.bfloat16,32,192,4096,128,8,0,0,asm,10000+20000,491.9726 +gfx936,no_quant,torch.bfloat16,34,192,4096,128,8,0,0,asm,10001+20000,489.6287 +gfx936,no_quant,torch.bfloat16,36,192,4096,128,8,0,0,asm,10000+20000,501.42629999999997 +gfx936,no_quant,torch.bfloat16,40,192,4096,128,8,0,0,asm,10001+20000,519.8009 +gfx936,no_quant,torch.bfloat16,44,192,4096,128,8,0,0,asm,10000+20000,541.9312 +gfx936,no_quant,torch.bfloat16,48,192,4096,128,8,0,0,asm,10000+20000,553.4089 +gfx936,no_quant,torch.bfloat16,56,192,4096,128,8,0,0,asm,10000+20000,556.8362 +gfx936,no_quant,torch.bfloat16,64,192,4096,128,8,0,0,asm,10001+20000,573.7738 +gfx936,no_quant,torch.bfloat16,68,192,4096,128,8,0,0,asm,10000+20000,563.0172 +gfx936,no_quant,torch.bfloat16,72,192,4096,128,8,0,0,asm,10000+20000,566.1834 +gfx936,no_quant,torch.bfloat16,80,192,4096,128,8,0,0,asm,10001+20000,566.7393 +gfx936,no_quant,torch.bfloat16,88,192,4096,128,8,0,0,asm,10005+20000,603.7156 +gfx936,no_quant,torch.bfloat16,96,192,4096,128,8,0,0,asm,10001+20000,577.3413 +gfx936,no_quant,torch.bfloat16,104,192,4096,128,8,0,0,asm,10003+20000,584.0697 +gfx936,no_quant,torch.bfloat16,112,192,4096,128,8,0,0,asm,10000+20000,585.4338 +gfx936,no_quant,torch.bfloat16,128,192,4096,128,8,0,0,asm,10000+20000,590.5707 +gfx936,no_quant,torch.bfloat16,144,192,4096,128,8,0,0,asm,10001+20000,598.9495 
+gfx936,no_quant,torch.bfloat16,160,192,4096,128,8,0,0,asm,10001+20000,603.9263 +gfx936,no_quant,torch.bfloat16,192,192,4096,128,8,0,0,asm,10001+20000,626.6545 +gfx936,no_quant,torch.bfloat16,224,192,4096,128,8,0,0,asm,10012+20000,626.0144 +gfx936,no_quant,torch.bfloat16,256,192,4096,128,8,0,0,asm,10001+20000,640.6671 +gfx936,no_quant,torch.bfloat16,320,192,4096,128,8,0,0,asm,11006+21001,662.0478 +gfx936,no_quant,torch.bfloat16,384,192,4096,128,8,0,0,asm,12004+22001,676.5403 +gfx936,no_quant,torch.bfloat16,448,192,4096,128,8,0,0,asm,12004+22001,688.4306 +gfx936,no_quant,torch.bfloat16,512,192,4096,128,8,0,0,asm,12004+22001,708.1021 +gfx936,no_quant,torch.bfloat16,576,192,4096,128,8,0,0,asm,12004+22001,720.3208 +gfx936,no_quant,torch.bfloat16,640,192,4096,128,8,0,0,asm,12004+22001,736.7166 +gfx936,no_quant,torch.bfloat16,704,192,4096,128,8,0,0,asm,12004+22001,743.7227 +gfx936,no_quant,torch.bfloat16,768,192,4096,128,8,0,0,asm,12004+22001,763.0237 +gfx936,no_quant,torch.bfloat16,832,192,4096,128,8,0,0,asm,12004+22001,779.8572 +gfx936,no_quant,torch.bfloat16,896,192,4096,128,8,0,0,asm,12004+22001,797.6254 +gfx936,no_quant,torch.bfloat16,960,192,4096,128,8,0,0,asm,12004+22001,836.2184 +gfx936,no_quant,torch.bfloat16,1024,192,4096,128,8,0,0,asm,12001+22001,843.6542 +gfx936,no_quant,torch.bfloat16,1152,192,4096,128,8,0,0,asm,13000+23001,884.7822 +gfx936,no_quant,torch.bfloat16,1280,192,4096,128,8,0,0,asm,13000+23001,899.9314 +gfx936,no_quant,torch.bfloat16,1408,192,4096,128,8,0,0,asm,13000+23001,922.5503 +gfx936,no_quant,torch.bfloat16,1536,192,4096,128,8,0,0,asm,13000+23001,967.3328 +gfx936,no_quant,torch.bfloat16,1664,192,4096,128,8,0,0,asm,13000+23001,974.6591 +gfx936,no_quant,torch.bfloat16,1792,192,4096,128,8,0,0,asm,13001+23001,1055.4919 +gfx936,no_quant,torch.bfloat16,1920,192,4096,128,8,0,0,asm,13000+23001,1065.8918 +gfx936,no_quant,torch.bfloat16,2048,192,4096,128,8,0,0,asm,13000+23001,1143.0194 
+gfx936,no_quant,torch.bfloat16,2304,192,4096,128,8,0,0,asm,13001+23001,1326.3941 +gfx936,no_quant,torch.bfloat16,2560,192,4096,128,8,0,0,asm,13001+23001,1377.5515 +gfx936,no_quant,torch.bfloat16,2816,192,4096,128,8,0,0,asm,13000+23001,1432.0519 +gfx936,no_quant,torch.bfloat16,3072,192,4096,128,8,0,0,asm,13001+23001,1466.1822 +gfx936,no_quant,torch.bfloat16,3328,192,4096,128,8,0,0,asm,13001+23001,1510.1901 +gfx936,no_quant,torch.bfloat16,3584,192,4096,128,8,0,0,asm,13001+23001,1557.4486 +gfx936,no_quant,torch.bfloat16,3840,192,4096,128,8,0,0,asm,13001+23001,1618.8459 +gfx936,no_quant,torch.bfloat16,4096,192,4096,128,8,0,0,asm,13001+23001,1753.8086 +gfx936,no_quant,torch.bfloat16,4608,192,4096,128,8,0,0,asm,13001+23001,2039.8014 +gfx936,no_quant,torch.bfloat16,5120,192,4096,128,8,0,0,asm,13001+23001,2126.6889 +gfx936,no_quant,torch.bfloat16,5632,192,4096,128,8,0,0,asm,13001+23001,2223.7741 +gfx936,no_quant,torch.bfloat16,6144,192,4096,128,8,0,0,asm,13001+23001,2430.6853 +gfx936,no_quant,torch.bfloat16,6656,192,4096,128,8,0,0,asm,13001+23001,2734.7157 +gfx936,no_quant,torch.bfloat16,7168,192,4096,128,8,0,0,asm,13001+23001,2803.3465 +gfx936,no_quant,torch.bfloat16,7680,192,4096,128,8,0,0,asm,13001+23001,2972.8603 +gfx936,no_quant,torch.bfloat16,8192,192,4096,128,8,0,0,asm,13001+23001,3185.2116 +gfx936,no_quant,torch.bfloat16,10240,192,4096,128,8,0,0,asm,13001+23001,3868.2946 +gfx936,no_quant,torch.bfloat16,12288,192,4096,128,8,0,0,asm,13001+23001,4562.308 +gfx936,no_quant,torch.bfloat16,14336,192,4096,128,8,0,0,asm,13001+23001,5337.3815 +gfx936,no_quant,torch.bfloat16,16384,192,4096,128,8,0,0,asm,13001+23001,6046.3087 +gfx938,no_quant,torch.bfloat16,1,192,4096,128,8,0,0,asm,10002+20000,49.5607 +gfx938,no_quant,torch.bfloat16,2,192,4096,128,8,0,0,asm,10000+20000,86.7055 +gfx938,no_quant,torch.bfloat16,3,192,4096,128,8,0,0,asm,10001+20000,119.6654 +gfx938,no_quant,torch.bfloat16,4,192,4096,128,8,0,0,asm,10000+20000,145.4165 
+gfx938,no_quant,torch.bfloat16,5,192,4096,128,8,0,0,asm,10011+20000,167.3363 +gfx938,no_quant,torch.bfloat16,6,192,4096,128,8,0,0,asm,10001+20000,198.0811 +gfx938,no_quant,torch.bfloat16,7,192,4096,128,8,0,0,asm,10001+20000,225.6682 +gfx938,no_quant,torch.bfloat16,8,192,4096,128,8,0,0,asm,10001+20000,253.22990000000001 +gfx938,no_quant,torch.bfloat16,9,192,4096,128,8,0,0,asm,10001+20000,276.1772 +gfx938,no_quant,torch.bfloat16,10,192,4096,128,8,0,0,asm,10001+20000,293.5413 +gfx938,no_quant,torch.bfloat16,11,192,4096,128,8,0,0,asm,10005+20000,309.1789 +gfx938,no_quant,torch.bfloat16,12,192,4096,128,8,0,0,asm,10001+20000,329.4145 +gfx938,no_quant,torch.bfloat16,13,192,4096,128,8,0,0,asm,10001+20000,348.7321 +gfx938,no_quant,torch.bfloat16,14,192,4096,128,8,0,0,asm,10001+20000,358.8541 +gfx938,no_quant,torch.bfloat16,15,192,4096,128,8,0,0,asm,10001+20000,373.5655 +gfx938,no_quant,torch.bfloat16,16,192,4096,128,8,0,0,asm,10001+20000,391.4516 +gfx938,no_quant,torch.bfloat16,17,192,4096,128,8,0,0,asm,10001+20000,389.9161 +gfx938,no_quant,torch.bfloat16,18,192,4096,128,8,0,0,asm,10001+20000,411.9958 +gfx938,no_quant,torch.bfloat16,20,192,4096,128,8,0,0,asm,10000+20000,433.6464 +gfx938,no_quant,torch.bfloat16,24,192,4096,128,8,0,0,asm,10000+20000,455.8606 +gfx938,no_quant,torch.bfloat16,28,192,4096,128,8,0,0,asm,10000+20000,472.2339 +gfx938,no_quant,torch.bfloat16,32,192,4096,128,8,0,0,asm,10000+20000,491.9726 +gfx938,no_quant,torch.bfloat16,34,192,4096,128,8,0,0,asm,10001+20000,489.6287 +gfx938,no_quant,torch.bfloat16,36,192,4096,128,8,0,0,asm,10000+20000,501.42629999999997 +gfx938,no_quant,torch.bfloat16,40,192,4096,128,8,0,0,asm,10001+20000,519.8009 +gfx938,no_quant,torch.bfloat16,44,192,4096,128,8,0,0,asm,10000+20000,541.9312 +gfx938,no_quant,torch.bfloat16,48,192,4096,128,8,0,0,asm,10000+20000,553.4089 +gfx938,no_quant,torch.bfloat16,56,192,4096,128,8,0,0,asm,10000+20000,556.8362 +gfx938,no_quant,torch.bfloat16,64,192,4096,128,8,0,0,asm,10001+20000,573.7738 
+gfx938,no_quant,torch.bfloat16,68,192,4096,128,8,0,0,asm,10000+20000,563.0172 +gfx938,no_quant,torch.bfloat16,72,192,4096,128,8,0,0,asm,10000+20000,566.1834 +gfx938,no_quant,torch.bfloat16,80,192,4096,128,8,0,0,asm,10001+20000,566.7393 +gfx938,no_quant,torch.bfloat16,88,192,4096,128,8,0,0,asm,10005+20000,603.7156 +gfx938,no_quant,torch.bfloat16,96,192,4096,128,8,0,0,asm,10001+20000,577.3413 +gfx938,no_quant,torch.bfloat16,104,192,4096,128,8,0,0,asm,10003+20000,584.0697 +gfx938,no_quant,torch.bfloat16,112,192,4096,128,8,0,0,asm,10000+20000,585.4338 +gfx938,no_quant,torch.bfloat16,128,192,4096,128,8,0,0,asm,10000+20000,590.5707 +gfx938,no_quant,torch.bfloat16,144,192,4096,128,8,0,0,asm,10001+20000,598.9495 +gfx938,no_quant,torch.bfloat16,160,192,4096,128,8,0,0,asm,10001+20000,603.9263 +gfx938,no_quant,torch.bfloat16,192,192,4096,128,8,0,0,asm,10001+20000,626.6545 +gfx938,no_quant,torch.bfloat16,224,192,4096,128,8,0,0,asm,10012+20000,626.0144 +gfx938,no_quant,torch.bfloat16,256,192,4096,128,8,0,0,asm,10001+20000,640.6671 +gfx938,no_quant,torch.bfloat16,320,192,4096,128,8,0,0,asm,11006+21001,662.0478 +gfx938,no_quant,torch.bfloat16,384,192,4096,128,8,0,0,asm,12004+22001,676.5403 +gfx938,no_quant,torch.bfloat16,448,192,4096,128,8,0,0,asm,12004+22001,688.4306 +gfx938,no_quant,torch.bfloat16,512,192,4096,128,8,0,0,asm,12004+22001,708.1021 +gfx938,no_quant,torch.bfloat16,576,192,4096,128,8,0,0,asm,12004+22001,720.3208 +gfx938,no_quant,torch.bfloat16,640,192,4096,128,8,0,0,asm,12004+22001,736.7166 +gfx938,no_quant,torch.bfloat16,704,192,4096,128,8,0,0,asm,12004+22001,743.7227 +gfx938,no_quant,torch.bfloat16,768,192,4096,128,8,0,0,asm,12004+22001,763.0237 +gfx938,no_quant,torch.bfloat16,832,192,4096,128,8,0,0,asm,12004+22001,779.8572 +gfx938,no_quant,torch.bfloat16,896,192,4096,128,8,0,0,asm,12004+22001,797.6254 +gfx938,no_quant,torch.bfloat16,960,192,4096,128,8,0,0,asm,12004+22001,836.2184 +gfx938,no_quant,torch.bfloat16,1024,192,4096,128,8,0,0,asm,12001+22001,843.6542 
+gfx938,no_quant,torch.bfloat16,1152,192,4096,128,8,0,0,asm,13000+23001,884.7822 +gfx938,no_quant,torch.bfloat16,1280,192,4096,128,8,0,0,asm,13000+23001,899.9314 +gfx938,no_quant,torch.bfloat16,1408,192,4096,128,8,0,0,asm,13000+23001,922.5503 +gfx938,no_quant,torch.bfloat16,1536,192,4096,128,8,0,0,asm,13000+23001,967.3328 +gfx938,no_quant,torch.bfloat16,1664,192,4096,128,8,0,0,asm,13000+23001,974.6591 +gfx938,no_quant,torch.bfloat16,1792,192,4096,128,8,0,0,asm,13001+23001,1055.4919 +gfx938,no_quant,torch.bfloat16,1920,192,4096,128,8,0,0,asm,13000+23001,1065.8918 +gfx938,no_quant,torch.bfloat16,2048,192,4096,128,8,0,0,asm,13000+23001,1143.0194 +gfx938,no_quant,torch.bfloat16,2304,192,4096,128,8,0,0,asm,13001+23001,1326.3941 +gfx938,no_quant,torch.bfloat16,2560,192,4096,128,8,0,0,asm,13001+23001,1377.5515 +gfx938,no_quant,torch.bfloat16,2816,192,4096,128,8,0,0,asm,13000+23001,1432.0519 +gfx938,no_quant,torch.bfloat16,3072,192,4096,128,8,0,0,asm,13001+23001,1466.1822 +gfx938,no_quant,torch.bfloat16,3328,192,4096,128,8,0,0,asm,13001+23001,1510.1901 +gfx938,no_quant,torch.bfloat16,3584,192,4096,128,8,0,0,asm,13001+23001,1557.4486 +gfx938,no_quant,torch.bfloat16,3840,192,4096,128,8,0,0,asm,13001+23001,1618.8459 +gfx938,no_quant,torch.bfloat16,4096,192,4096,128,8,0,0,asm,13001+23001,1753.8086 +gfx938,no_quant,torch.bfloat16,4608,192,4096,128,8,0,0,asm,13001+23001,2039.8014 +gfx938,no_quant,torch.bfloat16,5120,192,4096,128,8,0,0,asm,13001+23001,2126.6889 +gfx938,no_quant,torch.bfloat16,5632,192,4096,128,8,0,0,asm,13001+23001,2223.7741 +gfx938,no_quant,torch.bfloat16,6144,192,4096,128,8,0,0,asm,13001+23001,2430.6853 +gfx938,no_quant,torch.bfloat16,6656,192,4096,128,8,0,0,asm,13001+23001,2734.7157 +gfx938,no_quant,torch.bfloat16,7168,192,4096,128,8,0,0,asm,13001+23001,2803.3465 +gfx938,no_quant,torch.bfloat16,7680,192,4096,128,8,0,0,asm,13001+23001,2972.8603 +gfx938,no_quant,torch.bfloat16,8192,192,4096,128,8,0,0,asm,13001+23001,3185.2116 
+gfx938,no_quant,torch.bfloat16,10240,192,4096,128,8,0,0,asm,13001+23001,3868.2946 +gfx938,no_quant,torch.bfloat16,12288,192,4096,128,8,0,0,asm,13001+23001,4562.308 +gfx938,no_quant,torch.bfloat16,14336,192,4096,128,8,0,0,asm,13001+23001,5337.3815 +gfx938,no_quant,torch.bfloat16,16384,192,4096,128,8,0,0,asm,13001+23001,6046.3087 +gfx938,no_quant,torch.bfloat16,1,384,4096,128,8,0,0,asm,10008+20000,88.778 +gfx938,no_quant,torch.bfloat16,2,384,4096,128,8,0,0,asm,10012+20000,145.6453 +gfx938,no_quant,torch.bfloat16,3,384,4096,128,8,0,0,asm,10002+20000,226.1335 +gfx938,no_quant,torch.bfloat16,4,384,4096,128,8,0,0,asm,10008+20000,262.7649 +gfx938,no_quant,torch.bfloat16,5,384,4096,128,8,0,0,asm,10011+20000,309.5268 +gfx938,no_quant,torch.bfloat16,6,384,4096,128,8,0,0,asm,10002+20000,379.8004 +gfx938,no_quant,torch.bfloat16,7,384,4096,128,8,0,0,asm,10008+20000,421.0213 +gfx938,no_quant,torch.bfloat16,8,384,4096,128,8,0,0,asm,10011+20000,473.8716 +gfx938,no_quant,torch.bfloat16,9,384,4096,128,8,0,0,asm,10011+20000,501.8547 +gfx938,no_quant,torch.bfloat16,10,384,4096,128,8,0,0,asm,10002+20000,554.2504 +gfx938,no_quant,torch.bfloat16,11,384,4096,128,8,0,0,asm,10002+20000,569.8967 +gfx938,no_quant,torch.bfloat16,12,384,4096,128,8,0,0,asm,10008+20000,607.4544 +gfx938,no_quant,torch.bfloat16,13,384,4096,128,8,0,0,asm,10002+20000,647.4375 +gfx938,no_quant,torch.bfloat16,14,384,4096,128,8,0,0,asm,10012+20000,668.9448 +gfx938,no_quant,torch.bfloat16,15,384,4096,128,8,0,0,asm,10002+20000,691.8079 +gfx938,no_quant,torch.bfloat16,16,384,4096,128,8,0,0,asm,10002+20000,721.2395 +gfx938,no_quant,torch.bfloat16,17,384,4096,128,8,0,0,asm,10002+20000,722.8702 +gfx938,no_quant,torch.bfloat16,18,384,4096,128,8,0,0,asm,10002+20000,759.9647 +gfx938,no_quant,torch.bfloat16,20,384,4096,128,8,0,0,asm,10002+20000,807.611 +gfx938,no_quant,torch.bfloat16,24,384,4096,128,8,0,0,asm,10002+20000,838.7015 +gfx938,no_quant,torch.bfloat16,28,384,4096,128,8,0,0,asm,10002+20000,864.4391 
+gfx938,no_quant,torch.bfloat16,32,384,4096,128,8,0,0,asm,10002+20000,897.0201 +gfx938,no_quant,torch.bfloat16,34,384,4096,128,8,0,0,asm,10002+20000,897.0423 +gfx938,no_quant,torch.bfloat16,36,384,4096,128,8,0,0,asm,10002+20000,925.1771 +gfx938,no_quant,torch.bfloat16,40,384,4096,128,8,0,0,asm,10002+20000,946.8443 +gfx938,no_quant,torch.bfloat16,44,384,4096,128,8,0,0,asm,10002+20000,982.0526 +gfx938,no_quant,torch.bfloat16,48,384,4096,128,8,0,0,asm,10002+20000,1004.0483 +gfx938,no_quant,torch.bfloat16,56,384,4096,128,8,0,0,asm,10002+20000,1008.3682999999999 +gfx938,no_quant,torch.bfloat16,64,384,4096,128,8,0,0,asm,10002+20000,1036.6745 +gfx938,no_quant,torch.bfloat16,68,384,4096,128,8,0,0,asm,10002+20000,1009.5556000000001 +gfx938,no_quant,torch.bfloat16,72,384,4096,128,8,0,0,asm,10002+20000,1017.5387000000001 +gfx938,no_quant,torch.bfloat16,80,384,4096,128,8,0,0,asm,10002+20000,1016.1324000000001 +gfx938,no_quant,torch.bfloat16,88,384,4096,128,8,0,0,asm,10002+20000,1022.3051 +gfx938,no_quant,torch.bfloat16,96,384,4096,128,8,0,0,asm,10002+20000,1028.1575 +gfx938,no_quant,torch.bfloat16,104,384,4096,128,8,0,0,asm,10002+20000,1029.3365 +gfx938,no_quant,torch.bfloat16,112,384,4096,128,8,0,0,asm,10002+20000,1030.6838 +gfx938,no_quant,torch.bfloat16,128,384,4096,128,8,0,0,asm,10002+20000,1038.3891 +gfx938,no_quant,torch.bfloat16,144,384,4096,128,8,0,0,asm,10002+20000,1049.4039 +gfx938,no_quant,torch.bfloat16,160,384,4096,128,8,0,0,asm,10002+20000,1057.5723 +gfx938,no_quant,torch.bfloat16,192,384,4096,128,8,0,0,asm,12001+22001,1096.2332 +gfx938,no_quant,torch.bfloat16,224,384,4096,128,8,0,0,asm,12001+22001,1105.1679 +gfx938,no_quant,torch.bfloat16,256,384,4096,128,8,0,0,asm,12001+22001,1114.4311 +gfx938,no_quant,torch.bfloat16,320,384,4096,128,8,0,0,asm,12001+22001,1129.5048 +gfx938,no_quant,torch.bfloat16,384,384,4096,128,8,0,0,asm,12001+22001,1159.4921 +gfx938,no_quant,torch.bfloat16,448,384,4096,128,8,0,0,asm,12001+22001,1161.1932 
+gfx938,no_quant,torch.bfloat16,512,384,4096,128,8,0,0,asm,12001+22001,1180.1742 +gfx938,no_quant,torch.bfloat16,576,384,4096,128,8,0,0,asm,12001+22001,1196.2163 +gfx938,no_quant,torch.bfloat16,640,384,4096,128,8,0,0,asm,12001+22001,1219.7446 +gfx938,no_quant,torch.bfloat16,704,384,4096,128,8,0,0,asm,12001+22001,1224.0478 +gfx938,no_quant,torch.bfloat16,768,384,4096,128,8,0,0,asm,12001+22001,1238.1277 +gfx938,no_quant,torch.bfloat16,832,384,4096,128,8,0,0,asm,12001+22001,1252.9572 +gfx938,no_quant,torch.bfloat16,896,384,4096,128,8,0,0,asm,12001+22001,1309.0665 +gfx938,no_quant,torch.bfloat16,960,384,4096,128,8,0,0,asm,13001+23001,1374.0515 +gfx938,no_quant,torch.bfloat16,1024,384,4096,128,8,0,0,asm,13001+23001,1385.7821 +gfx938,no_quant,torch.bfloat16,1152,384,4096,128,8,0,0,asm,13001+23001,1423.6346 +gfx938,no_quant,torch.bfloat16,1280,384,4096,128,8,0,0,asm,13001+23001,1419.9631 +gfx938,no_quant,torch.bfloat16,1408,384,4096,128,8,0,0,asm,13001+23001,1439.9123 +gfx938,no_quant,torch.bfloat16,1536,384,4096,128,8,0,0,asm,13001+23001,1467.6091 +gfx938,no_quant,torch.bfloat16,1664,384,4096,128,8,0,0,asm,13001+23001,1592.1729 +gfx938,no_quant,torch.bfloat16,1792,384,4096,128,8,0,0,asm,13001+23001,1631.3979 +gfx938,no_quant,torch.bfloat16,1920,384,4096,128,8,0,0,asm,13001+23001,1820.3655 +gfx938,no_quant,torch.bfloat16,2048,384,4096,128,8,0,0,asm,13001+23001,2047.0343 +gfx938,no_quant,torch.bfloat16,2304,384,4096,128,8,0,0,asm,13001+23001,2625.8116 +gfx938,no_quant,torch.bfloat16,2560,384,4096,128,8,0,0,asm,13001+23001,2679.9506 +gfx938,no_quant,torch.bfloat16,2816,384,4096,128,8,0,0,asm,13001+23001,2715.7403 +gfx938,no_quant,torch.bfloat16,3072,384,4096,128,8,0,0,asm,13001+23001,2745.2056 +gfx938,no_quant,torch.bfloat16,3328,384,4096,128,8,0,0,asm,13001+23001,2781.8456 +gfx938,no_quant,torch.bfloat16,3584,384,4096,128,8,0,0,asm,13001+23001,2925.4747 +gfx938,no_quant,torch.bfloat16,3840,384,4096,128,8,0,0,asm,13001+23001,3025.5921 
+gfx938,no_quant,torch.bfloat16,4096,384,4096,128,8,0,0,asm,13001+23001,3433.3634 +gfx938,no_quant,torch.bfloat16,4608,384,4096,128,8,0,0,asm,13001+23001,4062.0351 +gfx938,no_quant,torch.bfloat16,5120,384,4096,128,8,0,0,asm,13001+23001,4116.7715 +gfx938,no_quant,torch.bfloat16,5632,384,4096,128,8,0,0,asm,13001+23001,4327.2296 +gfx938,no_quant,torch.bfloat16,6144,384,4096,128,8,0,0,asm,13001+23001,4917.9061 +gfx938,no_quant,torch.bfloat16,6656,384,4096,128,8,0,0,asm,13001+23001,5421.0124 +gfx938,no_quant,torch.bfloat16,7168,384,4096,128,8,0,0,asm,13001+23001,5534.0483 +gfx938,no_quant,torch.bfloat16,7680,384,4096,128,8,0,0,asm,13001+23001,5698.4187 +gfx938,no_quant,torch.bfloat16,8192,384,4096,128,8,0,0,asm,13001+23001,6196.8255 +gfx938,no_quant,torch.bfloat16,10240,384,4096,128,8,0,0,asm,13001+23001,7595.145 +gfx938,no_quant,torch.bfloat16,12288,384,4096,128,8,0,0,asm,13001+23001,8934.1889 +gfx938,no_quant,torch.bfloat16,14336,384,4096,128,8,0,0,asm,13001+23001,10329.2311 +gfx938,no_quant,torch.bfloat16,16384,384,4096,128,8,0,0,asm,13001+23001,11702.0355 +gfx938,no_quant,torch.bfloat16,17408,384,4096,128,8,0,0,asm,13001+23001,12483.0714 +gfx938,no_quant,torch.bfloat16,24576,384,4096,128,8,0,0,asm,13001+23001,17223.7804 +gfx938,no_quant,torch.bfloat16,32768,384,4096,128,8,0,0,asm,13001+23001,22719.4254 +gfx938,no_quant,torch.bfloat16,40960,384,4096,128,8,0,0,asm,13001+23001,28232.8175 +gfx938,no_quant,torch.bfloat16,49152,384,4096,128,8,0,0,asm,13001+23001,33760.5851 +gfx938,no_quant,torch.bfloat16,57344,384,4096,128,8,0,0,asm,13001+23001,39304.2245 +gfx938,no_quant,torch.bfloat16,65536,384,4096,128,8,0,0,asm,13001+23001,44786.0225 +gfx938,no_quant,torch.bfloat16,65536,384,4096,128,8,0,0,asm,13001+23001,44776.8645 +gfx936,no_quant,torch.bfloat16,1,384,4096,128,8,0,0,asm,10008+20000,88.778 +gfx936,no_quant,torch.bfloat16,2,384,4096,128,8,0,0,asm,10012+20000,145.6453 +gfx936,no_quant,torch.bfloat16,3,384,4096,128,8,0,0,asm,10002+20000,226.1335 
+gfx936,no_quant,torch.bfloat16,4,384,4096,128,8,0,0,asm,10008+20000,262.7649 +gfx936,no_quant,torch.bfloat16,5,384,4096,128,8,0,0,asm,10011+20000,309.5268 +gfx936,no_quant,torch.bfloat16,6,384,4096,128,8,0,0,asm,10002+20000,379.8004 +gfx936,no_quant,torch.bfloat16,7,384,4096,128,8,0,0,asm,10008+20000,421.0213 +gfx936,no_quant,torch.bfloat16,8,384,4096,128,8,0,0,asm,10011+20000,473.8716 +gfx936,no_quant,torch.bfloat16,9,384,4096,128,8,0,0,asm,10011+20000,501.8547 +gfx936,no_quant,torch.bfloat16,10,384,4096,128,8,0,0,asm,10002+20000,554.2504 +gfx936,no_quant,torch.bfloat16,11,384,4096,128,8,0,0,asm,10002+20000,569.8967 +gfx936,no_quant,torch.bfloat16,12,384,4096,128,8,0,0,asm,10008+20000,607.4544 +gfx936,no_quant,torch.bfloat16,13,384,4096,128,8,0,0,asm,10002+20000,647.4375 +gfx936,no_quant,torch.bfloat16,14,384,4096,128,8,0,0,asm,10012+20000,668.9448 +gfx936,no_quant,torch.bfloat16,15,384,4096,128,8,0,0,asm,10002+20000,691.8079 +gfx936,no_quant,torch.bfloat16,16,384,4096,128,8,0,0,asm,10002+20000,721.2395 +gfx936,no_quant,torch.bfloat16,17,384,4096,128,8,0,0,asm,10002+20000,722.8702 +gfx936,no_quant,torch.bfloat16,18,384,4096,128,8,0,0,asm,10002+20000,759.9647 +gfx936,no_quant,torch.bfloat16,20,384,4096,128,8,0,0,asm,10002+20000,807.611 +gfx936,no_quant,torch.bfloat16,24,384,4096,128,8,0,0,asm,10002+20000,838.7015 +gfx936,no_quant,torch.bfloat16,28,384,4096,128,8,0,0,asm,10002+20000,864.4391 +gfx936,no_quant,torch.bfloat16,32,384,4096,128,8,0,0,asm,10002+20000,897.0201 +gfx936,no_quant,torch.bfloat16,34,384,4096,128,8,0,0,asm,10002+20000,897.0423 +gfx936,no_quant,torch.bfloat16,36,384,4096,128,8,0,0,asm,10002+20000,925.1771 +gfx936,no_quant,torch.bfloat16,40,384,4096,128,8,0,0,asm,10002+20000,946.8443 +gfx936,no_quant,torch.bfloat16,44,384,4096,128,8,0,0,asm,10002+20000,982.0526 +gfx936,no_quant,torch.bfloat16,48,384,4096,128,8,0,0,asm,10002+20000,1004.0483 +gfx936,no_quant,torch.bfloat16,56,384,4096,128,8,0,0,asm,10002+20000,1008.3682999999999 
+gfx936,no_quant,torch.bfloat16,64,384,4096,128,8,0,0,asm,10002+20000,1036.6745 +gfx936,no_quant,torch.bfloat16,68,384,4096,128,8,0,0,asm,10002+20000,1009.5556000000001 +gfx936,no_quant,torch.bfloat16,72,384,4096,128,8,0,0,asm,10002+20000,1017.5387000000001 +gfx936,no_quant,torch.bfloat16,80,384,4096,128,8,0,0,asm,10002+20000,1016.1324000000001 +gfx936,no_quant,torch.bfloat16,88,384,4096,128,8,0,0,asm,10002+20000,1022.3051 +gfx936,no_quant,torch.bfloat16,96,384,4096,128,8,0,0,asm,10002+20000,1028.1575 +gfx936,no_quant,torch.bfloat16,104,384,4096,128,8,0,0,asm,10002+20000,1029.3365 +gfx936,no_quant,torch.bfloat16,112,384,4096,128,8,0,0,asm,10002+20000,1030.6838 +gfx936,no_quant,torch.bfloat16,128,384,4096,128,8,0,0,asm,10002+20000,1038.3891 +gfx936,no_quant,torch.bfloat16,144,384,4096,128,8,0,0,asm,10002+20000,1049.4039 +gfx936,no_quant,torch.bfloat16,160,384,4096,128,8,0,0,asm,10002+20000,1057.5723 +gfx936,no_quant,torch.bfloat16,192,384,4096,128,8,0,0,asm,12001+22001,1096.2332 +gfx936,no_quant,torch.bfloat16,224,384,4096,128,8,0,0,asm,12001+22001,1105.1679 +gfx936,no_quant,torch.bfloat16,256,384,4096,128,8,0,0,asm,12001+22001,1114.4311 +gfx936,no_quant,torch.bfloat16,320,384,4096,128,8,0,0,asm,12001+22001,1129.5048 +gfx936,no_quant,torch.bfloat16,384,384,4096,128,8,0,0,asm,12001+22001,1159.4921 +gfx936,no_quant,torch.bfloat16,448,384,4096,128,8,0,0,asm,12001+22001,1161.1932 +gfx936,no_quant,torch.bfloat16,512,384,4096,128,8,0,0,asm,12001+22001,1180.1742 +gfx936,no_quant,torch.bfloat16,576,384,4096,128,8,0,0,asm,12001+22001,1196.2163 +gfx936,no_quant,torch.bfloat16,640,384,4096,128,8,0,0,asm,12001+22001,1219.7446 +gfx936,no_quant,torch.bfloat16,704,384,4096,128,8,0,0,asm,12001+22001,1224.0478 +gfx936,no_quant,torch.bfloat16,768,384,4096,128,8,0,0,asm,12001+22001,1238.1277 +gfx936,no_quant,torch.bfloat16,832,384,4096,128,8,0,0,asm,12001+22001,1252.9572 +gfx936,no_quant,torch.bfloat16,896,384,4096,128,8,0,0,asm,12001+22001,1309.0665 
+gfx936,no_quant,torch.bfloat16,960,384,4096,128,8,0,0,asm,13001+23001,1374.0515 +gfx936,no_quant,torch.bfloat16,1024,384,4096,128,8,0,0,asm,13001+23001,1385.7821 +gfx936,no_quant,torch.bfloat16,1152,384,4096,128,8,0,0,asm,13001+23001,1423.6346 +gfx936,no_quant,torch.bfloat16,1280,384,4096,128,8,0,0,asm,13001+23001,1419.9631 +gfx936,no_quant,torch.bfloat16,1408,384,4096,128,8,0,0,asm,13001+23001,1439.9123 +gfx936,no_quant,torch.bfloat16,1536,384,4096,128,8,0,0,asm,13001+23001,1467.6091 +gfx936,no_quant,torch.bfloat16,1664,384,4096,128,8,0,0,asm,13001+23001,1592.1729 +gfx936,no_quant,torch.bfloat16,1792,384,4096,128,8,0,0,asm,13001+23001,1631.3979 +gfx936,no_quant,torch.bfloat16,1920,384,4096,128,8,0,0,asm,13001+23001,1820.3655 +gfx936,no_quant,torch.bfloat16,2048,384,4096,128,8,0,0,asm,13001+23001,2047.0343 +gfx936,no_quant,torch.bfloat16,2304,384,4096,128,8,0,0,asm,13001+23001,2625.8116 +gfx936,no_quant,torch.bfloat16,2560,384,4096,128,8,0,0,asm,13001+23001,2679.9506 +gfx936,no_quant,torch.bfloat16,2816,384,4096,128,8,0,0,asm,13001+23001,2715.7403 +gfx936,no_quant,torch.bfloat16,3072,384,4096,128,8,0,0,asm,13001+23001,2745.2056 +gfx936,no_quant,torch.bfloat16,3328,384,4096,128,8,0,0,asm,13001+23001,2781.8456 +gfx936,no_quant,torch.bfloat16,3584,384,4096,128,8,0,0,asm,13001+23001,2925.4747 +gfx936,no_quant,torch.bfloat16,3840,384,4096,128,8,0,0,asm,13001+23001,3025.5921 +gfx936,no_quant,torch.bfloat16,4096,384,4096,128,8,0,0,asm,13001+23001,3433.3634 +gfx936,no_quant,torch.bfloat16,4608,384,4096,128,8,0,0,asm,13001+23001,4062.0351 +gfx936,no_quant,torch.bfloat16,5120,384,4096,128,8,0,0,asm,13001+23001,4116.7715 +gfx936,no_quant,torch.bfloat16,5632,384,4096,128,8,0,0,asm,13001+23001,4327.2296 +gfx936,no_quant,torch.bfloat16,6144,384,4096,128,8,0,0,asm,13001+23001,4917.9061 +gfx936,no_quant,torch.bfloat16,6656,384,4096,128,8,0,0,asm,13001+23001,5421.0124 +gfx936,no_quant,torch.bfloat16,7168,384,4096,128,8,0,0,asm,13001+23001,5534.0483 
+gfx936,no_quant,torch.bfloat16,7680,384,4096,128,8,0,0,asm,13001+23001,5698.4187 +gfx936,no_quant,torch.bfloat16,8192,384,4096,128,8,0,0,asm,13001+23001,6196.8255 +gfx936,no_quant,torch.bfloat16,10240,384,4096,128,8,0,0,asm,13001+23001,7595.145 +gfx936,no_quant,torch.bfloat16,12288,384,4096,128,8,0,0,asm,13001+23001,8934.1889 +gfx936,no_quant,torch.bfloat16,14336,384,4096,128,8,0,0,asm,13001+23001,10329.2311 +gfx936,no_quant,torch.bfloat16,16384,384,4096,128,8,0,0,asm,13001+23001,11702.0355 +gfx936,no_quant,torch.bfloat16,17408,384,4096,128,8,0,0,asm,13001+23001,12483.0714 +gfx936,no_quant,torch.bfloat16,24576,384,4096,128,8,0,0,asm,13001+23001,17223.7804 +gfx936,no_quant,torch.bfloat16,32768,384,4096,128,8,0,0,asm,13001+23001,22719.4254 +gfx936,no_quant,torch.bfloat16,40960,384,4096,128,8,0,0,asm,13001+23001,28232.8175 +gfx936,no_quant,torch.bfloat16,49152,384,4096,128,8,0,0,asm,13001+23001,33760.5851 +gfx936,no_quant,torch.bfloat16,57344,384,4096,128,8,0,0,asm,13001+23001,39304.2245 +gfx936,no_quant,torch.bfloat16,65536,384,4096,128,8,0,0,asm,13001+23001,44786.0225 +gfx936,no_quant,torch.bfloat16,65536,384,4096,128,8,0,0,asm,13001+23001,44776.8645 +gfx936,no_quant,torch.bfloat16,1,896,1280,64,6,0,0,asm,10011+20000,53.2828 +gfx936,no_quant,torch.bfloat16,8,896,1280,64,6,0,0,asm,10002+20000,229.8033 +gfx936,no_quant,torch.bfloat16,16,896,1280,64,6,0,0,asm,10002+20001,343.8064 +gfx936,no_quant,torch.bfloat16,24,896,1280,64,6,0,0,asm,10002+20001,374.6831 +gfx936,no_quant,torch.bfloat16,32,896,1280,64,6,0,0,asm,10002+20001,391.7386 +gfx936,no_quant,torch.bfloat16,48,896,1280,64,6,0,0,asm,10002+20001,410.7332 +gfx936,no_quant,torch.bfloat16,64,896,1280,64,6,0,0,asm,10002+20001,423.3508 +gfx936,no_quant,torch.bfloat16,96,896,1280,64,6,0,0,asm,10007+20001,432.4509 +gfx936,no_quant,torch.bfloat16,128,896,1280,64,6,0,0,asm,10002+20001,458.4495 +gfx936,no_quant,torch.bfloat16,256,896,1280,64,6,0,0,asm,11006+20000,479.9704 
+gfx936,no_quant,torch.bfloat16,512,896,1280,64,6,0,0,asm,13001+23002,520.5426 +gfx936,no_quant,torch.bfloat16,1024,896,1280,64,6,0,0,asm,13001+23002,572.3737 +gfx936,no_quant,torch.bfloat16,2048,896,1280,64,6,0,0,asm,13001+23001,789.1126 +gfx936,no_quant,torch.bfloat16,4096,896,1280,64,6,0,0,asm,13001+23001,1279.3058 +gfx936,no_quant,torch.bfloat16,8192,896,1280,64,6,0,0,asm,13001+23001,2321.6618 +gfx936,no_quant,torch.bfloat16,1,448,1280,64,6,0,0,asm,10009+20000,36.8876 +gfx936,no_quant,torch.bfloat16,8,448,1280,64,6,0,0,asm,10010+20000,123.3791 +gfx936,no_quant,torch.bfloat16,16,448,1280,64,6,0,0,asm,10002+20001,181.5932 +gfx936,no_quant,torch.bfloat16,24,448,1280,64,6,0,0,asm,10002+20002,201.2532 +gfx936,no_quant,torch.bfloat16,32,448,1280,64,6,0,0,asm,10002+20001,206.5698 +gfx936,no_quant,torch.bfloat16,48,448,1280,64,6,0,0,asm,10002+20001,219.3329 +gfx936,no_quant,torch.bfloat16,64,448,1280,64,6,0,0,asm,10002+20001,226.2244 +gfx936,no_quant,torch.bfloat16,96,448,1280,64,6,0,0,asm,10002+20001,228.6381 +gfx936,no_quant,torch.bfloat16,128,448,1280,64,6,0,0,asm,10002+20000,250.29160000000002 +gfx936,no_quant,torch.bfloat16,256,448,1280,64,6,0,0,asm,11006+20000,257.0169 +gfx936,no_quant,torch.bfloat16,512,448,1280,64,6,0,0,asm,12004+21001,293.6818 +gfx936,no_quant,torch.bfloat16,1024,448,1280,64,6,0,0,asm,13000+22001,347.8455 +gfx936,no_quant,torch.bfloat16,2048,448,1280,64,6,0,0,asm,13001+23001,530.3532 +gfx936,no_quant,torch.bfloat16,4096,448,1280,64,6,0,0,asm,13001+23001,832.2113 +gfx936,no_quant,torch.bfloat16,8192,448,1280,64,6,0,0,asm,13001+23001,1487.3629 diff --git a/aiter/configs/tuned_fmoe_asm_shuffle.csv b/aiter/configs/tuned_fmoe_asm_shuffle.csv index 3f4ff50797c417d71752f67b41bf379b2fe72a32..c825c2c7b56c94851a95d3861c68f912f31139db 100644 --- a/aiter/configs/tuned_fmoe_asm_shuffle.csv +++ b/aiter/configs/tuned_fmoe_asm_shuffle.csv @@ -121,6 +121,57 @@ gfx938,no_quant,torch.float16,32768,352,4096,129,9,0,0,asm,13001+23001,16397.301 
gfx938,no_quant,torch.float16,40960,352,4096,129,9,0,0,asm,13001+23001,20398.5288 gfx938,no_quant,torch.float16,49152,352,4096,129,9,0,0,asm,13001+23001,24396.6972 gfx938,no_quant,torch.float16,65536,352,4096,129,9,0,0,asm,13001+23001,32435.0655 +gfx938,no_quant,torch.bfloat16,1,192,2048,128,8,0,0,asm,10007+20000,39.2876 +gfx938,no_quant,torch.bfloat16,2,192,2048,128,8,0,0,asm,10009+20000,51.8687 +gfx938,no_quant,torch.bfloat16,4,192,2048,128,8,0,0,asm,10006+20000,73.8475 +gfx938,no_quant,torch.bfloat16,8,192,2048,128,8,0,0,asm,10006+20000,107.3038 +gfx938,no_quant,torch.bfloat16,16,192,2048,128,8,0,0,asm,10007+20000,149.3254 +gfx938,no_quant,torch.bfloat16,32,192,2048,128,8,0,0,asm,10006+20000,182.4199 +gfx938,no_quant,torch.bfloat16,64,192,2048,128,8,0,0,asm,10006+20000,213.6452 +gfx938,no_quant,torch.bfloat16,128,192,2048,128,8,0,0,asm,10006+20000,205.5664 +gfx938,no_quant,torch.bfloat16,256,192,2048,128,8,0,0,asm,11004+21001,225.3643 +gfx938,no_quant,torch.bfloat16,512,192,2048,128,8,0,0,asm,11004+21001,268.9685 +gfx938,no_quant,torch.bfloat16,1024,192,2048,128,8,0,0,asm,12002+22001,373.4482 +gfx938,no_quant,torch.bfloat16,2048,192,2048,128,8,0,0,asm,12001+22001,544.0333 +gfx938,no_quant,torch.bfloat16,4096,192,2048,128,8,0,0,asm,13001+23001,859.873 +gfx938,no_quant,torch.bfloat16,8192,192,2048,128,8,0,0,asm,13001+23001,1515.4337 +gfx938,no_quant,torch.bfloat16,16384,192,2048,128,8,0,0,asm,13001+23001,2881.8408 +gfx938,no_quant,torch.bfloat16,32768,192,2048,128,8,0,0,asm,13001+23001,5550.2244 +gfx938,no_quant,torch.bfloat16,65536,192,2048,128,8,0,0,asm,13001+23001,10944.1702 +gfx938,no_quant,torch.bfloat16,1,384,2048,128,8,0,0,asm,10001+20000,53.056 +gfx938,no_quant,torch.bfloat16,2,384,2048,128,8,0,0,asm,10006+20000,77.3086 +gfx938,no_quant,torch.bfloat16,4,384,2048,128,8,0,0,asm,10006+20000,112.6348 +gfx938,no_quant,torch.bfloat16,8,384,2048,128,8,0,0,asm,10006+20000,177.2747 +gfx938,no_quant,torch.bfloat16,16,384,2048,128,8,0,0,asm,10006+20000,260.6267 
+gfx938,no_quant,torch.bfloat16,32,384,2048,128,8,0,0,asm,10006+20000,320.2976 +gfx938,no_quant,torch.bfloat16,64,384,2048,128,8,0,0,asm,10006+20000,367.4922 +gfx938,no_quant,torch.bfloat16,128,384,2048,128,8,0,0,asm,10009+20000,364.0352 +gfx938,no_quant,torch.bfloat16,256,384,2048,128,8,0,0,asm,11004+21001,391.2504 +gfx938,no_quant,torch.bfloat16,512,384,2048,128,8,0,0,asm,12000+22001,455.2254 +gfx938,no_quant,torch.bfloat16,1024,384,2048,128,8,0,0,asm,12001+22001,542.8131 +gfx938,no_quant,torch.bfloat16,2048,384,2048,128,8,0,0,asm,13001+23001,709.3484 +gfx938,no_quant,torch.bfloat16,4096,384,2048,128,8,0,0,asm,13001+23001,1144.2526 +gfx938,no_quant,torch.bfloat16,8192,384,2048,128,8,0,0,asm,13001+23001,1982.3018 +gfx938,no_quant,torch.bfloat16,16384,384,2048,128,8,0,0,asm,13001+23001,3922.8848 +gfx938,no_quant,torch.bfloat16,32768,384,2048,128,8,0,0,asm,13001+23001,7601.1435 +gfx938,no_quant,torch.bfloat16,65536,384,2048,128,8,0,0,asm,13001+23001,15053.8397 +gfx938,no_quant,torch.bfloat16,1,768,2048,128,8,0,0,asm,10006+20000,75.2789 +gfx938,no_quant,torch.bfloat16,2,768,2048,128,8,0,0,asm,10006+20000,119.599 +gfx938,no_quant,torch.bfloat16,4,768,2048,128,8,0,0,asm,10007+20000,189.241 +gfx938,no_quant,torch.bfloat16,8,768,2048,128,8,0,0,asm,10006+20000,311.8679 +gfx938,no_quant,torch.bfloat16,16,768,2048,128,8,0,0,asm,10008+20000,465.2827 +gfx938,no_quant,torch.bfloat16,32,768,2048,128,8,0,0,asm,10008+20000,574.3358 +gfx938,no_quant,torch.bfloat16,64,768,2048,128,8,0,0,asm,10008+20000,659.7834 +gfx938,no_quant,torch.bfloat16,128,768,2048,128,8,0,0,asm,10008+20000,672.0162 +gfx938,no_quant,torch.bfloat16,256,768,2048,128,8,0,0,asm,11002+21001,716.3866 +gfx938,no_quant,torch.bfloat16,512,768,2048,128,8,0,0,asm,12005+22001,802.6013 +gfx938,no_quant,torch.bfloat16,1024,768,2048,128,8,0,0,asm,13001+23001,945.1779 +gfx938,no_quant,torch.bfloat16,2048,768,2048,128,8,0,0,asm,13001+23001,1243.9816 
+gfx938,no_quant,torch.bfloat16,4096,768,2048,128,8,0,0,asm,13001+23001,1989.4641 +gfx938,no_quant,torch.bfloat16,8192,768,2048,128,8,0,0,asm,13001+23001,3554.2789 +gfx938,no_quant,torch.bfloat16,16384,768,2048,128,8,0,0,asm,13001+23001,6779.7759 +gfx938,no_quant,torch.bfloat16,32768,768,2048,128,8,0,0,asm,13001+23001,13203.9373 +gfx938,no_quant,torch.bfloat16,65536,768,2048,128,8,0,0,asm,13001+23001,26121.7552 gfx936,no_quant,torch.float16,1,256,3072,256,8,0,0,asm,10006+20000,56.4327 gfx936,no_quant,torch.float16,2,256,3072,256,8,0,0,asm,10006+20000,85.2664 gfx936,no_quant,torch.float16,4,256,3072,256,8,0,0,asm,10004+20000,148.02 @@ -221,3 +272,31 @@ gfx936,no_quant,torch.float16,12288,128,3072,256,8,0,0,asm,13001+23001,2844.9048 gfx936,no_quant,torch.float16,16384,128,3072,256,8,0,0,asm,13001+23001,3597.2571 gfx936,no_quant,torch.float16,24576,128,3072,256,8,0,0,asm,13001+23001,5205.65 gfx936,no_quant,torch.float16,32768,128,3072,256,8,0,0,asm,13001+23001,6847.9883 +gfx936,no_quant,torch.bfloat16,1,384,2048,128,8,0,0,asm,10005+20000,57.5107 +gfx936,no_quant,torch.bfloat16,2,384,2048,128,8,0,0,asm,10005+20000,86.1507 +gfx936,no_quant,torch.bfloat16,4,384,2048,128,8,0,0,asm,10001+20000,137.9569 +gfx936,no_quant,torch.bfloat16,8,384,2048,128,8,0,0,asm,10001+20000,230.5798 +gfx936,no_quant,torch.bfloat16,16,384,2048,128,8,0,0,asm,10001+20000,352.5754 +gfx936,no_quant,torch.bfloat16,32,384,2048,128,8,0,0,asm,10001+20000,436.6174 +gfx936,no_quant,torch.bfloat16,48,384,2048,128,8,0,0,asm,10001+20001,490.5933 +gfx936,no_quant,torch.bfloat16,64,384,2048,128,8,0,0,asm,10001+20001,508.85309 +gfx936,no_quant,torch.bfloat16,96,384,2048,128,8,0,0,asm,10001+20001,510.02899 +gfx936,no_quant,torch.bfloat16,128,384,2048,128,8,0,0,asm,10001+20001,517.6922 +gfx936,no_quant,torch.bfloat16,200,384,2048,128,8,0,0,asm,11000+21001,564.4711 +gfx936,no_quant,torch.bfloat16,256,384,2048,128,8,0,0,asm,11000+21001,580.3952 
+gfx936,no_quant,torch.bfloat16,384,384,2048,128,8,0,0,asm,11000+21001,635.8056 +gfx936,no_quant,torch.bfloat16,460,384,2048,128,8,0,0,asm,11000+21001,672.3782 +gfx936,no_quant,torch.bfloat16,512,384,2048,128,8,0,0,asm,11006+20002,695.6287 +gfx936,no_quant,torch.bfloat16,798,384,2048,128,8,0,0,asm,12004+22001,731.5276 +gfx936,no_quant,torch.bfloat16,1024,384,2048,128,8,0,0,asm,12000+22001,779.5612 +gfx936,no_quant,torch.bfloat16,1280,384,2048,128,8,0,0,asm,12000+22001,832.0495 +gfx936,no_quant,torch.bfloat16,1440,384,2048,128,8,0,0,asm,13000+22001,891.2665 +gfx936,no_quant,torch.bfloat16,1560,384,2048,128,8,0,0,asm,12004+22001,882.4158 +gfx936,no_quant,torch.bfloat16,1880,384,2048,128,8,0,0,asm,13000+22001,885.6415 +gfx936,no_quant,torch.bfloat16,2000,384,2048,128,8,0,0,asm,13000+23001,919.6288 +gfx936,no_quant,torch.bfloat16,2200,384,2048,128,8,0,0,asm,12005+22001,965.1782 +gfx936,no_quant,torch.bfloat16,2400,384,2048,128,8,0,0,asm,12001+22001,999.2413 +gfx936,no_quant,torch.bfloat16,2800,384,2048,128,8,0,0,asm,13001+23001,1065.1948 +gfx936,no_quant,torch.bfloat16,3200,384,2048,128,8,0,0,asm,13001+23001,1126.6853 +gfx936,no_quant,torch.bfloat16,3660,384,2048,128,8,0,0,asm,13001+23001,1216.6051 +gfx936,no_quant,torch.bfloat16,4096,384,2048,128,8,0,0,asm,13001+23001,1259.6619 \ No newline at end of file diff --git a/aiter/configs/tuned_fmoe_asm_w8a8_channel.csv b/aiter/configs/tuned_fmoe_asm_w8a8_channel.csv index b98bb247e9b86cd31993fe7efac742bb965e6b0b..f103f92a99f95cdc46d96d37b4995676afebf4e3 100644 --- a/aiter/configs/tuned_fmoe_asm_w8a8_channel.csv +++ b/aiter/configs/tuned_fmoe_asm_w8a8_channel.csv @@ -1042,3 +1042,1511 @@ gfx936,int8_w8a8_channel,torch.float16,12288,128,3072,256,8,0,0,asm,13001+23001, gfx936,int8_w8a8_channel,torch.float16,16384,128,3072,256,8,0,0,asm,13001+23001,3264.3642 gfx936,int8_w8a8_channel,torch.float16,24576,128,3072,256,8,0,0,asm,13001+23001,4759.7737 
gfx936,int8_w8a8_channel,torch.float16,32768,128,3072,256,8,0,0,asm,13001+23001,6279.1322 +gfx938,f8_w8a8_channel,torch.bfloat16,1,512,4096,256,6,0,0,asm,10008+20000,64.9697 +gfx938,f8_w8a8_channel,torch.bfloat16,2,512,4096,256,6,0,0,asm,10011+20000,93.5172 +gfx938,f8_w8a8_channel,torch.bfloat16,3,512,4096,256,6,0,0,asm,10002+20000,132.2538 +gfx938,f8_w8a8_channel,torch.bfloat16,4,512,4096,256,6,0,0,asm,10011+20000,155.6137 +gfx938,f8_w8a8_channel,torch.bfloat16,5,512,4096,256,6,0,0,asm,10011+20000,179.6641 +gfx938,f8_w8a8_channel,torch.bfloat16,6,512,4096,256,6,0,0,asm,10002+20000,220.5063 +gfx938,f8_w8a8_channel,torch.bfloat16,7,512,4096,256,6,0,0,asm,10011+20000,239.3947 +gfx938,f8_w8a8_channel,torch.bfloat16,8,512,4096,256,6,0,0,asm,10013+20000,259.5464 +gfx938,f8_w8a8_channel,torch.bfloat16,9,512,4096,256,6,0,0,asm,10002+20000,299.5799 +gfx938,f8_w8a8_channel,torch.bfloat16,10,512,4096,256,6,0,0,asm,10002+20000,320.0597 +gfx938,f8_w8a8_channel,torch.bfloat16,11,512,4096,256,6,0,0,asm,10011+20000,333.7271 +gfx938,f8_w8a8_channel,torch.bfloat16,12,512,4096,256,6,0,0,asm,10011+20000,342.6955 +gfx938,f8_w8a8_channel,torch.bfloat16,13,512,4096,256,6,0,0,asm,10013+20000,359.1504 +gfx938,f8_w8a8_channel,torch.bfloat16,14,512,4096,256,6,0,0,asm,10002+20000,395.1839 +gfx938,f8_w8a8_channel,torch.bfloat16,15,512,4096,256,6,0,0,asm,10011+20000,410.9396 +gfx938,f8_w8a8_channel,torch.bfloat16,16,512,4096,256,6,0,0,asm,10011+20000,420.0933 +gfx938,f8_w8a8_channel,torch.bfloat16,17,512,4096,256,6,0,0,asm,10013+20000,427.3493 +gfx938,f8_w8a8_channel,torch.bfloat16,18,512,4096,256,6,0,0,asm,10002+20000,453.3787 +gfx938,f8_w8a8_channel,torch.bfloat16,20,512,4096,256,6,0,0,asm,11005+20000,490.2628 +gfx938,f8_w8a8_channel,torch.bfloat16,24,512,4096,256,6,0,0,asm,11007+20000,567.5428 +gfx938,f8_w8a8_channel,torch.bfloat16,28,512,4096,256,6,0,0,asm,10002+20000,655.0066 +gfx938,f8_w8a8_channel,torch.bfloat16,32,512,4096,256,6,0,0,asm,10002+20000,726.0803 
+gfx938,f8_w8a8_channel,torch.bfloat16,34,512,4096,256,6,0,0,asm,10013+20000,734.8098 +gfx938,f8_w8a8_channel,torch.bfloat16,36,512,4096,256,6,0,0,asm,10009+20000,775.795 +gfx938,f8_w8a8_channel,torch.bfloat16,40,512,4096,256,6,0,0,asm,11007+20000,818.0771 +gfx938,f8_w8a8_channel,torch.bfloat16,44,512,4096,256,6,0,0,asm,11007+20000,887.8286 +gfx938,f8_w8a8_channel,torch.bfloat16,48,512,4096,256,6,0,0,asm,11004+20000,932.4768 +gfx938,f8_w8a8_channel,torch.bfloat16,56,512,4096,256,6,0,0,asm,10013+20000,986.0515 +gfx938,f8_w8a8_channel,torch.bfloat16,64,512,4096,256,6,0,0,asm,10013+20000,1050.5229 +gfx938,f8_w8a8_channel,torch.bfloat16,68,512,4096,256,6,0,0,asm,10013+20000,1069.2597 +gfx938,f8_w8a8_channel,torch.bfloat16,72,512,4096,256,6,0,0,asm,10013+20000,1122.986 +gfx938,f8_w8a8_channel,torch.bfloat16,80,512,4096,256,6,0,0,asm,10013+20000,1141.0322 +gfx938,f8_w8a8_channel,torch.bfloat16,88,512,4096,256,6,0,0,asm,10013+20000,1176.19 +gfx938,f8_w8a8_channel,torch.bfloat16,96,512,4096,256,6,0,0,asm,10013+20000,1191.3393 +gfx938,f8_w8a8_channel,torch.bfloat16,104,512,4096,256,6,0,0,asm,10013+20000,1208.3246 +gfx938,f8_w8a8_channel,torch.bfloat16,112,512,4096,256,6,0,0,asm,10013+20000,1223.2634 +gfx938,f8_w8a8_channel,torch.bfloat16,128,512,4096,256,6,0,0,asm,10013+20000,1256.1306 +gfx938,f8_w8a8_channel,torch.bfloat16,144,512,4096,256,6,0,0,asm,10013+20000,1273.7726 +gfx938,f8_w8a8_channel,torch.bfloat16,160,512,4096,256,6,0,0,asm,10013+20000,1337.7641 +gfx938,f8_w8a8_channel,torch.bfloat16,192,512,4096,256,6,0,0,asm,10013+20000,1323.9031 +gfx938,f8_w8a8_channel,torch.bfloat16,224,512,4096,256,6,0,0,asm,10013+20000,1333.9326 +gfx938,f8_w8a8_channel,torch.bfloat16,256,512,4096,256,6,0,0,asm,10013+20000,1343.3811 +gfx938,f8_w8a8_channel,torch.bfloat16,320,512,4096,256,6,0,0,asm,10013+20000,1357.4695 +gfx938,f8_w8a8_channel,torch.bfloat16,384,512,4096,256,6,0,0,asm,10013+20000,1423.6841 
+gfx938,f8_w8a8_channel,torch.bfloat16,448,512,4096,256,6,0,0,asm,12001+22000,1404.2484 +gfx938,f8_w8a8_channel,torch.bfloat16,512,512,4096,256,6,0,0,asm,12005+22000,1424.821 +gfx938,f8_w8a8_channel,torch.bfloat16,576,512,4096,256,6,0,0,asm,12005+22000,1483.4819 +gfx938,f8_w8a8_channel,torch.bfloat16,640,512,4096,256,6,0,0,asm,12005+22000,1449.8736 +gfx938,f8_w8a8_channel,torch.bfloat16,704,512,4096,256,6,0,0,asm,12005+22000,1460.4673 +gfx938,f8_w8a8_channel,torch.bfloat16,768,512,4096,256,6,0,0,asm,11007+21000,1516.282 +gfx938,f8_w8a8_channel,torch.bfloat16,832,512,4096,256,6,0,0,asm,12001+22000,1484.9305 +gfx938,f8_w8a8_channel,torch.bfloat16,896,512,4096,256,6,0,0,asm,12005+22000,1498.9684 +gfx938,f8_w8a8_channel,torch.bfloat16,960,512,4096,256,6,0,0,asm,12001+22000,1515.9789 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,512,4096,256,6,0,0,asm,12001+22000,1530.6652 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,512,4096,256,6,0,0,asm,12001+22000,1603.4229 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,512,4096,256,6,0,0,asm,12001+22000,1593.6629 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,512,4096,256,6,0,0,asm,12001+22000,1618.6649 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,512,4096,256,6,0,0,asm,12001+22000,1649.2079 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,512,4096,256,6,0,0,asm,12001+22000,1689.0732 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,512,4096,256,6,0,0,asm,12001+22000,1696.9302 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,512,4096,256,6,0,0,asm,12001+22000,1729.9067 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,512,4096,256,6,0,0,asm,12001+22000,1758.7655 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,512,4096,256,6,0,0,asm,12005+22000,1826.9085 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,512,4096,256,6,0,0,asm,12001+22000,1889.4516 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,512,4096,256,6,0,0,asm,13001+23001,2053.8806 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,512,4096,256,6,0,0,asm,13001+23001,2078.0912 
+gfx938,f8_w8a8_channel,torch.bfloat16,3328,512,4096,256,6,0,0,asm,13001+23001,2136.8785 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,512,4096,256,6,0,0,asm,13001+23001,2151.1017 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,512,4096,256,6,0,0,asm,13001+23001,2190.1756 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,512,4096,256,6,0,0,asm,13001+23001,2215.1607 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,512,4096,256,6,0,0,asm,13001+23001,2345.1226 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,512,4096,256,6,0,0,asm,13001+23001,2609.8883 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,512,4096,256,6,0,0,asm,13001+23001,3160.4807 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,512,4096,256,6,0,0,asm,12001+22001,3582.846 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,512,4096,256,6,0,0,asm,12001+22001,3712.0919 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,512,4096,256,6,0,0,asm,12001+22001,3825.0181 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,512,4096,256,6,0,0,asm,12001+22001,4007.3421 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,512,4096,256,6,0,0,asm,13001+23001,4081.6409 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,512,4096,256,6,0,0,asm,13001+23001,4520.1242 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,512,4096,256,6,0,0,asm,13001+23001,5796.0966 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,512,4096,256,6,0,0,asm,13001+23001,6163.6752 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,512,4096,256,6,0,0,asm,13001+23001,6976.4906 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,512,4096,256,6,0,0,asm,13001+23001,7636.5647 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,512,4096,256,6,0,0,asm,13001+23001,10009.1398 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,512,4096,256,6,0,0,asm,13001+23001,13063.6081 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,512,4096,256,6,0,0,asm,13001+23001,16152.603299999999 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,512,4096,256,6,0,0,asm,13001+23001,19138.8131 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,512,4096,256,6,0,0,asm,13001+23001,22122.0234 
+gfx938,f8_w8a8_channel,torch.bfloat16,65536,512,4096,256,6,0,0,asm,13001+23001,25145.5661 +gfx938,f8_w8a8_channel,torch.bfloat16,1,256,4096,256,6,0,0,asm,10002+20000,49.0539 +gfx938,f8_w8a8_channel,torch.bfloat16,2,256,4096,256,6,0,0,asm,10008+20000,69.6096 +gfx938,f8_w8a8_channel,torch.bfloat16,3,256,4096,256,6,0,0,asm,10011+20000,81.9043 +gfx938,f8_w8a8_channel,torch.bfloat16,4,256,4096,256,6,0,0,asm,10011+20000,96.3972 +gfx938,f8_w8a8_channel,torch.bfloat16,5,256,4096,256,6,0,0,asm,10013+20000,108.3464 +gfx938,f8_w8a8_channel,torch.bfloat16,6,256,4096,256,6,0,0,asm,10002+20000,135.0663 +gfx938,f8_w8a8_channel,torch.bfloat16,7,256,4096,256,6,0,0,asm,10008+20000,146.2664 +gfx938,f8_w8a8_channel,torch.bfloat16,8,256,4096,256,6,0,0,asm,10009+20000,154.9147 +gfx938,f8_w8a8_channel,torch.bfloat16,9,256,4096,256,6,0,0,asm,10011+20000,164.0599 +gfx938,f8_w8a8_channel,torch.bfloat16,10,256,4096,256,6,0,0,asm,10011+20000,173.3991 +gfx938,f8_w8a8_channel,torch.bfloat16,11,256,4096,256,6,0,0,asm,10012+20000,182.8562 +gfx938,f8_w8a8_channel,torch.bfloat16,12,256,4096,256,6,0,0,asm,10011+20000,187.5294 +gfx938,f8_w8a8_channel,torch.bfloat16,13,256,4096,256,6,0,0,asm,10011+20000,196.4389 +gfx938,f8_w8a8_channel,torch.bfloat16,14,256,4096,256,6,0,0,asm,10002+20000,224.5231 +gfx938,f8_w8a8_channel,torch.bfloat16,15,256,4096,256,6,0,0,asm,10009+20000,234.4093 +gfx938,f8_w8a8_channel,torch.bfloat16,16,256,4096,256,6,0,0,asm,10008+20000,236.0515 +gfx938,f8_w8a8_channel,torch.bfloat16,17,256,4096,256,6,0,0,asm,10011+20000,232.4528 +gfx938,f8_w8a8_channel,torch.bfloat16,18,256,4096,256,6,0,0,asm,10011+20000,238.6592 +gfx938,f8_w8a8_channel,torch.bfloat16,20,256,4096,256,6,0,0,asm,10011+20000,260.2927 +gfx938,f8_w8a8_channel,torch.bfloat16,24,256,4096,256,6,0,0,asm,10011+20000,306.3895 +gfx938,f8_w8a8_channel,torch.bfloat16,28,256,4096,256,6,0,0,asm,10011+20000,343.4197 +gfx938,f8_w8a8_channel,torch.bfloat16,32,256,4096,256,6,0,0,asm,10002+20000,396.7588 
+gfx938,f8_w8a8_channel,torch.bfloat16,34,256,4096,256,6,0,0,asm,10011+20000,389.6483 +gfx938,f8_w8a8_channel,torch.bfloat16,36,256,4096,256,6,0,0,asm,10011+20000,393.9598 +gfx938,f8_w8a8_channel,torch.bfloat16,40,256,4096,256,6,0,0,asm,10013+20000,413.7998 +gfx938,f8_w8a8_channel,torch.bfloat16,44,256,4096,256,6,0,0,asm,10011+20000,469.0166 +gfx938,f8_w8a8_channel,torch.bfloat16,48,256,4096,256,6,0,0,asm,10011+20000,478.7007 +gfx938,f8_w8a8_channel,torch.bfloat16,56,256,4096,256,6,0,0,asm,10013+20000,496.7975 +gfx938,f8_w8a8_channel,torch.bfloat16,64,256,4096,256,6,0,0,asm,10013+20000,543.4332 +gfx938,f8_w8a8_channel,torch.bfloat16,68,256,4096,256,6,0,0,asm,10013+20000,544.5616 +gfx938,f8_w8a8_channel,torch.bfloat16,72,256,4096,256,6,0,0,asm,11007+21000,570.2289 +gfx938,f8_w8a8_channel,torch.bfloat16,80,256,4096,256,6,0,0,asm,10013+20000,567.2564 +gfx938,f8_w8a8_channel,torch.bfloat16,88,256,4096,256,6,0,0,asm,10013+20000,596.8983 +gfx938,f8_w8a8_channel,torch.bfloat16,96,256,4096,256,6,0,0,asm,10013+20000,617.5636 +gfx938,f8_w8a8_channel,torch.bfloat16,104,256,4096,256,6,0,0,asm,10013+20000,621.3699 +gfx938,f8_w8a8_channel,torch.bfloat16,112,256,4096,256,6,0,0,asm,10013+20000,625.2604 +gfx938,f8_w8a8_channel,torch.bfloat16,128,256,4096,256,6,0,0,asm,10013+20000,638.4225 +gfx938,f8_w8a8_channel,torch.bfloat16,144,256,4096,256,6,0,0,asm,10011+20000,645.5047 +gfx938,f8_w8a8_channel,torch.bfloat16,160,256,4096,256,6,0,0,asm,10011+20000,705.6814 +gfx938,f8_w8a8_channel,torch.bfloat16,192,256,4096,256,6,0,0,asm,10013+20000,674.2035 +gfx938,f8_w8a8_channel,torch.bfloat16,224,256,4096,256,6,0,0,asm,11007+21000,683.3825 +gfx938,f8_w8a8_channel,torch.bfloat16,256,256,4096,256,6,0,0,asm,11007+21000,684.8983 +gfx938,f8_w8a8_channel,torch.bfloat16,320,256,4096,256,6,0,0,asm,11007+21000,699.3994 +gfx938,f8_w8a8_channel,torch.bfloat16,384,256,4096,256,6,0,0,asm,11007+21000,713.1762 +gfx938,f8_w8a8_channel,torch.bfloat16,448,256,4096,256,6,0,0,asm,11007+21000,723.7025 
+gfx938,f8_w8a8_channel,torch.bfloat16,512,256,4096,256,6,0,0,asm,11007+21000,738.852 +gfx938,f8_w8a8_channel,torch.bfloat16,576,256,4096,256,6,0,0,asm,11007+21000,799.3572 +gfx938,f8_w8a8_channel,torch.bfloat16,640,256,4096,256,6,0,0,asm,11007+21000,762.5656 +gfx938,f8_w8a8_channel,torch.bfloat16,704,256,4096,256,6,0,0,asm,11007+21000,778.4477 +gfx938,f8_w8a8_channel,torch.bfloat16,768,256,4096,256,6,0,0,asm,11007+21000,839.9381 +gfx938,f8_w8a8_channel,torch.bfloat16,832,256,4096,256,6,0,0,asm,11007+21000,799.0623 +gfx938,f8_w8a8_channel,torch.bfloat16,896,256,4096,256,6,0,0,asm,11007+21000,836.7212 +gfx938,f8_w8a8_channel,torch.bfloat16,960,256,4096,256,6,0,0,asm,11007+21000,848.0727 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,256,4096,256,6,0,0,asm,11007+21000,856.5527 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,256,4096,256,6,0,0,asm,11005+21000,924.4178 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,256,4096,256,6,0,0,asm,12005+22001,921.0157 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,256,4096,256,6,0,0,asm,12005+22001,944.8219 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,256,4096,256,6,0,0,asm,12005+22001,1006.0514000000001 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,256,4096,256,6,0,0,asm,12005+22001,996.0051 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,256,4096,256,6,0,0,asm,12001+22001,1008.4261000000001 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,256,4096,256,6,0,0,asm,12005+22001,1039.9376 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,256,4096,256,6,0,0,asm,12005+22001,1055.2724 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,256,4096,256,6,0,0,asm,12005+22001,1132.6533 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,256,4096,256,6,0,0,asm,12005+22001,1180.8385 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,256,4096,256,6,0,0,asm,12001+22001,1316.072 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,256,4096,256,6,0,0,asm,13001+23001,1364.4256 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,256,4096,256,6,0,0,asm,13001+23001,1402.0171 
+gfx938,f8_w8a8_channel,torch.bfloat16,3584,256,4096,256,6,0,0,asm,13001+23001,1428.0044 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,256,4096,256,6,0,0,asm,13001+23001,1458.6064 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,256,4096,256,6,0,0,asm,13001+23001,1480.5179 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,256,4096,256,6,0,0,asm,13001+23001,1593.1746 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,256,4096,256,6,0,0,asm,13001+23001,1768.6016 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,256,4096,256,6,0,0,asm,13001+23001,2115.3041 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,256,4096,256,6,0,0,asm,12001+22001,2367.6908 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,256,4096,256,6,0,0,asm,12001+22001,2451.6906 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,256,4096,256,6,0,0,asm,12001+22001,2551.1766 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,256,4096,256,6,0,0,asm,12001+22001,2670.5869 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,256,4096,256,6,0,0,asm,13001+23001,2771.5974 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,256,4096,256,6,0,0,asm,13001+23001,3116.8093 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,256,4096,256,6,0,0,asm,13001+23001,4024.8074999999994 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,256,4096,256,6,0,0,asm,13001+23001,4314.9881 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,256,4096,256,6,0,0,asm,13001+23001,4910.3977 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,256,4096,256,6,0,0,asm,13001+23001,5376.1151 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,256,4096,256,6,0,0,asm,13001+23001,7071.3878 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,256,4096,256,6,0,0,asm,13001+23001,9263.7619 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,256,4096,256,6,0,0,asm,13001+23001,11379.3186 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,256,4096,256,6,0,0,asm,13001+23001,13529.0724 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,256,4096,256,6,0,0,asm,13001+23001,15696.9227 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,256,4096,256,6,0,0,asm,13001+23001,17822.9558 
+gfx938,f8_w8a8_channel,torch.bfloat16,65536,256,4096,256,6,0,0,asm,13001+23001,17822.9558 +gfx938,f8_w8a8_channel,torch.bfloat16,1,128,4096,512,10,0,0,asm,10002+20100,61.3404 +gfx938,f8_w8a8_channel,torch.bfloat16,2,128,4096,512,10,0,0,asm,10008+20101,79.4288 +gfx938,f8_w8a8_channel,torch.bfloat16,3,128,4096,512,10,0,0,asm,10008+20101,89.4075 +gfx938,f8_w8a8_channel,torch.bfloat16,4,128,4096,512,10,0,0,asm,10011+20001,101.8791 +gfx938,f8_w8a8_channel,torch.bfloat16,5,128,4096,512,10,0,0,asm,10011+20001,110.3675 +gfx938,f8_w8a8_channel,torch.bfloat16,6,128,4096,512,10,0,0,asm,10011+20101,117.7528 +gfx938,f8_w8a8_channel,torch.bfloat16,7,128,4096,512,10,0,0,asm,10013+20101,127.9168 +gfx938,f8_w8a8_channel,torch.bfloat16,8,128,4096,512,10,0,0,asm,10002+20001,156.7674 +gfx938,f8_w8a8_channel,torch.bfloat16,9,128,4096,512,10,0,0,asm,10002+20101,169.3063 +gfx938,f8_w8a8_channel,torch.bfloat16,10,128,4096,512,10,0,0,asm,10008+20101,179.1253 +gfx938,f8_w8a8_channel,torch.bfloat16,11,128,4096,512,10,0,0,asm,10011+20101,186.9651 +gfx938,f8_w8a8_channel,torch.bfloat16,12,128,4096,512,10,0,0,asm,10011+20001,192.6495 +gfx938,f8_w8a8_channel,torch.bfloat16,13,128,4096,512,10,0,0,asm,10011+20101,204.7336 +gfx938,f8_w8a8_channel,torch.bfloat16,14,128,4096,512,10,0,0,asm,10011+20101,214.9736 +gfx938,f8_w8a8_channel,torch.bfloat16,15,128,4096,512,10,0,0,asm,10002+20001,247.1083 +gfx938,f8_w8a8_channel,torch.bfloat16,16,128,4096,512,10,0,0,asm,10002+20001,257.2555 +gfx938,f8_w8a8_channel,torch.bfloat16,17,128,4096,512,10,0,0,asm,10011+20101,242.44 +gfx938,f8_w8a8_channel,torch.bfloat16,18,128,4096,512,10,0,0,asm,10011+20101,247.6779 +gfx938,f8_w8a8_channel,torch.bfloat16,20,128,4096,512,10,0,0,asm,10011+20001,256.4443 +gfx938,f8_w8a8_channel,torch.bfloat16,24,128,4096,512,10,0,0,asm,10002+20101,309.7493 +gfx938,f8_w8a8_channel,torch.bfloat16,28,128,4096,512,10,0,0,asm,10011+20101,327.4082 +gfx938,f8_w8a8_channel,torch.bfloat16,32,128,4096,512,10,0,0,asm,10011+20101,389.7154 
+gfx938,f8_w8a8_channel,torch.bfloat16,34,128,4096,512,10,0,0,asm,10012+20101,357.9008 +gfx938,f8_w8a8_channel,torch.bfloat16,36,128,4096,512,10,0,0,asm,10002+20101,393.1261 +gfx938,f8_w8a8_channel,torch.bfloat16,40,128,4096,512,10,0,0,asm,10011+20101,398.0945 +gfx938,f8_w8a8_channel,torch.bfloat16,44,128,4096,512,10,0,0,asm,10011+20101,415.5933 +gfx938,f8_w8a8_channel,torch.bfloat16,48,128,4096,512,10,0,0,asm,10013+20101,429.2944 +gfx938,f8_w8a8_channel,torch.bfloat16,56,128,4096,512,10,0,0,asm,10011+20101,477.2353 +gfx938,f8_w8a8_channel,torch.bfloat16,64,128,4096,512,10,0,0,asm,10012+20101,511.1384 +gfx938,f8_w8a8_channel,torch.bfloat16,68,128,4096,512,10,0,0,asm,10013+20101,503.0374 +gfx938,f8_w8a8_channel,torch.bfloat16,72,128,4096,512,10,0,0,asm,10011+20001,560.5698 +gfx938,f8_w8a8_channel,torch.bfloat16,80,128,4096,512,10,0,0,asm,10013+20101,558.2371 +gfx938,f8_w8a8_channel,torch.bfloat16,88,128,4096,512,10,0,0,asm,10011+20101,559.5003 +gfx938,f8_w8a8_channel,torch.bfloat16,96,128,4096,512,10,0,0,asm,10013+20101,580.7802 +gfx938,f8_w8a8_channel,torch.bfloat16,104,128,4096,512,10,0,0,asm,10013+20101,618.5823 +gfx938,f8_w8a8_channel,torch.bfloat16,112,128,4096,512,10,0,0,asm,10011+20101,633.9506 +gfx938,f8_w8a8_channel,torch.bfloat16,128,128,4096,512,10,0,0,asm,10013+20101,640.2074 +gfx938,f8_w8a8_channel,torch.bfloat16,144,128,4096,512,10,0,0,asm,10013+20101,651.0538 +gfx938,f8_w8a8_channel,torch.bfloat16,160,128,4096,512,10,0,0,asm,10013+20101,662.6833 +gfx938,f8_w8a8_channel,torch.bfloat16,192,128,4096,512,10,0,0,asm,10011+20101,671.2138 +gfx938,f8_w8a8_channel,torch.bfloat16,224,128,4096,512,10,0,0,asm,10011+20101,679.5591 +gfx938,f8_w8a8_channel,torch.bfloat16,256,128,4096,512,10,0,0,asm,10013+20101,691.0874 +gfx938,f8_w8a8_channel,torch.bfloat16,320,128,4096,512,10,0,0,asm,10013+20101,710.0768 +gfx938,f8_w8a8_channel,torch.bfloat16,384,128,4096,512,10,0,0,asm,10011+20101,720.1315 
+gfx938,f8_w8a8_channel,torch.bfloat16,448,128,4096,512,10,0,0,asm,11007+21101,739.2556 +gfx938,f8_w8a8_channel,torch.bfloat16,512,128,4096,512,10,0,0,asm,11007+21101,755.2641 +gfx938,f8_w8a8_channel,torch.bfloat16,576,128,4096,512,10,0,0,asm,11007+21101,815.3902 +gfx938,f8_w8a8_channel,torch.bfloat16,640,128,4096,512,10,0,0,asm,11007+21101,775.8956 +gfx938,f8_w8a8_channel,torch.bfloat16,704,128,4096,512,10,0,0,asm,11007+21101,788.7293 +gfx938,f8_w8a8_channel,torch.bfloat16,768,128,4096,512,10,0,0,asm,11007+21101,797.6977 +gfx938,f8_w8a8_channel,torch.bfloat16,832,128,4096,512,10,0,0,asm,11007+21101,804.5356 +gfx938,f8_w8a8_channel,torch.bfloat16,896,128,4096,512,10,0,0,asm,11007+21101,813.3018 +gfx938,f8_w8a8_channel,torch.bfloat16,960,128,4096,512,10,0,0,asm,11007+21101,827.7018 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,128,4096,512,10,0,0,asm,11007+21101,859.8112 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,128,4096,512,10,0,0,asm,11005+21101,906.9689 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,128,4096,512,10,0,0,asm,11007+21101,891.1711 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,128,4096,512,10,0,0,asm,11007+21101,941.4109 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,128,4096,512,10,0,0,asm,11005+21101,992.2741 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,128,4096,512,10,0,0,asm,11003+21101,1042.8424 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,128,4096,512,10,0,0,asm,11005+21101,1146.0506 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,128,4096,512,10,0,0,asm,12001+21101,1143.2548 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,128,4096,512,10,0,0,asm,12005+21101,1165.4021 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,128,4096,512,10,0,0,asm,12005+21101,1217.7977 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,128,4096,512,10,0,0,asm,12001+21101,1269.5702 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,128,4096,512,10,0,0,asm,12005+21101,1345.1573 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,128,4096,512,10,0,0,asm,12005+21101,1456.5676 
+gfx938,f8_w8a8_channel,torch.bfloat16,3328,128,4096,512,10,0,0,asm,12001+22101,1596.6935 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,128,4096,512,10,0,0,asm,12001+22101,1700.3058 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,128,4096,512,10,0,0,asm,13001+22101,1797.0805 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,128,4096,512,10,0,0,asm,13001+22101,1808.7774 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,128,4096,512,10,0,0,asm,13001+22101,1898.4193 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,128,4096,512,10,0,0,asm,13001+22101,2009.0547000000001 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,128,4096,512,10,0,0,asm,13001+22101,2101.2144 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,128,4096,512,10,0,0,asm,13001+22101,2300.4644 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,128,4096,512,10,0,0,asm,12001+22101,2613.3645 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,128,4096,512,10,0,0,asm,12001+22101,2836.1681 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,128,4096,512,10,0,0,asm,12001+22101,3003.8056 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,128,4096,512,10,0,0,asm,12001+22101,3106.1971 +gfx938,f8_w8a8_channel,torch.bfloat16,9008,128,4096,512,10,0,0,asm,12001+22101,3256.4307 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,128,4096,512,10,0,0,asm,13001+22101,3614.1959 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,128,4096,512,10,0,0,asm,13001+22101,4049.7062999999994 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,128,4096,512,10,0,0,asm,12001+22101,5020.8025 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,128,4096,512,10,0,0,asm,13001+22101,5391.9083 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,128,4096,512,10,0,0,asm,13001+22101,5574.7625 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,128,4096,512,10,0,0,asm,13001+23101,7529.52 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,128,4096,512,10,0,0,asm,13001+23101,9864.0308 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,128,4096,512,10,0,0,asm,13001+23101,12122.0694 
+gfx938,f8_w8a8_channel,torch.bfloat16,49152,128,4096,512,10,0,0,asm,13001+23101,14532.4632 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,128,4096,512,10,0,0,asm,13001+23101,15834.6738 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,128,4096,512,10,0,0,asm,13001+23101,18168.4834 +gfx938,f8_w8a8_channel,torch.bfloat16,1,192,4096,192,8,0,0,asm,10002+20001,48.0604 +gfx938,f8_w8a8_channel,torch.bfloat16,2,192,4096,192,8,0,0,asm,10008+20001,61.4835 +gfx938,f8_w8a8_channel,torch.bfloat16,3,192,4096,192,8,0,0,asm,10008+20000,76.4645 +gfx938,f8_w8a8_channel,torch.bfloat16,4,192,4096,192,8,0,0,asm,10011+20000,87.3277 +gfx938,f8_w8a8_channel,torch.bfloat16,5,192,4096,192,8,0,0,asm,10008+20000,102.3091 +gfx938,f8_w8a8_channel,torch.bfloat16,6,192,4096,192,8,0,0,asm,10008+20001,109.9972 +gfx938,f8_w8a8_channel,torch.bfloat16,7,192,4096,192,8,0,0,asm,10002+20001,127.1341 +gfx938,f8_w8a8_channel,torch.bfloat16,8,192,4096,192,8,0,0,asm,10002+20001,135.1593 +gfx938,f8_w8a8_channel,torch.bfloat16,9,192,4096,192,8,0,0,asm,10008+20001,144.4141 +gfx938,f8_w8a8_channel,torch.bfloat16,10,192,4096,192,8,0,0,asm,10011+20001,152.7594 +gfx938,f8_w8a8_channel,torch.bfloat16,11,192,4096,192,8,0,0,asm,10002+20001,170.4348 +gfx938,f8_w8a8_channel,torch.bfloat16,12,192,4096,192,8,0,0,asm,10002+20000,178.1488 +gfx938,f8_w8a8_channel,torch.bfloat16,13,192,4096,192,8,0,0,asm,10002+20001,182.2583 +gfx938,f8_w8a8_channel,torch.bfloat16,14,192,4096,192,8,0,0,asm,10002+20001,189.8372 +gfx938,f8_w8a8_channel,torch.bfloat16,15,192,4096,192,8,0,0,asm,10008+20001,191.2014 +gfx938,f8_w8a8_channel,torch.bfloat16,16,192,4096,192,8,0,0,asm,10008+20001,196.3971 +gfx938,f8_w8a8_channel,torch.bfloat16,17,192,4096,192,8,0,0,asm,10011+20001,201.0932 +gfx938,f8_w8a8_channel,torch.bfloat16,18,192,4096,192,8,0,0,asm,10011+20001,208.1921 +gfx938,f8_w8a8_channel,torch.bfloat16,19,192,4096,192,8,0,0,asm,10011+20001,215.1816 +gfx938,f8_w8a8_channel,torch.bfloat16,20,192,4096,192,8,0,0,asm,10008+20001,222.5584 
+gfx938,f8_w8a8_channel,torch.bfloat16,21,192,4096,192,8,0,0,asm,10009+20001,225.7163 +gfx938,f8_w8a8_channel,torch.bfloat16,22,192,4096,192,8,0,0,asm,10008+20001,234.9288 +gfx938,f8_w8a8_channel,torch.bfloat16,23,192,4096,192,8,0,0,asm,10002+20001,243.51 +gfx938,f8_w8a8_channel,torch.bfloat16,24,192,4096,192,8,0,0,asm,10002+20001,252.2678 +gfx938,f8_w8a8_channel,torch.bfloat16,25,192,4096,192,8,0,0,asm,10002+20001,261.2141 +gfx938,f8_w8a8_channel,torch.bfloat16,26,192,4096,192,8,0,0,asm,10002+20001,264.0519 +gfx938,f8_w8a8_channel,torch.bfloat16,27,192,4096,192,8,0,0,asm,10002+20001,275.4121 +gfx938,f8_w8a8_channel,torch.bfloat16,28,192,4096,192,8,0,0,asm,10002+20001,276.5657 +gfx938,f8_w8a8_channel,torch.bfloat16,29,192,4096,192,8,0,0,asm,10011+20001,276.0856 +gfx938,f8_w8a8_channel,torch.bfloat16,30,192,4096,192,8,0,0,asm,10011+20001,280.5405 +gfx938,f8_w8a8_channel,torch.bfloat16,31,192,4096,192,8,0,0,asm,10011+20001,282.5867 +gfx938,f8_w8a8_channel,torch.bfloat16,32,192,4096,192,8,0,0,asm,10002+20001,297.3489 +gfx938,f8_w8a8_channel,torch.bfloat16,34,192,4096,192,8,0,0,asm,10002+20001,291.4595 +gfx938,f8_w8a8_channel,torch.bfloat16,36,192,4096,192,8,0,0,asm,10002+20001,293.5395 +gfx938,f8_w8a8_channel,torch.bfloat16,40,192,4096,192,8,0,0,asm,10009+20001,303.7121 +gfx938,f8_w8a8_channel,torch.bfloat16,50,192,4096,192,8,0,0,asm,10011+20001,321.9604 +gfx938,f8_w8a8_channel,torch.bfloat16,60,192,4096,192,8,0,0,asm,10002+20001,334.0279 +gfx938,f8_w8a8_channel,torch.bfloat16,64,192,4096,192,8,0,0,asm,10011+20001,336.2426 +gfx938,f8_w8a8_channel,torch.bfloat16,68,192,4096,192,8,0,0,asm,10002+20001,345.1858 +gfx938,f8_w8a8_channel,torch.bfloat16,72,192,4096,192,8,0,0,asm,10002+20001,348.0321 +gfx938,f8_w8a8_channel,torch.bfloat16,80,192,4096,192,8,0,0,asm,10008+20001,347.3332 +gfx938,f8_w8a8_channel,torch.bfloat16,88,192,4096,192,8,0,0,asm,10008+20001,349.8258 +gfx938,f8_w8a8_channel,torch.bfloat16,96,192,4096,192,8,0,0,asm,10008+20001,357.59 
+gfx938,f8_w8a8_channel,torch.bfloat16,112,192,4096,192,8,0,0,asm,10008+20001,365.1857 +gfx938,f8_w8a8_channel,torch.bfloat16,128,192,4096,192,8,0,0,asm,10002+20001,368.2005 +gfx938,f8_w8a8_channel,torch.bfloat16,164,192,4096,192,8,0,0,asm,10002+20001,377.6742 +gfx938,f8_w8a8_channel,torch.bfloat16,200,192,4096,192,8,0,0,asm,10002+20001,386.1121 +gfx938,f8_w8a8_channel,torch.bfloat16,256,192,4096,192,8,0,0,asm,10002+20001,400.2932 +gfx938,f8_w8a8_channel,torch.bfloat16,384,192,4096,192,8,0,0,asm,11004+21001,429.4721 +gfx938,f8_w8a8_channel,torch.bfloat16,448,192,4096,192,8,0,0,asm,11004+21001,442.3479 +gfx938,f8_w8a8_channel,torch.bfloat16,512,192,4096,192,8,0,0,asm,11004+21001,462.4826 +gfx938,f8_w8a8_channel,torch.bfloat16,576,192,4096,192,8,0,0,asm,11004+21001,536.0068 +gfx938,f8_w8a8_channel,torch.bfloat16,640,192,4096,192,8,0,0,asm,11004+21001,494.8532 +gfx938,f8_w8a8_channel,torch.bfloat16,768,192,4096,192,8,0,0,asm,12000+22001,575.7121 +gfx938,f8_w8a8_channel,torch.bfloat16,896,192,4096,192,8,0,0,asm,12000+22001,599.5942 +gfx938,f8_w8a8_channel,torch.bfloat16,960,192,4096,192,8,0,0,asm,12000+22001,605.6405 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,192,4096,192,8,0,0,asm,12000+22001,613.4047 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,192,4096,192,8,0,0,asm,12000+22001,670.5163 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,192,4096,192,8,0,0,asm,12000+22001,682.4826 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,192,4096,192,8,0,0,asm,12000+22001,747.2995 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,192,4096,192,8,0,0,asm,12000+22001,844.7813 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,192,4096,192,8,0,0,asm,13000+23001,969.2786 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,192,4096,192,8,0,0,asm,13000+23001,1000.8655 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,192,4096,192,8,0,0,asm,13000+23001,1017.5138 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,192,4096,192,8,0,0,asm,13000+23001,1065.388 
+gfx938,f8_w8a8_channel,torch.bfloat16,2560,192,4096,192,8,0,0,asm,13001+23001,1106.7182 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,192,4096,192,8,0,0,asm,13001+23001,1242.9464 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,192,4096,192,8,0,0,asm,13001+23001,1430.5507 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,192,4096,192,8,0,0,asm,12000+22001,1591.2913 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,192,4096,192,8,0,0,asm,12000+22001,1686.8364 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,192,4096,192,8,0,0,asm,12000+22001,1746.9627 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,192,4096,192,8,0,0,asm,12000+22001,1789.9269 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,192,4096,192,8,0,0,asm,13000+23001,1990.3815 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,192,4096,192,8,0,0,asm,13001+23001,2063.0553 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,192,4096,192,8,0,0,asm,13001+23001,2188.8321 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,192,4096,192,8,0,0,asm,13001+23001,2473.7248 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,192,4096,192,8,0,0,asm,13001+23001,2799.1545 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,192,4096,192,8,0,0,asm,13001+23001,2912.2738 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,192,4096,192,8,0,0,asm,13001+23001,2974.8588 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,192,4096,192,8,0,0,asm,13001+23001,3091.6693 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,192,4096,192,8,0,0,asm,13001+23001,3955.2855 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,192,4096,192,8,0,0,asm,13001+23001,4556.5461 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,192,4096,192,8,0,0,asm,13001+23001,5127.8724 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,192,4096,192,8,0,0,asm,13001+23001,5982.643 +gfx938,f8_w8a8_channel,torch.bfloat16,1,384,4096,192,8,0,0,asm,10008+20000,59.5804 +gfx938,f8_w8a8_channel,torch.bfloat16,2,384,4096,192,8,0,0,asm,10011+20000,79.7909 +gfx938,f8_w8a8_channel,torch.bfloat16,3,384,4096,192,8,0,0,asm,10011+20000,101.8709 
+gfx938,f8_w8a8_channel,torch.bfloat16,4,384,4096,192,8,0,0,asm,10002+20000,129.1467 +gfx938,f8_w8a8_channel,torch.bfloat16,5,384,4096,192,8,0,0,asm,10011+20000,147.8331 +gfx938,f8_w8a8_channel,torch.bfloat16,6,384,4096,192,8,0,0,asm,10011+20000,164.5404 +gfx938,f8_w8a8_channel,torch.bfloat16,7,384,4096,192,8,0,0,asm,10002+20000,197.1467 +gfx938,f8_w8a8_channel,torch.bfloat16,8,384,4096,192,8,0,0,asm,10008+20000,210.1236 +gfx938,f8_w8a8_channel,torch.bfloat16,9,384,4096,192,8,0,0,asm,10012+20000,229.4667 +gfx938,f8_w8a8_channel,torch.bfloat16,10,384,4096,192,8,0,0,asm,10011+20000,241.9297 +gfx938,f8_w8a8_channel,torch.bfloat16,11,384,4096,192,8,0,0,asm,10002+20000,281.2393 +gfx938,f8_w8a8_channel,torch.bfloat16,12,384,4096,192,8,0,0,asm,10002+20000,288.9109 +gfx938,f8_w8a8_channel,torch.bfloat16,13,384,4096,192,8,0,0,asm,10002+20000,302.1067 +gfx938,f8_w8a8_channel,torch.bfloat16,14,384,4096,192,8,0,0,asm,10011+20000,307.2604 +gfx938,f8_w8a8_channel,torch.bfloat16,15,384,4096,192,8,0,0,asm,10011+20000,311.4204 +gfx938,f8_w8a8_channel,torch.bfloat16,16,384,4096,192,8,0,0,asm,10011+20000,323.2856 +gfx938,f8_w8a8_channel,torch.bfloat16,17,384,4096,192,8,0,0,asm,10002+20000,344.4867 +gfx938,f8_w8a8_channel,torch.bfloat16,18,384,4096,192,8,0,0,asm,10002+20000,361.0931 +gfx938,f8_w8a8_channel,torch.bfloat16,19,384,4096,192,8,0,0,asm,10011+20000,370.6004 +gfx938,f8_w8a8_channel,torch.bfloat16,20,384,4096,192,8,0,0,asm,10011+20000,374.6257 +gfx938,f8_w8a8_channel,torch.bfloat16,21,384,4096,192,8,0,0,asm,10011+20000,379.3905 +gfx938,f8_w8a8_channel,torch.bfloat16,22,384,4096,192,8,0,0,asm,10011+20000,396.7562 +gfx938,f8_w8a8_channel,torch.bfloat16,23,384,4096,192,8,0,0,asm,10002+20000,420.537 +gfx938,f8_w8a8_channel,torch.bfloat16,24,384,4096,192,8,0,0,asm,10002+20000,433.1338 +gfx938,f8_w8a8_channel,torch.bfloat16,25,384,4096,192,8,0,0,asm,10008+20000,447.5888 +gfx938,f8_w8a8_channel,torch.bfloat16,26,384,4096,192,8,0,0,asm,10011+20000,449.3571 
+gfx938,f8_w8a8_channel,torch.bfloat16,27,384,4096,192,8,0,0,asm,10011+20000,458.3423 +gfx938,f8_w8a8_channel,torch.bfloat16,28,384,4096,192,8,0,0,asm,10011+20000,459.9846 +gfx938,f8_w8a8_channel,torch.bfloat16,29,384,4096,192,8,0,0,asm,10011+20000,459.3782 +gfx938,f8_w8a8_channel,torch.bfloat16,30,384,4096,192,8,0,0,asm,10011+20000,474.2329 +gfx938,f8_w8a8_channel,torch.bfloat16,31,384,4096,192,8,0,0,asm,10011+20000,479.2856 +gfx938,f8_w8a8_channel,torch.bfloat16,32,384,4096,192,8,0,0,asm,10002+20000,505.69390000000004 +gfx938,f8_w8a8_channel,torch.bfloat16,34,384,4096,192,8,0,0,asm,10008+20000,505.3288 +gfx938,f8_w8a8_channel,torch.bfloat16,36,384,4096,192,8,0,0,asm,10011+20000,512.3015 +gfx938,f8_w8a8_channel,torch.bfloat16,40,384,4096,192,8,0,0,asm,10011+20000,520.4194 +gfx938,f8_w8a8_channel,torch.bfloat16,50,384,4096,192,8,0,0,asm,10002+20000,572.9583 +gfx938,f8_w8a8_channel,torch.bfloat16,60,384,4096,192,8,0,0,asm,10011+20000,585.2783 +gfx938,f8_w8a8_channel,torch.bfloat16,64,384,4096,192,8,0,0,asm,10011+20000,586.9457 +gfx938,f8_w8a8_channel,torch.bfloat16,68,384,4096,192,8,0,0,asm,10011+20000,588.4109 +gfx938,f8_w8a8_channel,torch.bfloat16,72,384,4096,192,8,0,0,asm,10011+20000,596.2594 +gfx938,f8_w8a8_channel,torch.bfloat16,80,384,4096,192,8,0,0,asm,10011+20000,604.5288 +gfx938,f8_w8a8_channel,torch.bfloat16,88,384,4096,192,8,0,0,asm,10011+20000,600.8912 +gfx938,f8_w8a8_channel,torch.bfloat16,96,384,4096,192,8,0,0,asm,10011+20000,601.0764 +gfx938,f8_w8a8_channel,torch.bfloat16,112,384,4096,192,8,0,0,asm,10011+20000,617.6238 +gfx938,f8_w8a8_channel,torch.bfloat16,128,384,4096,192,8,0,0,asm,10013+20000,624.6806 +gfx938,f8_w8a8_channel,torch.bfloat16,164,384,4096,192,8,0,0,asm,11007+21000,635.552 +gfx938,f8_w8a8_channel,torch.bfloat16,200,384,4096,192,8,0,0,asm,11005+21000,639.9392 +gfx938,f8_w8a8_channel,torch.bfloat16,256,384,4096,192,8,0,0,asm,11007+21000,622.8442 +gfx938,f8_w8a8_channel,torch.bfloat16,384,384,4096,192,8,0,0,asm,11005+21000,648.8489 
+gfx938,f8_w8a8_channel,torch.bfloat16,448,384,4096,192,8,0,0,asm,11005+21000,669.3034 +gfx938,f8_w8a8_channel,torch.bfloat16,512,384,4096,192,8,0,0,asm,11005+21000,705.5139 +gfx938,f8_w8a8_channel,torch.bfloat16,576,384,4096,192,8,0,0,asm,11004+21000,752.3603 +gfx938,f8_w8a8_channel,torch.bfloat16,640,384,4096,192,8,0,0,asm,11005+21000,735.7456 +gfx938,f8_w8a8_channel,torch.bfloat16,768,384,4096,192,8,0,0,asm,12005+22001,785.4973 +gfx938,f8_w8a8_channel,torch.bfloat16,896,384,4096,192,8,0,0,asm,12005+22001,808.0238 +gfx938,f8_w8a8_channel,torch.bfloat16,960,384,4096,192,8,0,0,asm,12005+22000,850.3143 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,384,4096,192,8,0,0,asm,12005+22001,824.63 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,384,4096,192,8,0,0,asm,12005+22001,920.6298 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,384,4096,192,8,0,0,asm,12001+22001,900.1413 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,384,4096,192,8,0,0,asm,12003+22000,1022.5834 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,384,4096,192,8,0,0,asm,12005+22001,1157.3204 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,384,4096,192,8,0,0,asm,12001+22001,1223.6704 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,384,4096,192,8,0,0,asm,13001+22000,1410.853 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,384,4096,192,8,0,0,asm,12002+22000,1616.5035 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,384,4096,192,8,0,0,asm,13001+23001,1234.1287 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,384,4096,192,8,0,0,asm,13001+23001,1321.0339 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,384,4096,192,8,0,0,asm,13001+23001,1391.2062 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,384,4096,192,8,0,0,asm,13001+23001,1619.9061 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,384,4096,192,8,0,0,asm,12001+22001,1901.8681 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,384,4096,192,8,0,0,asm,12001+22001,2119.3495 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,384,4096,192,8,0,0,asm,12001+22001,2185.8674 
+gfx938,f8_w8a8_channel,torch.bfloat16,4096,384,4096,192,8,0,0,asm,13001+23001,2237.3788 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,384,4096,192,8,0,0,asm,13001+23001,2310.9957 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,384,4096,192,8,0,0,asm,13001+23001,2378.0357 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,384,4096,192,8,0,0,asm,13001+23001,2539.4922 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,384,4096,192,8,0,0,asm,13001+23001,2885.2772 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,384,4096,192,8,0,0,asm,13001+23001,3287.535 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,384,4096,192,8,0,0,asm,12001+21001,4397.0517 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,384,4096,192,8,0,0,asm,13001+23001,3472.5801 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,384,4096,192,8,0,0,asm,12001+22001,4283.1629 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,384,4096,192,8,0,0,asm,12003+22000,5482.4985 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,384,4096,192,8,0,0,asm,13001+23001,5284.9398 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,384,4096,192,8,0,0,asm,13001+23001,5947.1976 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,384,4096,192,8,0,0,asm,13001+23001,6931.904 +gfx938,f8_w8a8_channel,torch.bfloat16,1,320,6144,160,8,0,0,asm,10008+20000,69.6014 +gfx938,f8_w8a8_channel,torch.bfloat16,2,320,6144,160,8,0,0,asm,10011+20000,101.4667 +gfx938,f8_w8a8_channel,torch.bfloat16,3,320,6144,160,8,0,0,asm,10011+20001,121.433 +gfx938,f8_w8a8_channel,torch.bfloat16,4,320,6144,160,8,0,0,asm,10002+20000,156.473 +gfx938,f8_w8a8_channel,torch.bfloat16,5,320,6144,160,8,0,0,asm,10008+20000,176.153 +gfx938,f8_w8a8_channel,torch.bfloat16,6,320,6144,160,8,0,0,asm,10012+20000,193.4667 +gfx938,f8_w8a8_channel,torch.bfloat16,7,320,6144,160,8,0,0,asm,10011+20000,211.4052 +gfx938,f8_w8a8_channel,torch.bfloat16,8,320,6144,160,8,0,0,asm,10002+20000,248.4655 +gfx938,f8_w8a8_channel,torch.bfloat16,9,320,6144,160,8,0,0,asm,10002+20000,280.9531 
+gfx938,f8_w8a8_channel,torch.bfloat16,10,320,6144,160,8,0,0,asm,10011+20000,298.0814 +gfx938,f8_w8a8_channel,torch.bfloat16,11,320,6144,160,8,0,0,asm,10002+20000,334.0654 +gfx938,f8_w8a8_channel,torch.bfloat16,12,320,6144,160,8,0,0,asm,10008+20000,352.5205 +gfx938,f8_w8a8_channel,torch.bfloat16,13,320,6144,160,8,0,0,asm,10008+20000,364.6246 +gfx938,f8_w8a8_channel,torch.bfloat16,14,320,6144,160,8,0,0,asm,10011+20000,378.7972 +gfx938,f8_w8a8_channel,torch.bfloat16,15,320,6144,160,8,0,0,asm,10011+20000,390.9404 +gfx938,f8_w8a8_channel,torch.bfloat16,16,320,6144,160,8,0,0,asm,10008+20000,406.3341 +gfx938,f8_w8a8_channel,torch.bfloat16,17,320,6144,160,8,0,0,asm,10002+20000,410.4993 +gfx938,f8_w8a8_channel,torch.bfloat16,18,320,6144,160,8,0,0,asm,10002+20000,432.5962 +gfx938,f8_w8a8_channel,torch.bfloat16,19,320,6144,160,8,0,0,asm,10002+20000,446.8446 +gfx938,f8_w8a8_channel,torch.bfloat16,20,320,6144,160,8,0,0,asm,10002+20000,458.9288 +gfx938,f8_w8a8_channel,torch.bfloat16,21,320,6144,160,8,0,0,asm,10012+20000,468.0993 +gfx938,f8_w8a8_channel,torch.bfloat16,22,320,6144,160,8,0,0,asm,10012+20000,473.7583 +gfx938,f8_w8a8_channel,torch.bfloat16,23,320,6144,160,8,0,0,asm,10011+20000,482.0193 +gfx938,f8_w8a8_channel,torch.bfloat16,24,320,6144,160,8,0,0,asm,10011+20000,488.0067 +gfx938,f8_w8a8_channel,torch.bfloat16,25,320,6144,160,8,0,0,asm,10011+20000,495.5972 +gfx938,f8_w8a8_channel,torch.bfloat16,26,320,6144,160,8,0,0,asm,10012+20000,494.3508 +gfx938,f8_w8a8_channel,torch.bfloat16,27,320,6144,160,8,0,0,asm,10002+20000,514.0897 +gfx938,f8_w8a8_channel,torch.bfloat16,28,320,6144,160,8,0,0,asm,10002+20000,512.8855 +gfx938,f8_w8a8_channel,torch.bfloat16,29,320,6144,160,8,0,0,asm,10002+20000,521.6281 +gfx938,f8_w8a8_channel,torch.bfloat16,30,320,6144,160,8,0,0,asm,10002+20000,528.0692 +gfx938,f8_w8a8_channel,torch.bfloat16,31,320,6144,160,8,0,0,asm,10002+20000,545.9403 +gfx938,f8_w8a8_channel,torch.bfloat16,32,320,6144,160,8,0,0,asm,10002+20000,536.8097 
+gfx938,f8_w8a8_channel,torch.bfloat16,34,320,6144,160,8,0,0,asm,10002+20000,553.843 +gfx938,f8_w8a8_channel,torch.bfloat16,36,320,6144,160,8,0,0,asm,10011+20000,561.0852 +gfx938,f8_w8a8_channel,torch.bfloat16,40,320,6144,160,8,0,0,asm,10002+20000,576.1735 +gfx938,f8_w8a8_channel,torch.bfloat16,50,320,6144,160,8,0,0,asm,10002+20000,596.8992 +gfx938,f8_w8a8_channel,torch.bfloat16,60,320,6144,160,8,0,0,asm,10002+20000,611.6024 +gfx938,f8_w8a8_channel,torch.bfloat16,64,320,6144,160,8,0,0,asm,10002+20000,608.2677 +gfx938,f8_w8a8_channel,torch.bfloat16,68,320,6144,160,8,0,0,asm,10002+20000,609.0344 +gfx938,f8_w8a8_channel,torch.bfloat16,72,320,6144,160,8,0,0,asm,10002+20000,614.6851 +gfx938,f8_w8a8_channel,torch.bfloat16,80,320,6144,160,8,0,0,asm,10002+20000,619.4088 +gfx938,f8_w8a8_channel,torch.bfloat16,88,320,6144,160,8,0,0,asm,10002+20000,615.9141 +gfx938,f8_w8a8_channel,torch.bfloat16,96,320,6144,160,8,0,0,asm,10002+20000,628.9667 +gfx938,f8_w8a8_channel,torch.bfloat16,112,320,6144,160,8,0,0,asm,10002+20000,646.8109 +gfx938,f8_w8a8_channel,torch.bfloat16,128,320,6144,160,8,0,0,asm,10002+20000,659.7204 +gfx938,f8_w8a8_channel,torch.bfloat16,164,320,6144,160,8,0,0,asm,10002+20000,658.2551 +gfx938,f8_w8a8_channel,torch.bfloat16,200,320,6144,160,8,0,0,asm,10002+20000,680.8404 +gfx938,f8_w8a8_channel,torch.bfloat16,256,320,6144,160,8,0,0,asm,11005+21000,703.8734 +gfx938,f8_w8a8_channel,torch.bfloat16,384,320,6144,160,8,0,0,asm,11005+21000,741.0434 +gfx938,f8_w8a8_channel,torch.bfloat16,448,320,6144,160,8,0,0,asm,11005+21000,756.2762 +gfx938,f8_w8a8_channel,torch.bfloat16,512,320,6144,160,8,0,0,asm,11005+21000,785.7922 +gfx938,f8_w8a8_channel,torch.bfloat16,576,320,6144,160,8,0,0,asm,12001+22001,887.2916 +gfx938,f8_w8a8_channel,torch.bfloat16,640,320,6144,160,8,0,0,asm,12003+22001,897.8008 +gfx938,f8_w8a8_channel,torch.bfloat16,768,320,6144,160,8,0,0,asm,12001+22001,880.7285 +gfx938,f8_w8a8_channel,torch.bfloat16,896,320,6144,160,8,0,0,asm,12001+22001,923.4593 
+gfx938,f8_w8a8_channel,torch.bfloat16,960,320,6144,160,8,0,0,asm,12001+22001,941.1182 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,320,6144,160,8,0,0,asm,12001+22001,972.5456 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,320,6144,160,8,0,0,asm,12001+22001,1065.0424 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,320,6144,160,8,0,0,asm,12003+22001,1285.0508 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,320,6144,160,8,0,0,asm,13001+23001,1361.4546 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,320,6144,160,8,0,0,asm,13001+23001,1402.4149 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,320,6144,160,8,0,0,asm,13001+23001,1414.0279 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,320,6144,160,8,0,0,asm,13001+23001,1460.9076 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,320,6144,160,8,0,0,asm,13001+23001,1491.1982 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,320,6144,160,8,0,0,asm,13001+23001,1656.7476 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,320,6144,160,8,0,0,asm,13001+23001,2013.8255 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,320,6144,160,8,0,0,asm,12001+22001,2361.5574 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,320,6144,160,8,0,0,asm,12001+22001,2514.7185 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,320,6144,160,8,0,0,asm,12001+22001,2599.3669 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,320,6144,160,8,0,0,asm,12001+22001,2712.2848 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,320,6144,160,8,0,0,asm,13001+23001,2762.1038 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,320,6144,160,8,0,0,asm,13001+23001,2799.1474 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,320,6144,160,8,0,0,asm,13001+23001,2929.9516 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,320,6144,160,8,0,0,asm,13001+23001,3464.4512 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,320,6144,160,8,0,0,asm,13001+23001,3967.3241 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,320,6144,160,8,0,0,asm,13001+23001,4164.1323 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,320,6144,160,8,0,0,asm,13001+23001,4277.5134 
+gfx938,f8_w8a8_channel,torch.bfloat16,7168,320,6144,160,8,0,0,asm,13001+23001,4424.5111 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,320,6144,160,8,0,0,asm,13001+23001,4918.0516 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,320,6144,160,8,0,0,asm,13001+23001,5357.5801 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,320,6144,160,8,0,0,asm,13001+23001,6304.1441 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,320,6144,160,8,0,0,asm,13001+23001,7287.5066 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,320,6144,160,8,0,0,asm,13001+23001,8555.5617 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,320,6144,160,8,0,0,asm,13001+23001,9789.7686 +gfx938,f8_w8a8_channel,torch.bfloat16,1,512,4096,256,8,0,0,asm,10010+20000,75.8919 +gfx938,f8_w8a8_channel,torch.bfloat16,2,512,4096,256,8,0,0,asm,10011+20000,110.873 +gfx938,f8_w8a8_channel,torch.bfloat16,3,512,4096,256,8,0,0,asm,10011+20000,156.1189 +gfx938,f8_w8a8_channel,torch.bfloat16,4,512,4096,256,8,0,0,asm,10013+20000,187.1673 +gfx938,f8_w8a8_channel,torch.bfloat16,5,512,4096,256,8,0,0,asm,10011+20000,234.5187 +gfx938,f8_w8a8_channel,torch.bfloat16,6,512,4096,256,8,0,0,asm,10013+20000,263.8997 +gfx938,f8_w8a8_channel,torch.bfloat16,7,512,4096,256,8,0,0,asm,10011+20000,314.1481 +gfx938,f8_w8a8_channel,torch.bfloat16,8,512,4096,256,8,0,0,asm,10011+20000,328.3713 +gfx938,f8_w8a8_channel,torch.bfloat16,9,512,4096,256,8,0,0,asm,10013+20000,356.5142 +gfx938,f8_w8a8_channel,torch.bfloat16,10,512,4096,256,8,0,0,asm,10002+20000,401.9035 +gfx938,f8_w8a8_channel,torch.bfloat16,11,512,4096,256,8,0,0,asm,10013+20000,424.792 +gfx938,f8_w8a8_channel,torch.bfloat16,12,512,4096,256,8,0,0,asm,10013+20000,437.2972 +gfx938,f8_w8a8_channel,torch.bfloat16,13,512,4096,256,8,0,0,asm,10002+20000,477.3813 +gfx938,f8_w8a8_channel,torch.bfloat16,14,512,4096,256,8,0,0,asm,10013+20000,504.0507 +gfx938,f8_w8a8_channel,torch.bfloat16,15,512,4096,256,8,0,0,asm,10013+20000,519.0485 +gfx938,f8_w8a8_channel,torch.bfloat16,16,512,4096,256,8,0,0,asm,10002+20000,545.061 
+gfx938,f8_w8a8_channel,torch.bfloat16,17,512,4096,256,8,0,0,asm,11007+20000,559.3569 +gfx938,f8_w8a8_channel,torch.bfloat16,18,512,4096,256,8,0,0,asm,11005+20000,574.4559 +gfx938,f8_w8a8_channel,torch.bfloat16,20,512,4096,256,8,0,0,asm,10002+20000,634.363 +gfx938,f8_w8a8_channel,torch.bfloat16,24,512,4096,256,8,0,0,asm,11007+20000,722.2786 +gfx938,f8_w8a8_channel,torch.bfloat16,28,512,4096,256,8,0,0,asm,10013+20000,819.4688 +gfx938,f8_w8a8_channel,torch.bfloat16,32,512,4096,256,8,0,0,asm,10013+20000,883.6372 +gfx938,f8_w8a8_channel,torch.bfloat16,34,512,4096,256,8,0,0,asm,10013+20000,885.7056 +gfx938,f8_w8a8_channel,torch.bfloat16,36,512,4096,256,8,0,0,asm,11004+20000,925.9328 +gfx938,f8_w8a8_channel,torch.bfloat16,40,512,4096,256,8,0,0,asm,10013+20000,959.1117 +gfx938,f8_w8a8_channel,torch.bfloat16,44,512,4096,256,8,0,0,asm,10013+20000,1014.1347 +gfx938,f8_w8a8_channel,torch.bfloat16,48,512,4096,256,8,0,0,asm,10013+20000,1031.8273 +gfx938,f8_w8a8_channel,torch.bfloat16,56,512,4096,256,8,0,0,asm,10013+20000,1095.1198 +gfx938,f8_w8a8_channel,torch.bfloat16,64,512,4096,256,8,0,0,asm,10013+20000,1125.0987 +gfx938,f8_w8a8_channel,torch.bfloat16,68,512,4096,256,8,0,0,asm,10013+20000,1132.2397 +gfx938,f8_w8a8_channel,torch.bfloat16,72,512,4096,256,8,0,0,asm,10013+20000,1199.5153 +gfx938,f8_w8a8_channel,torch.bfloat16,80,512,4096,256,8,0,0,asm,10013+20000,1204.4331 +gfx938,f8_w8a8_channel,torch.bfloat16,88,512,4096,256,8,0,0,asm,10013+20000,1230.0583 +gfx938,f8_w8a8_channel,torch.bfloat16,96,512,4096,256,8,0,0,asm,10013+20000,1262.8582 +gfx938,f8_w8a8_channel,torch.bfloat16,104,512,4096,256,8,0,0,asm,10013+20000,1270.4204 +gfx938,f8_w8a8_channel,torch.bfloat16,112,512,4096,256,8,0,0,asm,10013+20000,1282.7065 +gfx938,f8_w8a8_channel,torch.bfloat16,128,512,4096,256,8,0,0,asm,10013+20000,1305.9148 +gfx938,f8_w8a8_channel,torch.bfloat16,144,512,4096,256,8,0,0,asm,10013+20000,1327.5401 +gfx938,f8_w8a8_channel,torch.bfloat16,160,512,4096,256,8,0,0,asm,10013+20000,1372.9126 
+gfx938,f8_w8a8_channel,torch.bfloat16,192,512,4096,256,8,0,0,asm,10013+20000,1337.5275 +gfx938,f8_w8a8_channel,torch.bfloat16,224,512,4096,256,8,0,0,asm,10013+20000,1346.6138 +gfx938,f8_w8a8_channel,torch.bfloat16,256,512,4096,256,8,0,0,asm,10013+20000,1360.3402 +gfx938,f8_w8a8_channel,torch.bfloat16,320,512,4096,256,8,0,0,asm,12005+22000,1397.5359 +gfx938,f8_w8a8_channel,torch.bfloat16,384,512,4096,256,8,0,0,asm,10012+20000,1409.5781 +gfx938,f8_w8a8_channel,torch.bfloat16,448,512,4096,256,8,0,0,asm,12005+22000,1431.1863 +gfx938,f8_w8a8_channel,torch.bfloat16,512,512,4096,256,8,0,0,asm,12005+22000,1453.0052 +gfx938,f8_w8a8_channel,torch.bfloat16,576,512,4096,256,8,0,0,asm,11004+21000,1552.9712 +gfx938,f8_w8a8_channel,torch.bfloat16,640,512,4096,256,8,0,0,asm,12005+22000,1488.6682 +gfx938,f8_w8a8_channel,torch.bfloat16,704,512,4096,256,8,0,0,asm,12001+22000,1506.5207 +gfx938,f8_w8a8_channel,torch.bfloat16,768,512,4096,256,8,0,0,asm,12005+22000,1524.1374 +gfx938,f8_w8a8_channel,torch.bfloat16,832,512,4096,256,8,0,0,asm,12001+22000,1553.2487 +gfx938,f8_w8a8_channel,torch.bfloat16,896,512,4096,256,8,0,0,asm,12005+22000,1573.4087 +gfx938,f8_w8a8_channel,torch.bfloat16,960,512,4096,256,8,0,0,asm,12001+22000,1577.7371 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,512,4096,256,8,0,0,asm,12001+22000,1596.3645 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,512,4096,256,8,0,0,asm,12005+22000,1656.3139 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,512,4096,256,8,0,0,asm,12001+22000,1659.0086 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,512,4096,256,8,0,0,asm,12001+22000,1694.2758 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,512,4096,256,8,0,0,asm,12001+22000,1747.2695 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,512,4096,256,8,0,0,asm,12001+22000,1764.5495 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,512,4096,256,8,0,0,asm,12001+22000,1814.0988 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,512,4096,256,8,0,0,asm,12001+22000,1863.4713 
+gfx938,f8_w8a8_channel,torch.bfloat16,2048,512,4096,256,8,0,0,asm,12001+22000,2004.3972 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,512,4096,256,8,0,0,asm,13001+23001,2068.1275 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,512,4096,256,8,0,0,asm,13001+23001,2104.7673 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,512,4096,256,8,0,0,asm,13001+23001,2164.767 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,512,4096,256,8,0,0,asm,13001+23001,2232.9101 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,512,4096,256,8,0,0,asm,13001+23001,2291.0403 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,512,4096,256,8,0,0,asm,13001+23001,2342.3748 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,512,4096,256,8,0,0,asm,13001+23001,2525.4564 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,512,4096,256,8,0,0,asm,13001+23001,2913.6658 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,512,4096,256,8,0,0,asm,12001+22001,3532.8052 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,512,4096,256,8,0,0,asm,12001+22001,3707.7436 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,512,4096,256,8,0,0,asm,12001+22001,3903.2379 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,512,4096,256,8,0,0,asm,13001+23001,4042.7495 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,512,4096,256,8,0,0,asm,13001+23001,4125.4021 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,512,4096,256,8,0,0,asm,13001+23001,4192.7535 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,512,4096,256,8,0,0,asm,13001+23001,4438.7736 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,512,4096,256,8,0,0,asm,13001+23001,4982.7885 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,512,4096,256,8,0,0,asm,13001+23001,5955.813 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,512,4096,256,8,0,0,asm,13001+23001,6843.4231 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,512,4096,256,8,0,0,asm,13001+23001,8050.5179 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,512,4096,256,8,0,0,asm,13001+23001,8920.3837 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,512,4096,256,8,0,0,asm,13001+23001,9615.6993 
+gfx938,f8_w8a8_channel,torch.bfloat16,24576,512,4096,256,8,0,0,asm,13001+23001,12861.3819 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,512,4096,256,8,0,0,asm,13001+23001,16857.1581 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,512,4096,256,8,0,0,asm,13001+23001,20774.7386 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,512,4096,256,8,0,0,asm,13001+23001,24836.1122 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,512,4096,256,8,0,0,asm,13001+23001,28813.0985 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,512,4096,256,8,0,0,asm,13001+23001,32831.1841 +gfx938,f8_w8a8_channel,torch.bfloat16,1,256,4096,256,8,0,0,asm,10002+20000,56.3044 +gfx938,f8_w8a8_channel,torch.bfloat16,2,256,4096,256,8,0,0,asm,10009+20000,77.8791 +gfx938,f8_w8a8_channel,torch.bfloat16,3,256,4096,256,8,0,0,asm,10011+20000,95.4364 +gfx938,f8_w8a8_channel,torch.bfloat16,4,256,4096,256,8,0,0,asm,10013+20000,114.7963 +gfx938,f8_w8a8_channel,torch.bfloat16,5,256,4096,256,8,0,0,asm,10002+20001,141.6684 +gfx938,f8_w8a8_channel,torch.bfloat16,6,256,4096,256,8,0,0,asm,10011+20000,157.0874 +gfx938,f8_w8a8_channel,torch.bfloat16,7,256,4096,256,8,0,0,asm,10011+20000,166.4009 +gfx938,f8_w8a8_channel,torch.bfloat16,8,256,4096,256,8,0,0,asm,10011+20000,181.5504 +gfx938,f8_w8a8_channel,torch.bfloat16,9,256,4096,256,8,0,0,asm,10011+20000,195.1926 +gfx938,f8_w8a8_channel,torch.bfloat16,10,256,4096,256,8,0,0,asm,10002+20000,227.9334 +gfx938,f8_w8a8_channel,torch.bfloat16,11,256,4096,256,8,0,0,asm,10011+20000,237.0449 +gfx938,f8_w8a8_channel,torch.bfloat16,12,256,4096,256,8,0,0,asm,10011+20000,243.7987 +gfx938,f8_w8a8_channel,torch.bfloat16,13,256,4096,256,8,0,0,asm,10012+20000,254.5859 +gfx938,f8_w8a8_channel,torch.bfloat16,14,256,4096,256,8,0,0,asm,10011+20000,268.8848 +gfx938,f8_w8a8_channel,torch.bfloat16,15,256,4096,256,8,0,0,asm,10013+20000,274.527 +gfx938,f8_w8a8_channel,torch.bfloat16,16,256,4096,256,8,0,0,asm,10002+20000,306.2069 +gfx938,f8_w8a8_channel,torch.bfloat16,17,256,4096,256,8,0,0,asm,10011+20000,306.8776 
+gfx938,f8_w8a8_channel,torch.bfloat16,18,256,4096,256,8,0,0,asm,10011+20000,313.0418 +gfx938,f8_w8a8_channel,torch.bfloat16,20,256,4096,256,8,0,0,asm,10011+20000,326.0776 +gfx938,f8_w8a8_channel,torch.bfloat16,24,256,4096,256,8,0,0,asm,10011+20000,384.5867 +gfx938,f8_w8a8_channel,torch.bfloat16,28,256,4096,256,8,0,0,asm,10013+20000,428.9265 +gfx938,f8_w8a8_channel,torch.bfloat16,32,256,4096,256,8,0,0,asm,10011+20000,476.4969 +gfx938,f8_w8a8_channel,torch.bfloat16,34,256,4096,256,8,0,0,asm,10013+20000,468.9065 +gfx938,f8_w8a8_channel,torch.bfloat16,36,256,4096,256,8,0,0,asm,10011+20000,474.2539 +gfx938,f8_w8a8_channel,torch.bfloat16,40,256,4096,256,8,0,0,asm,10013+20000,487.9128 +gfx938,f8_w8a8_channel,torch.bfloat16,44,256,4096,256,8,0,0,asm,10013+20000,531.6433 +gfx938,f8_w8a8_channel,torch.bfloat16,48,256,4096,256,8,0,0,asm,10013+20000,538.1696 +gfx938,f8_w8a8_channel,torch.bfloat16,56,256,4096,256,8,0,0,asm,10013+20000,550.0937 +gfx938,f8_w8a8_channel,torch.bfloat16,64,256,4096,256,8,0,0,asm,10013+20000,564.0726 +gfx938,f8_w8a8_channel,torch.bfloat16,68,256,4096,256,8,0,0,asm,10013+20000,570.9105 +gfx938,f8_w8a8_channel,torch.bfloat16,72,256,4096,256,8,0,0,asm,10002+20000,638.5988 +gfx938,f8_w8a8_channel,torch.bfloat16,80,256,4096,256,8,0,0,asm,10011+20000,623.4157 +gfx938,f8_w8a8_channel,torch.bfloat16,88,256,4096,256,8,0,0,asm,10013+20000,628.4768 +gfx938,f8_w8a8_channel,torch.bfloat16,96,256,4096,256,8,0,0,asm,10013+20000,633.9588 +gfx938,f8_w8a8_channel,torch.bfloat16,104,256,4096,256,8,0,0,asm,10011+20000,640.2747 +gfx938,f8_w8a8_channel,torch.bfloat16,112,256,4096,256,8,0,0,asm,10013+20000,647.8451 +gfx938,f8_w8a8_channel,torch.bfloat16,128,256,4096,256,8,0,0,asm,10013+20000,660.0304 +gfx938,f8_w8a8_channel,torch.bfloat16,144,256,4096,256,8,0,0,asm,11007+21000,667.3651 +gfx938,f8_w8a8_channel,torch.bfloat16,160,256,4096,256,8,0,0,asm,10013+20000,720.207 +gfx938,f8_w8a8_channel,torch.bfloat16,192,256,4096,256,8,0,0,asm,11007+21000,684.8472 
+gfx938,f8_w8a8_channel,torch.bfloat16,224,256,4096,256,8,0,0,asm,11007+21000,692.9482 +gfx938,f8_w8a8_channel,torch.bfloat16,256,256,4096,256,8,0,0,asm,11007+21000,701.0576 +gfx938,f8_w8a8_channel,torch.bfloat16,320,256,4096,256,8,0,0,asm,11007+21000,714.9185 +gfx938,f8_w8a8_channel,torch.bfloat16,384,256,4096,256,8,0,0,asm,11007+21000,730.2954 +gfx938,f8_w8a8_channel,torch.bfloat16,448,256,4096,256,8,0,0,asm,11007+21000,749.7058 +gfx938,f8_w8a8_channel,torch.bfloat16,512,256,4096,256,8,0,0,asm,11007+21000,767.71 +gfx938,f8_w8a8_channel,torch.bfloat16,576,256,4096,256,8,0,0,asm,11007+21000,911.0612 +gfx938,f8_w8a8_channel,torch.bfloat16,640,256,4096,256,8,0,0,asm,11007+21000,827.5752 +gfx938,f8_w8a8_channel,torch.bfloat16,704,256,4096,256,8,0,0,asm,11007+21000,841.3687 +gfx938,f8_w8a8_channel,torch.bfloat16,768,256,4096,256,8,0,0,asm,11007+21000,851.6931 +gfx938,f8_w8a8_channel,torch.bfloat16,832,256,4096,256,8,0,0,asm,11007+21000,854.2361 +gfx938,f8_w8a8_channel,torch.bfloat16,896,256,4096,256,8,0,0,asm,11007+21000,895.1287 +gfx938,f8_w8a8_channel,torch.bfloat16,960,256,4096,256,8,0,0,asm,12005+22001,913.5792 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,256,4096,256,8,0,0,asm,12005+22001,928.2065 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,256,4096,256,8,0,0,asm,12005+22001,1030.5556 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,256,4096,256,8,0,0,asm,12005+22001,974.0253 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,256,4096,256,8,0,0,asm,12005+22001,992.4589 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,256,4096,256,8,0,0,asm,12005+22001,1049.3093 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,256,4096,256,8,0,0,asm,12005+22001,1075.9619 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,256,4096,256,8,0,0,asm,12005+22001,1085.7218 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,256,4096,256,8,0,0,asm,12005+22001,1161.7554 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,256,4096,256,8,0,0,asm,12001+22001,1223.5404 
+gfx938,f8_w8a8_channel,torch.bfloat16,2304,256,4096,256,8,0,0,asm,13001+23001,1368.9801 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,256,4096,256,8,0,0,asm,13001+23001,1383.0179 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,256,4096,256,8,0,0,asm,13001+23001,1446.8324 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,256,4096,256,8,0,0,asm,13001+23001,1506.8069 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,256,4096,256,8,0,0,asm,13001+23001,1539.3036 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,256,4096,256,8,0,0,asm,13001+23001,1599.775 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,256,4096,256,8,0,0,asm,13001+23001,1742.5619 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,256,4096,256,8,0,0,asm,13001+23001,1961.475 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,256,4096,256,8,0,0,asm,12001+22001,2326.1309 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,256,4096,256,8,0,0,asm,12001+22001,2440.0339 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,256,4096,256,8,0,0,asm,12001+22001,2579.3011 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,256,4096,256,8,0,0,asm,13001+23001,2715.183 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,256,4096,256,8,0,0,asm,13001+23001,2793.6165 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,256,4096,256,8,0,0,asm,13001+23001,2894.8036 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,256,4096,256,8,0,0,asm,13001+23001,3076.6136 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,256,4096,256,8,0,0,asm,13001+23001,3431.2145 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,256,4096,256,8,0,0,asm,13001+23001,4102.8755 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,256,4096,256,8,0,0,asm,13001+23001,4808.2627 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,256,4096,256,8,0,0,asm,13001+23001,5609.1194 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,256,4096,256,8,0,0,asm,13001+23001,6265.4711 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,256,4096,256,8,0,0,asm,13001+23001,6744.4082 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,256,4096,256,8,0,0,asm,13001+23001,9062.0679 
+gfx938,f8_w8a8_channel,torch.bfloat16,32768,256,4096,256,8,0,0,asm,13001+23001,11895.1374 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,256,4096,256,8,0,0,asm,13001+23001,14666.4642 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,256,4096,256,8,0,0,asm,13001+23001,17470.387 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,256,4096,256,8,0,0,asm,13001+23001,20309.6104 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,256,4096,256,8,0,0,asm,13001+23001,23151.9759 +gfx938,int8_w8a8_channel,torch.float16,1,512,2048,256,8,0,0,asm,10010+20000,57.5423 +gfx938,int8_w8a8_channel,torch.float16,2,512,2048,256,8,0,0,asm,10013+20000,75.8832 +gfx938,int8_w8a8_channel,torch.float16,3,512,2048,256,8,0,0,asm,10013+20000,103.3274 +gfx938,int8_w8a8_channel,torch.float16,4,512,2048,256,8,0,0,asm,10013+20000,118.0053 +gfx938,int8_w8a8_channel,torch.float16,5,512,2048,256,8,0,0,asm,10013+20000,140.1358 +gfx938,int8_w8a8_channel,torch.float16,6,512,2048,256,8,0,0,asm,10013+20000,153.1881 +gfx938,int8_w8a8_channel,torch.float16,7,512,2048,256,8,0,0,asm,10013+20000,181.3652 +gfx938,int8_w8a8_channel,torch.float16,8,512,2048,256,8,0,0,asm,10013+20000,192.2788 +gfx938,int8_w8a8_channel,torch.float16,9,512,2048,256,8,0,0,asm,10013+20000,208.2451 +gfx938,int8_w8a8_channel,torch.float16,10,512,2048,256,8,0,0,asm,10008+20000,228.9861 +gfx938,int8_w8a8_channel,torch.float16,11,512,2048,256,8,0,0,asm,10013+20000,240.826 +gfx938,int8_w8a8_channel,torch.float16,12,512,2048,256,8,0,0,asm,10013+20000,251.5291 +gfx938,int8_w8a8_channel,torch.float16,13,512,2048,256,8,0,0,asm,10011+20000,273.2555 +gfx938,int8_w8a8_channel,torch.float16,14,512,2048,256,8,0,0,asm,10013+20000,285.9122 +gfx938,int8_w8a8_channel,torch.float16,15,512,2048,256,8,0,0,asm,10013+20000,297.1289 +gfx938,int8_w8a8_channel,torch.float16,16,512,2048,256,8,0,0,asm,10008+20000,309.1964 +gfx938,int8_w8a8_channel,torch.float16,17,512,2048,256,8,0,0,asm,10009+20000,312.5196 
+gfx938,int8_w8a8_channel,torch.float16,18,512,2048,256,8,0,0,asm,10013+20000,320.166 +gfx938,int8_w8a8_channel,torch.float16,20,512,2048,256,8,0,0,asm,10008+20000,349.8501 +gfx938,int8_w8a8_channel,torch.float16,24,512,2048,256,8,0,0,asm,10009+20000,390.6669 +gfx938,int8_w8a8_channel,torch.float16,28,512,2048,256,8,0,0,asm,10013+20000,454.9896 +gfx938,int8_w8a8_channel,torch.float16,32,512,2048,256,8,0,0,asm,11005+20000,482.9222 +gfx938,int8_w8a8_channel,torch.float16,34,512,2048,256,8,0,0,asm,11005+20000,468.7803 +gfx938,int8_w8a8_channel,torch.float16,36,512,2048,256,8,0,0,asm,10011+20000,496.3086 +gfx938,int8_w8a8_channel,torch.float16,40,512,2048,256,8,0,0,asm,10011+20000,516.0896 +gfx938,int8_w8a8_channel,torch.float16,44,512,2048,256,8,0,0,asm,10011+20000,536.3675 +gfx938,int8_w8a8_channel,torch.float16,48,512,2048,256,8,0,0,asm,10008+20000,550.9443 +gfx938,int8_w8a8_channel,torch.float16,56,512,2048,256,8,0,0,asm,10011+20000,577.7821 +gfx938,int8_w8a8_channel,torch.float16,64,512,2048,256,8,0,0,asm,10008+20000,597.9084 +gfx938,int8_w8a8_channel,torch.float16,68,512,2048,256,8,0,0,asm,10008+20000,601.681 +gfx938,int8_w8a8_channel,torch.float16,72,512,2048,256,8,0,0,asm,10008+20000,615.7609 +gfx938,int8_w8a8_channel,torch.float16,80,512,2048,256,8,0,0,asm,10011+20000,636.763 +gfx938,int8_w8a8_channel,torch.float16,88,512,2048,256,8,0,0,asm,10011+20000,650.9019 +gfx938,int8_w8a8_channel,torch.float16,96,512,2048,256,8,0,0,asm,10011+20000,655.6177 +gfx938,int8_w8a8_channel,torch.float16,104,512,2048,256,8,0,0,asm,10011+20000,665.5714 +gfx938,int8_w8a8_channel,torch.float16,112,512,2048,256,8,0,0,asm,10011+20000,670.8683 +gfx938,int8_w8a8_channel,torch.float16,128,512,2048,256,8,0,0,asm,10011+20000,685.1923 +gfx938,int8_w8a8_channel,torch.float16,144,512,2048,256,8,0,0,asm,10008+20001,725.1081 +gfx938,int8_w8a8_channel,torch.float16,160,512,2048,256,8,0,0,asm,10011+20000,696.4429 
+gfx938,int8_w8a8_channel,torch.float16,192,512,2048,256,8,0,0,asm,10011+20000,700.8724 +gfx938,int8_w8a8_channel,torch.float16,224,512,2048,256,8,0,0,asm,10011+20000,711.4071 +gfx938,int8_w8a8_channel,torch.float16,256,512,2048,256,8,0,0,asm,10011+20000,716.0387 +gfx938,int8_w8a8_channel,torch.float16,320,512,2048,256,8,0,0,asm,11007+21000,774.5733 +gfx938,int8_w8a8_channel,torch.float16,384,512,2048,256,8,0,0,asm,10011+20000,747.0113 +gfx938,int8_w8a8_channel,torch.float16,448,512,2048,256,8,0,0,asm,11005+21000,773.7396 +gfx938,int8_w8a8_channel,torch.float16,512,512,2048,256,8,0,0,asm,11005+21000,787.6848 +gfx938,int8_w8a8_channel,torch.float16,576,512,2048,256,8,0,0,asm,12005+22000,861.8404 +gfx938,int8_w8a8_channel,torch.float16,640,512,2048,256,8,0,0,asm,11005+21000,809.0491 +gfx938,int8_w8a8_channel,torch.float16,704,512,2048,256,8,0,0,asm,11007+21000,826.4215 +gfx938,int8_w8a8_channel,torch.float16,768,512,2048,256,8,0,0,asm,11005+21000,838.3794 +gfx938,int8_w8a8_channel,torch.float16,832,512,2048,256,8,0,0,asm,11005+21000,849.7562 +gfx938,int8_w8a8_channel,torch.float16,896,512,2048,256,8,0,0,asm,12005+22000,884.7455 +gfx938,int8_w8a8_channel,torch.float16,960,512,2048,256,8,0,0,asm,12001+22000,867.4824 +gfx938,int8_w8a8_channel,torch.float16,1024,512,2048,256,8,0,0,asm,12005+22000,888.8297 +gfx938,int8_w8a8_channel,torch.float16,1152,512,2048,256,8,0,0,asm,12005+22000,941.4443 +gfx938,int8_w8a8_channel,torch.float16,1280,512,2048,256,8,0,0,asm,12001+22000,928.9812 +gfx938,int8_w8a8_channel,torch.float16,1408,512,2048,256,8,0,0,asm,12001+22000,947.9622 +gfx938,int8_w8a8_channel,torch.float16,1536,512,2048,256,8,0,0,asm,12005+22000,980.9053 +gfx938,int8_w8a8_channel,torch.float16,1664,512,2048,256,8,0,0,asm,12001+22000,999.9199 +gfx938,int8_w8a8_channel,torch.float16,1792,512,2048,256,8,0,0,asm,12001+22000,1022.5051 +gfx938,int8_w8a8_channel,torch.float16,1920,512,2048,256,8,0,0,asm,12001+22000,1070.6567 
+gfx938,int8_w8a8_channel,torch.float16,2048,512,2048,256,8,0,0,asm,12001+22001,1153.3089 +gfx938,int8_w8a8_channel,torch.float16,2304,512,2048,256,8,0,0,asm,13001+23001,1182.4963 +gfx938,int8_w8a8_channel,torch.float16,2560,512,2048,256,8,0,0,asm,13001+23001,1224.7614 +gfx938,int8_w8a8_channel,torch.float16,2816,512,2048,256,8,0,0,asm,13001+23001,1245.3172 +gfx938,int8_w8a8_channel,torch.float16,3072,512,2048,256,8,0,0,asm,13001+23001,1278.1086 +gfx938,int8_w8a8_channel,torch.float16,3328,512,2048,256,8,0,0,asm,13001+23001,1336.2475 +gfx938,int8_w8a8_channel,torch.float16,3584,512,2048,256,8,0,0,asm,13001+23001,1381.6115 +gfx938,int8_w8a8_channel,torch.float16,3840,512,2048,256,8,0,0,asm,13001+23001,1493.1565 +gfx938,int8_w8a8_channel,torch.float16,4096,512,2048,256,8,0,0,asm,13001+23001,1657.3499 +gfx938,int8_w8a8_channel,torch.float16,4608,512,2048,256,8,0,0,asm,13001+23001,1993.45 +gfx938,int8_w8a8_channel,torch.float16,5120,512,2048,256,8,0,0,asm,12001+22001,2101.3907 +gfx938,int8_w8a8_channel,torch.float16,5632,512,2048,256,8,0,0,asm,13001+23001,2184.7083 +gfx938,int8_w8a8_channel,torch.float16,6144,512,2048,256,8,0,0,asm,13001+23001,2237.3144 +gfx938,int8_w8a8_channel,torch.float16,6656,512,2048,256,8,0,0,asm,13001+23001,2297.6341 +gfx938,int8_w8a8_channel,torch.float16,7168,512,2048,256,8,0,0,asm,13001+23001,2372.7578 +gfx938,int8_w8a8_channel,torch.float16,7680,512,2048,256,8,0,0,asm,13001+23001,2503.2248 +gfx938,int8_w8a8_channel,torch.float16,8192,512,2048,256,8,0,0,asm,13001+23001,2796.6807 +gfx938,int8_w8a8_channel,torch.float16,10240,512,2048,256,8,0,0,asm,13001+23001,3349.3024 +gfx938,int8_w8a8_channel,torch.float16,12288,512,2048,256,8,0,0,asm,13001+23001,3896.5094 +gfx938,int8_w8a8_channel,torch.float16,14336,512,2048,256,8,0,0,asm,13001+23001,4486.15 +gfx938,int8_w8a8_channel,torch.float16,16384,512,2048,256,8,0,0,asm,13001+23001,5067.0073 +gfx938,int8_w8a8_channel,torch.float16,17408,512,2048,256,8,0,0,asm,13001+23001,5469.187 
+gfx938,int8_w8a8_channel,torch.float16,24576,512,2048,256,8,0,0,asm,13001+23001,7372.5919 +gfx938,int8_w8a8_channel,torch.float16,32768,512,2048,256,8,0,0,asm,13001+23001,11965.4224 +gfx938,int8_w8a8_channel,torch.float16,40960,512,2048,256,8,0,0,asm,13001+23001,14939.3742 +gfx938,int8_w8a8_channel,torch.float16,49152,512,2048,256,8,0,0,asm,13001+23001,17880.1629 +gfx938,int8_w8a8_channel,torch.float16,57344,512,2048,256,8,0,0,asm,13001+23001,20746.1576 +gfx938,int8_w8a8_channel,torch.float16,65536,512,2048,256,8,0,0,asm,13001+22001,24014.33 +gfx938,int8_w8a8_channel,torch.float16,65536,512,2048,256,8,0,0,asm,13001+23001,23307.1407 +gfx938,int8_w8a8_channel,torch.bfloat16,1,512,2048,256,8,0,0,asm,10010+20000,66.1234 +gfx938,int8_w8a8_channel,torch.bfloat16,2,512,2048,256,8,0,0,asm,10013+20000,87.4538 +gfx938,int8_w8a8_channel,torch.bfloat16,3,512,2048,256,8,0,0,asm,10010+20000,114.2326 +gfx938,int8_w8a8_channel,torch.bfloat16,4,512,2048,256,8,0,0,asm,10013+20000,130.999 +gfx938,int8_w8a8_channel,torch.bfloat16,5,512,2048,256,8,0,0,asm,10013+20000,155.5547 +gfx938,int8_w8a8_channel,torch.bfloat16,6,512,2048,256,8,0,0,asm,10013+20000,167.4367 +gfx938,int8_w8a8_channel,torch.bfloat16,7,512,2048,256,8,0,0,asm,10013+20000,202.2999 +gfx938,int8_w8a8_channel,torch.bfloat16,8,512,2048,256,8,0,0,asm,10013+20000,208.5902 +gfx938,int8_w8a8_channel,torch.bfloat16,9,512,2048,256,8,0,0,asm,10013+20000,226.6451 +gfx938,int8_w8a8_channel,torch.bfloat16,10,512,2048,256,8,0,0,asm,10013+20000,253.2217 +gfx938,int8_w8a8_channel,torch.bfloat16,11,512,2048,256,8,0,0,asm,10013+20000,260.186 +gfx938,int8_w8a8_channel,torch.bfloat16,12,512,2048,256,8,0,0,asm,10013+20000,270.1901 +gfx938,int8_w8a8_channel,torch.bfloat16,13,512,2048,256,8,0,0,asm,10013+20000,296.2028 +gfx938,int8_w8a8_channel,torch.bfloat16,14,512,2048,256,8,0,0,asm,10013+20000,309.7185 +gfx938,int8_w8a8_channel,torch.bfloat16,15,512,2048,256,8,0,0,asm,10013+20000,318.2238 
+gfx938,int8_w8a8_channel,torch.bfloat16,16,512,2048,256,8,0,0,asm,10013+20000,340.7668 +gfx938,int8_w8a8_channel,torch.bfloat16,17,512,2048,256,8,0,0,asm,10013+20000,335.9133 +gfx938,int8_w8a8_channel,torch.bfloat16,18,512,2048,256,8,0,0,asm,10013+20000,342.3722 +gfx938,int8_w8a8_channel,torch.bfloat16,20,512,2048,256,8,0,0,asm,10011+20000,377.9258 +gfx938,int8_w8a8_channel,torch.bfloat16,24,512,2048,256,8,0,0,asm,10011+20000,422.1447 +gfx938,int8_w8a8_channel,torch.bfloat16,28,512,2048,256,8,0,0,asm,10013+20000,491.0148 +gfx938,int8_w8a8_channel,torch.bfloat16,32,512,2048,256,8,0,0,asm,10011+20000,519.6043 +gfx938,int8_w8a8_channel,torch.bfloat16,34,512,2048,256,8,0,0,asm,10011+20000,508.8813 +gfx938,int8_w8a8_channel,torch.bfloat16,36,512,2048,256,8,0,0,asm,10011+20000,523.6266 +gfx938,int8_w8a8_channel,torch.bfloat16,40,512,2048,256,8,0,0,asm,10011+20000,552.1233 +gfx938,int8_w8a8_channel,torch.bfloat16,44,512,2048,256,8,0,0,asm,10011+20000,567.8369 +gfx938,int8_w8a8_channel,torch.bfloat16,48,512,2048,256,8,0,0,asm,10011+20000,582.3801 +gfx938,int8_w8a8_channel,torch.bfloat16,56,512,2048,256,8,0,0,asm,10011+20000,605.1421 +gfx938,int8_w8a8_channel,torch.bfloat16,64,512,2048,256,8,0,0,asm,10011+20000,631.66 +gfx938,int8_w8a8_channel,torch.bfloat16,68,512,2048,256,8,0,0,asm,10011+20000,638.2958 +gfx938,int8_w8a8_channel,torch.bfloat16,72,512,2048,256,8,0,0,asm,10011+20000,648.8725 +gfx938,int8_w8a8_channel,torch.bfloat16,80,512,2048,256,8,0,0,asm,10011+20000,670.4557 +gfx938,int8_w8a8_channel,torch.bfloat16,88,512,2048,256,8,0,0,asm,10011+20000,685.4114 +gfx938,int8_w8a8_channel,torch.bfloat16,96,512,2048,256,8,0,0,asm,10011+20000,690.6072 +gfx938,int8_w8a8_channel,torch.bfloat16,104,512,2048,256,8,0,0,asm,10011+20000,682.2114 +gfx938,int8_w8a8_channel,torch.bfloat16,112,512,2048,256,8,0,0,asm,10011+20000,707.9545 +gfx938,int8_w8a8_channel,torch.bfloat16,128,512,2048,256,8,0,0,asm,10011+20000,720.4093 
+gfx938,int8_w8a8_channel,torch.bfloat16,144,512,2048,256,8,0,0,asm,10011+20000,711.2387 +gfx938,int8_w8a8_channel,torch.bfloat16,160,512,2048,256,8,0,0,asm,10011+20000,730.3039 +gfx938,int8_w8a8_channel,torch.bfloat16,192,512,2048,256,8,0,0,asm,10011+20000,739.9207 +gfx938,int8_w8a8_channel,torch.bfloat16,224,512,2048,256,8,0,0,asm,10012+20000,746.9103 +gfx938,int8_w8a8_channel,torch.bfloat16,256,512,2048,256,8,0,0,asm,10011+20000,755.8366 +gfx938,int8_w8a8_channel,torch.bfloat16,320,512,2048,256,8,0,0,asm,10011+20000,780.4092 +gfx938,int8_w8a8_channel,torch.bfloat16,384,512,2048,256,8,0,0,asm,10011+20000,794.2703 +gfx938,int8_w8a8_channel,torch.bfloat16,448,512,2048,256,8,0,0,asm,11005+21000,800.1734 +gfx938,int8_w8a8_channel,torch.bfloat16,512,512,2048,256,8,0,0,asm,11005+21000,841.2765 +gfx938,int8_w8a8_channel,torch.bfloat16,576,512,2048,256,8,0,0,asm,11007+21000,863.8701 +gfx938,int8_w8a8_channel,torch.bfloat16,640,512,2048,256,8,0,0,asm,11005+21000,836.1818 +gfx938,int8_w8a8_channel,torch.bfloat16,704,512,2048,256,8,0,0,asm,11005+21000,858.7079 +gfx938,int8_w8a8_channel,torch.bfloat16,768,512,2048,256,8,0,0,asm,11005+21000,897.6973 +gfx938,int8_w8a8_channel,torch.bfloat16,832,512,2048,256,8,0,0,asm,11005+21000,929.8656 +gfx938,int8_w8a8_channel,torch.bfloat16,896,512,2048,256,8,0,0,asm,12001+22000,944.9813 +gfx938,int8_w8a8_channel,torch.bfloat16,960,512,2048,256,8,0,0,asm,12001+22000,955.5665 +gfx938,int8_w8a8_channel,torch.bfloat16,1024,512,2048,256,8,0,0,asm,12001+22000,965.9664 +gfx938,int8_w8a8_channel,torch.bfloat16,1152,512,2048,256,8,0,0,asm,12005+22000,1028.9894 +gfx938,int8_w8a8_channel,torch.bfloat16,1280,512,2048,256,8,0,0,asm,12001+22000,999.2968 +gfx938,int8_w8a8_channel,torch.bfloat16,1408,512,2048,256,8,0,0,asm,12001+22001,1032.0463 +gfx938,int8_w8a8_channel,torch.bfloat16,1536,512,2048,256,8,0,0,asm,12001+22000,1052.8631 +gfx938,int8_w8a8_channel,torch.bfloat16,1664,512,2048,256,8,0,0,asm,12001+22001,1086.3956 
+gfx938,int8_w8a8_channel,torch.bfloat16,1792,512,2048,256,8,0,0,asm,12001+22001,1132.8207 +gfx938,int8_w8a8_channel,torch.bfloat16,1920,512,2048,256,8,0,0,asm,12001+22001,1190.7238 +gfx938,int8_w8a8_channel,torch.bfloat16,2048,512,2048,256,8,0,0,asm,12005+22001,1308.5508 +gfx938,int8_w8a8_channel,torch.bfloat16,2304,512,2048,256,8,0,0,asm,13001+23001,1372.2055 +gfx938,int8_w8a8_channel,torch.bfloat16,2560,512,2048,256,8,0,0,asm,13001+23001,1401.258 +gfx938,int8_w8a8_channel,torch.bfloat16,2816,512,2048,256,8,0,0,asm,13001+23001,1442.5548 +gfx938,int8_w8a8_channel,torch.bfloat16,3072,512,2048,256,8,0,0,asm,13001+23001,1476.5842 +gfx938,int8_w8a8_channel,torch.bfloat16,3328,512,2048,256,8,0,0,asm,13001+23001,1541.9145 +gfx938,int8_w8a8_channel,torch.bfloat16,3584,512,2048,256,8,0,0,asm,13001+23001,1587.8681 +gfx938,int8_w8a8_channel,torch.bfloat16,3840,512,2048,256,8,0,0,asm,13001+23001,1753.4172 +gfx938,int8_w8a8_channel,torch.bfloat16,4096,512,2048,256,8,0,0,asm,13001+23001,2012.5745 +gfx938,int8_w8a8_channel,torch.bfloat16,4608,512,2048,256,8,0,0,asm,12001+22001,2424.8344 +gfx938,int8_w8a8_channel,torch.bfloat16,5120,512,2048,256,8,0,0,asm,12001+22001,2563.3436 +gfx938,int8_w8a8_channel,torch.bfloat16,5632,512,2048,256,8,0,0,asm,12001+22001,2694.5769 +gfx938,int8_w8a8_channel,torch.bfloat16,6144,512,2048,256,8,0,0,asm,13001+23001,2759.1072 +gfx938,int8_w8a8_channel,torch.bfloat16,6656,512,2048,256,8,0,0,asm,13001+23001,2816.3026 +gfx938,int8_w8a8_channel,torch.bfloat16,7168,512,2048,256,8,0,0,asm,13001+23001,2898.4665 +gfx938,int8_w8a8_channel,torch.bfloat16,7680,512,2048,256,8,0,0,asm,13001+23001,3065.6827 +gfx938,int8_w8a8_channel,torch.bfloat16,8192,512,2048,256,8,0,0,asm,13001+23001,3465.1172 +gfx938,int8_w8a8_channel,torch.bfloat16,10240,512,2048,256,8,0,0,asm,13001+23001,4148.9045 +gfx938,int8_w8a8_channel,torch.bfloat16,12288,512,2048,256,8,0,0,asm,13001+23001,4845.3656 
+gfx938,int8_w8a8_channel,torch.bfloat16,14336,512,2048,256,8,0,0,asm,13001+23001,5592.5893 +gfx938,int8_w8a8_channel,torch.bfloat16,16384,512,2048,256,8,0,0,asm,13001+23001,6308.3012 +gfx938,int8_w8a8_channel,torch.bfloat16,17408,512,2048,256,8,0,0,asm,13001+23001,6826.1859 +gfx938,int8_w8a8_channel,torch.bfloat16,24576,512,2048,256,8,0,0,asm,13001+23001,9185.3035 +gfx938,int8_w8a8_channel,torch.bfloat16,32768,512,2048,256,8,0,0,asm,13001+23001,12019.847 +gfx938,int8_w8a8_channel,torch.bfloat16,40960,512,2048,256,8,0,0,asm,13001+23001,14983.3494 +gfx938,int8_w8a8_channel,torch.bfloat16,49152,512,2048,256,8,0,0,asm,13001+23001,17938.1767 +gfx938,int8_w8a8_channel,torch.bfloat16,57344,512,2048,256,8,0,0,asm,13001+23001,20809.5385 +gfx938,int8_w8a8_channel,torch.bfloat16,65536,512,2048,256,8,0,0,asm,13001+23001,23354.4759 +gfx938,int8_w8a8_channel,torch.bfloat16,65536,512,2048,256,8,0,0,asm,13001+23001,23370.3628 +gfx938,f8_w8a8_channel,torch.bfloat16,1,320,6144,160,8,0,0,asm,10008+20000,87.8748 +gfx938,f8_w8a8_channel,torch.bfloat16,2,320,6144,160,8,0,0,asm,10011+20001,129.4152 +gfx938,f8_w8a8_channel,torch.bfloat16,3,320,6144,160,8,0,0,asm,10008+20000,160.6829 +gfx938,f8_w8a8_channel,torch.bfloat16,4,320,6144,160,8,0,0,asm,10002+20000,199.8576 +gfx938,f8_w8a8_channel,torch.bfloat16,5,320,6144,160,8,0,0,asm,10009+20000,224.0766 +gfx938,f8_w8a8_channel,torch.bfloat16,6,320,6144,160,8,0,0,asm,10002+20000,266.0216 +gfx938,f8_w8a8_channel,torch.bfloat16,7,320,6144,160,8,0,0,asm,10008+20000,284.0428 +gfx938,f8_w8a8_channel,torch.bfloat16,8,320,6144,160,8,0,0,asm,10011+20000,329.752 +gfx938,f8_w8a8_channel,torch.bfloat16,9,320,6144,160,8,0,0,asm,10011+20000,349.2551 +gfx938,f8_w8a8_channel,torch.bfloat16,10,320,6144,160,8,0,0,asm,10002+20000,407.293 +gfx938,f8_w8a8_channel,torch.bfloat16,11,320,6144,160,8,0,0,asm,10008+20000,425.7182 +gfx938,f8_w8a8_channel,torch.bfloat16,12,320,6144,160,8,0,0,asm,10011+20000,445.836 
+gfx938,f8_w8a8_channel,torch.bfloat16,13,320,6144,160,8,0,0,asm,10011+20000,458.4254 +gfx938,f8_w8a8_channel,torch.bfloat16,14,320,6144,160,8,0,0,asm,10008+20000,493.6169 +gfx938,f8_w8a8_channel,torch.bfloat16,15,320,6144,160,8,0,0,asm,10011+20000,521.6674 +gfx938,f8_w8a8_channel,torch.bfloat16,16,320,6144,160,8,0,0,asm,10011+20000,529.3306 +gfx938,f8_w8a8_channel,torch.bfloat16,17,320,6144,160,8,0,0,asm,10011+20000,532.4435 +gfx938,f8_w8a8_channel,torch.bfloat16,18,320,6144,160,8,0,0,asm,10011+20000,545.1423 +gfx938,f8_w8a8_channel,torch.bfloat16,20,320,6144,160,8,0,0,asm,10011+20000,600.0979 +gfx938,f8_w8a8_channel,torch.bfloat16,24,320,6144,160,8,0,0,asm,10012+20000,637.3948 +gfx938,f8_w8a8_channel,torch.bfloat16,28,320,6144,160,8,0,0,asm,10011+20000,661.3219 +gfx938,f8_w8a8_channel,torch.bfloat16,32,320,6144,160,8,0,0,asm,10011+20000,712.3954 +gfx938,f8_w8a8_channel,torch.bfloat16,34,320,6144,160,8,0,0,asm,10011+20000,714.4303 +gfx938,f8_w8a8_channel,torch.bfloat16,36,320,6144,160,8,0,0,asm,10011+20000,726.3377 +gfx938,f8_w8a8_channel,torch.bfloat16,40,320,6144,160,8,0,0,asm,10012+20000,733.1418 +gfx938,f8_w8a8_channel,torch.bfloat16,44,320,6144,160,8,0,0,asm,10011+20000,739.8449 +gfx938,f8_w8a8_channel,torch.bfloat16,48,320,6144,160,8,0,0,asm,11005+21000,783.7775 +gfx938,f8_w8a8_channel,torch.bfloat16,56,320,6144,160,8,0,0,asm,10011+20000,804.1311 +gfx938,f8_w8a8_channel,torch.bfloat16,64,320,6144,160,8,0,0,asm,10011+20000,810.3711 +gfx938,f8_w8a8_channel,torch.bfloat16,68,320,6144,160,8,0,0,asm,10012+20000,810.5901 +gfx938,f8_w8a8_channel,torch.bfloat16,72,320,6144,160,8,0,0,asm,10011+20000,813.4195 +gfx938,f8_w8a8_channel,torch.bfloat16,80,320,6144,160,8,0,0,asm,10011+20000,824.6364 +gfx938,f8_w8a8_channel,torch.bfloat16,88,320,6144,160,8,0,0,asm,10012+20000,828.4427 +gfx938,f8_w8a8_channel,torch.bfloat16,96,320,6144,160,8,0,0,asm,10011+20000,831.988 +gfx938,f8_w8a8_channel,torch.bfloat16,104,320,6144,160,8,0,0,asm,10011+20000,838.1268 
+gfx938,f8_w8a8_channel,torch.bfloat16,112,320,6144,160,8,0,0,asm,10011+20000,840.889 +gfx938,f8_w8a8_channel,torch.bfloat16,128,320,6144,160,8,0,0,asm,10011+20000,850.2531 +gfx938,f8_w8a8_channel,torch.bfloat16,144,320,6144,160,8,0,0,asm,10012+20000,859.3393 +gfx938,f8_w8a8_channel,torch.bfloat16,160,320,6144,160,8,0,0,asm,10011+20000,864.9309 +gfx938,f8_w8a8_channel,torch.bfloat16,192,320,6144,160,8,0,0,asm,11005+21000,882.9772 +gfx938,f8_w8a8_channel,torch.bfloat16,224,320,6144,160,8,0,0,asm,10011+20000,898.3287 +gfx938,f8_w8a8_channel,torch.bfloat16,256,320,6144,160,8,0,0,asm,11005+21000,912.3244 +gfx938,f8_w8a8_channel,torch.bfloat16,320,320,6144,160,8,0,0,asm,11005+21000,943.9706 +gfx938,f8_w8a8_channel,torch.bfloat16,384,320,6144,160,8,0,0,asm,11005+21000,1033.402 +gfx938,f8_w8a8_channel,torch.bfloat16,448,320,6144,160,8,0,0,asm,11005+21000,997.2927 +gfx938,f8_w8a8_channel,torch.bfloat16,512,320,6144,160,8,0,0,asm,11005+21000,1031.0105 +gfx938,f8_w8a8_channel,torch.bfloat16,576,320,6144,160,8,0,0,asm,11005+21000,1134.2692 +gfx938,f8_w8a8_channel,torch.bfloat16,640,320,6144,160,8,0,0,asm,12000+22001,1198.9596 +gfx938,f8_w8a8_channel,torch.bfloat16,704,320,6144,160,8,0,0,asm,12001+22001,1171.1534 +gfx938,f8_w8a8_channel,torch.bfloat16,768,320,6144,160,8,0,0,asm,12001+22001,1242.6059 +gfx938,f8_w8a8_channel,torch.bfloat16,832,320,6144,160,8,0,0,asm,12001+22001,1202.7659 +gfx938,f8_w8a8_channel,torch.bfloat16,896,320,6144,160,8,0,0,asm,12001+22001,1210.446 +gfx938,f8_w8a8_channel,torch.bfloat16,960,320,6144,160,8,0,0,asm,12001+22001,1248.0121 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,320,6144,160,8,0,0,asm,12001+22001,1277.2499 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,320,6144,160,8,0,0,asm,12001+22001,1399.4727 +gfx938,f8_w8a8_channel,torch.bfloat16,1280,320,6144,160,8,0,0,asm,12001+22001,1749.5096 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,320,6144,160,8,0,0,asm,13001+23001,1848.2125 
+gfx938,f8_w8a8_channel,torch.bfloat16,1536,320,6144,160,8,0,0,asm,13001+23001,1873.7618 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,320,6144,160,8,0,0,asm,13001+23001,1907.8164 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,320,6144,160,8,0,0,asm,13001+23001,1935.1678 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,320,6144,160,8,0,0,asm,13001+23001,1966.5951 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,320,6144,160,8,0,0,asm,13001+23001,1995.8666 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,320,6144,160,8,0,0,asm,13001+23001,2217.6428 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,320,6144,160,8,0,0,asm,13001+23001,2676.4204 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,320,6144,160,8,0,0,asm,12001+22001,3274.8019 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,320,6144,160,8,0,0,asm,12001+22001,3447.8037 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,320,6144,160,8,0,0,asm,12001+22001,3513.1594 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,320,6144,160,8,0,0,asm,13001+23001,3681.1507 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,320,6144,160,8,0,0,asm,13001+23001,3740.8308 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,320,6144,160,8,0,0,asm,13001+23001,3787.5591 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,320,6144,160,8,0,0,asm,13001+23001,3988.2912 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,320,6144,160,8,0,0,asm,13001+23001,4664.4407 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,320,6144,160,8,0,0,asm,13001+23001,5438.1986 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,320,6144,160,8,0,0,asm,13001+23001,5639.4108 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,320,6144,160,8,0,0,asm,13001+23001,5753.8779 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,320,6144,160,8,0,0,asm,13001+23000,6060.2096 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,320,6144,160,8,0,0,asm,13001+23001,6691.5508 +gfx938,f8_w8a8_channel,torch.bfloat16,8192,320,6144,160,8,0,0,asm,13001+23001,7332.6939 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,320,6144,160,8,0,0,asm,13001+23001,8524.7909 
+gfx938,f8_w8a8_channel,torch.bfloat16,12288,320,6144,160,8,0,0,asm,13001+23001,9910.8836 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,320,6144,160,8,0,0,asm,13001+23001,11609.6582 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,320,6144,160,8,0,0,asm,13001+23001,13364.4164 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,320,6144,160,8,0,0,asm,13001+23001,13945.3495 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,320,6144,160,8,0,0,asm,13001+23001,19386.0554 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,320,6144,160,8,0,0,asm,13001+23001,25609.6308 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,320,6144,160,8,0,0,asm,13001+23001,31906.0787 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,320,6144,160,8,0,0,asm,13001+23001,38174.2552 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,320,6144,160,8,0,0,asm,13001+23001,44395.4213 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,320,6144,160,8,0,0,asm,13001+23001,50864.8195 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,320,6144,160,8,0,0,asm,13001+23001,50655.7567 +gfx938,f8_w8a8_channel,torch.float16,1,320,6144,160,8,0,0,asm,10008+20000,86.8391 +gfx938,f8_w8a8_channel,torch.float16,2,320,6144,160,8,0,0,asm,10012+20000,129.7864 +gfx938,f8_w8a8_channel,torch.float16,3,320,6144,160,8,0,0,asm,10008+20000,160.3293 +gfx938,f8_w8a8_channel,torch.float16,4,320,6144,160,8,0,0,asm,10002+20000,199.0578 +gfx938,f8_w8a8_channel,torch.float16,5,320,6144,160,8,0,0,asm,10009+20000,224.0428 +gfx938,f8_w8a8_channel,torch.float16,6,320,6144,160,8,0,0,asm,10002+20000,267.9417 +gfx938,f8_w8a8_channel,torch.float16,7,320,6144,160,8,0,0,asm,10008+20000,285.1374 +gfx938,f8_w8a8_channel,torch.float16,8,320,6144,160,8,0,0,asm,10002+20000,337.491 +gfx938,f8_w8a8_channel,torch.float16,9,320,6144,160,8,0,0,asm,10011+20000,349.1963 +gfx938,f8_w8a8_channel,torch.float16,10,320,6144,160,8,0,0,asm,10002+20000,405.5077 +gfx938,f8_w8a8_channel,torch.float16,11,320,6144,160,8,0,0,asm,10008+20000,425.0951 
+gfx938,f8_w8a8_channel,torch.float16,12,320,6144,160,8,0,0,asm,10012+20000,453.8696 +gfx938,f8_w8a8_channel,torch.float16,13,320,6144,160,8,0,0,asm,10011+20000,459.6549 +gfx938,f8_w8a8_channel,torch.float16,14,320,6144,160,8,0,0,asm,10008+20000,493.0359 +gfx938,f8_w8a8_channel,torch.float16,15,320,6144,160,8,0,0,asm,10011+20000,520.7074 +gfx938,f8_w8a8_channel,torch.float16,16,320,6144,160,8,0,0,asm,10011+20000,530.198 +gfx938,f8_w8a8_channel,torch.float16,17,320,6144,160,8,0,0,asm,10011+20000,532.9151 +gfx938,f8_w8a8_channel,torch.float16,18,320,6144,160,8,0,0,asm,10011+20000,542.9361 +gfx938,f8_w8a8_channel,torch.float16,20,320,6144,160,8,0,0,asm,10008+20000,601.6643 +gfx938,f8_w8a8_channel,torch.float16,24,320,6144,160,8,0,0,asm,10011+20000,632.7463 +gfx938,f8_w8a8_channel,torch.float16,28,320,6144,160,8,0,0,asm,10011+20000,661.8019 +gfx938,f8_w8a8_channel,torch.float16,32,320,6144,160,8,0,0,asm,10012+20000,715.9659 +gfx938,f8_w8a8_channel,torch.float16,34,320,6144,160,8,0,0,asm,10011+20000,716.8639 +gfx938,f8_w8a8_channel,torch.float16,36,320,6144,160,8,0,0,asm,10011+20000,725.8576 +gfx938,f8_w8a8_channel,torch.float16,40,320,6144,160,8,0,0,asm,10011+20000,731.6175 +gfx938,f8_w8a8_channel,torch.float16,44,320,6144,160,8,0,0,asm,10011+20000,736.8048 +gfx938,f8_w8a8_channel,torch.float16,48,320,6144,160,8,0,0,asm,10012+20000,825.9498 +gfx938,f8_w8a8_channel,torch.float16,56,320,6144,160,8,0,0,asm,10012+20000,804.4173 +gfx938,f8_w8a8_channel,torch.float16,64,320,6144,160,8,0,0,asm,10011+20000,808.5773 +gfx938,f8_w8a8_channel,torch.float16,68,320,6144,160,8,0,0,asm,10011+20000,807.6089 +gfx938,f8_w8a8_channel,torch.float16,72,320,6144,160,8,0,0,asm,10011+20000,815.5415 +gfx938,f8_w8a8_channel,torch.float16,80,320,6144,160,8,0,0,asm,10011+20000,825.4531 +gfx938,f8_w8a8_channel,torch.float16,88,320,6144,160,8,0,0,asm,10011+20000,825.4868 +gfx938,f8_w8a8_channel,torch.float16,96,320,6144,160,8,0,0,asm,10011+20000,830.8931 
+gfx938,f8_w8a8_channel,torch.float16,104,320,6144,160,8,0,0,asm,10012+20000,838.5309 +gfx938,f8_w8a8_channel,torch.float16,112,320,6144,160,8,0,0,asm,10011+20000,841.9751 +gfx938,f8_w8a8_channel,torch.float16,128,320,6144,160,8,0,0,asm,10012+20000,850.0088 +gfx938,f8_w8a8_channel,torch.float16,144,320,6144,160,8,0,0,asm,10012+20000,857.8152 +gfx938,f8_w8a8_channel,torch.float16,160,320,6144,160,8,0,0,asm,10011+20000,866.2614 +gfx938,f8_w8a8_channel,torch.float16,192,320,6144,160,8,0,0,asm,10011+20000,881.3939 +gfx938,f8_w8a8_channel,torch.float16,224,320,6144,160,8,0,0,asm,10011+20000,896.1813 +gfx938,f8_w8a8_channel,torch.float16,256,320,6144,160,8,0,0,asm,11005+21000,912.0044 +gfx938,f8_w8a8_channel,torch.float16,320,320,6144,160,8,0,0,asm,11005+21000,943.1369 +gfx938,f8_w8a8_channel,torch.float16,384,320,6144,160,8,0,0,asm,11005+21000,975.6336 +gfx938,f8_w8a8_channel,torch.float16,448,320,6144,160,8,0,0,asm,11005+21000,994.6231 +gfx938,f8_w8a8_channel,torch.float16,512,320,6144,160,8,0,0,asm,11005+21000,1028.1387 +gfx938,f8_w8a8_channel,torch.float16,576,320,6144,160,8,0,0,asm,11005+21000,1131.5826 +gfx938,f8_w8a8_channel,torch.float16,640,320,6144,160,8,0,0,asm,12003+22001,1166.0162 +gfx938,f8_w8a8_channel,torch.float16,704,320,6144,160,8,0,0,asm,12001+22001,1164.4584 +gfx938,f8_w8a8_channel,torch.float16,768,320,6144,160,8,0,0,asm,12001+22001,1186.3363 +gfx938,f8_w8a8_channel,torch.float16,832,320,6144,160,8,0,0,asm,12001+22001,1196.6605 +gfx938,f8_w8a8_channel,torch.float16,896,320,6144,160,8,0,0,asm,12001+22001,1213.2753 +gfx938,f8_w8a8_channel,torch.float16,960,320,6144,160,8,0,0,asm,12001+22001,1245.5952 +gfx938,f8_w8a8_channel,torch.float16,1024,320,6144,160,8,0,0,asm,12001+22001,1274.4457 +gfx938,f8_w8a8_channel,torch.float16,1152,320,6144,160,8,0,0,asm,12001+22001,1415.7254 +gfx938,f8_w8a8_channel,torch.float16,1280,320,6144,160,8,0,0,asm,12001+22001,1862.7136 +gfx938,f8_w8a8_channel,torch.float16,1408,320,6144,160,8,0,0,asm,13001+23001,1842.0736 
+gfx938,f8_w8a8_channel,torch.float16,1536,320,6144,160,8,0,0,asm,13001+23001,1869.8966 +gfx938,f8_w8a8_channel,torch.float16,1664,320,6144,160,8,0,0,asm,13001+23001,1899.6228 +gfx938,f8_w8a8_channel,torch.float16,1792,320,6144,160,8,0,0,asm,13001+23001,1928.2289 +gfx938,f8_w8a8_channel,torch.float16,1920,320,6144,160,8,0,0,asm,13001+23001,1961.2308 +gfx938,f8_w8a8_channel,torch.float16,2048,320,6144,160,8,0,0,asm,13001+23001,1987.4959 +gfx938,f8_w8a8_channel,torch.float16,2304,320,6144,160,8,0,0,asm,13001+23001,2205.8699 +gfx938,f8_w8a8_channel,torch.float16,2560,320,6144,160,8,0,0,asm,13001+23001,2666.6012 +gfx938,f8_w8a8_channel,torch.float16,2816,320,6144,160,8,0,0,asm,12001+22001,3269.1511 +gfx938,f8_w8a8_channel,torch.float16,3072,320,6144,160,8,0,0,asm,12001+22001,3444.1653 +gfx938,f8_w8a8_channel,torch.float16,3328,320,6144,160,8,0,0,asm,12001+22001,3507.9211 +gfx938,f8_w8a8_channel,torch.float16,3584,320,6144,160,8,0,0,asm,13001+23001,3695.8705 +gfx938,f8_w8a8_channel,torch.float16,3840,320,6144,160,8,0,0,asm,13001+23001,3735.5589 +gfx938,f8_w8a8_channel,torch.float16,4096,320,6144,160,8,0,0,asm,13001+23001,3786.7084 +gfx938,f8_w8a8_channel,torch.float16,4608,320,6144,160,8,0,0,asm,13001+23001,3975.1794 +gfx938,f8_w8a8_channel,torch.float16,5120,320,6144,160,8,0,0,asm,13001+23001,4655.5142 +gfx938,f8_w8a8_channel,torch.float16,5632,320,6144,160,8,0,0,asm,13001+23001,5420.9347 +gfx938,f8_w8a8_channel,torch.float16,6144,320,6144,160,8,0,0,asm,13001+23001,5629.4392 +gfx938,f8_w8a8_channel,torch.float16,6656,320,6144,160,8,0,0,asm,13001+23001,5746.9548 +gfx938,f8_w8a8_channel,torch.float16,7168,320,6144,160,8,0,0,asm,13001+23001,5931.0555 +gfx938,f8_w8a8_channel,torch.float16,7680,320,6144,160,8,0,0,asm,13001+23001,6678.6912 +gfx938,f8_w8a8_channel,torch.float16,8192,320,6144,160,8,0,0,asm,13001+23001,7327.6405 +gfx938,f8_w8a8_channel,torch.float16,10240,320,6144,160,8,0,0,asm,13001+23001,8520.8914 
+gfx938,f8_w8a8_channel,torch.float16,12288,320,6144,160,8,0,0,asm,13001+23000,10064.4825 +gfx938,f8_w8a8_channel,torch.float16,14336,320,6144,160,8,0,0,asm,13001+23001,11593.2692 +gfx938,f8_w8a8_channel,torch.float16,16384,320,6144,160,8,0,0,asm,13001+23001,13341.9031 +gfx938,f8_w8a8_channel,torch.float16,17408,320,6144,160,8,0,0,asm,13001+23001,13929.9271 +gfx938,f8_w8a8_channel,torch.float16,24576,320,6144,160,8,0,0,asm,13001+23001,19364.5646 +gfx938,f8_w8a8_channel,torch.float16,32768,320,6144,160,8,0,0,asm,13001+23001,25569.9553 +gfx938,f8_w8a8_channel,torch.float16,40960,320,6144,160,8,0,0,asm,13001+23001,31866.0491 +gfx938,f8_w8a8_channel,torch.float16,49152,320,6144,160,8,0,0,asm,13001+23001,38129.8607 +gfx938,f8_w8a8_channel,torch.float16,57344,320,6144,160,8,0,0,asm,13001+23001,44350.9585 +gfx938,f8_w8a8_channel,torch.float16,65536,320,6144,160,8,0,0,asm,13001+23001,50658.3987 +gfx938,f8_w8a8_channel,torch.float16,65536,320,6144,160,8,0,0,asm,13001+23001,50595.5307 +gfx938,f8_w8a8_channel,torch.bfloat16,1,256,4096,512,10,0,0,asm,10008+20000,85.98 +gfx938,f8_w8a8_channel,torch.bfloat16,2,256,4096,512,10,0,0,asm,10011+20001,118.039 +gfx938,f8_w8a8_channel,torch.bfloat16,3,256,4096,512,10,0,0,asm,10011+20000,135.8747 +gfx938,f8_w8a8_channel,torch.bfloat16,4,256,4096,512,10,0,0,asm,10002+20001,172.7083 +gfx938,f8_w8a8_channel,torch.bfloat16,5,256,4096,512,10,0,0,asm,10008+20001,185.7608 +gfx938,f8_w8a8_channel,torch.bfloat16,6,256,4096,512,10,0,0,asm,10011+20001,206.0303 +gfx938,f8_w8a8_channel,torch.bfloat16,7,256,4096,512,10,0,0,asm,10011+20000,225.2808 +gfx938,f8_w8a8_channel,torch.bfloat16,8,256,4096,512,10,0,0,asm,10002+20000,267.5038 +gfx938,f8_w8a8_channel,torch.bfloat16,9,256,4096,512,10,0,0,asm,10011+20000,286.0384 +gfx938,f8_w8a8_channel,torch.bfloat16,10,256,4096,512,10,0,0,asm,10011+20000,299.1836 +gfx938,f8_w8a8_channel,torch.bfloat16,11,256,4096,512,10,0,0,asm,10011+20000,317.5753 
+gfx938,f8_w8a8_channel,torch.bfloat16,12,256,4096,512,10,0,0,asm,10002+20000,358.7036 +gfx938,f8_w8a8_channel,torch.bfloat16,13,256,4096,512,10,0,0,asm,10011+20000,376.0678 +gfx938,f8_w8a8_channel,torch.bfloat16,14,256,4096,512,10,0,0,asm,10011+20000,385.3896 +gfx938,f8_w8a8_channel,torch.bfloat16,15,256,4096,512,10,0,0,asm,10002+20000,434.7706 +gfx938,f8_w8a8_channel,torch.bfloat16,16,256,4096,512,10,0,0,asm,10011+20000,452.497 +gfx938,f8_w8a8_channel,torch.bfloat16,17,256,4096,512,10,0,0,asm,10011+20000,436.5192 +gfx938,f8_w8a8_channel,torch.bfloat16,18,256,4096,512,10,0,0,asm,10011+20000,451.9129 +gfx938,f8_w8a8_channel,torch.bfloat16,20,256,4096,512,10,0,0,asm,10011+20000,506.1359 +gfx938,f8_w8a8_channel,torch.bfloat16,24,256,4096,512,10,0,0,asm,10011+20000,578.1441 +gfx938,f8_w8a8_channel,torch.bfloat16,28,256,4096,512,10,0,0,asm,10011+20000,618.8681 +gfx938,f8_w8a8_channel,torch.bfloat16,32,256,4096,512,10,0,0,asm,10011+20000,715.6595 +gfx938,f8_w8a8_channel,torch.bfloat16,34,256,4096,512,10,0,0,asm,10011+20001,720.6531 +gfx938,f8_w8a8_channel,torch.bfloat16,36,256,4096,512,10,0,0,asm,10011+20001,769.3266 +gfx938,f8_w8a8_channel,torch.bfloat16,40,256,4096,512,10,0,0,asm,10011+20000,770.5728 +gfx938,f8_w8a8_channel,torch.bfloat16,44,256,4096,512,10,0,0,asm,10011+20000,838.8926 +gfx938,f8_w8a8_channel,torch.bfloat16,48,256,4096,512,10,0,0,asm,10011+20000,867.4567 +gfx938,f8_w8a8_channel,torch.bfloat16,56,256,4096,512,10,0,0,asm,10011+20000,942.9175 +gfx938,f8_w8a8_channel,torch.bfloat16,64,256,4096,512,10,0,0,asm,10011+20000,1019.0772 +gfx938,f8_w8a8_channel,torch.bfloat16,68,256,4096,512,10,0,0,asm,10011+20000,1014.0247 +gfx938,f8_w8a8_channel,torch.bfloat16,72,256,4096,512,10,0,0,asm,11005+21001,1107.2792 +gfx938,f8_w8a8_channel,torch.bfloat16,80,256,4096,512,10,0,0,asm,10011+20000,1174.3611 +gfx938,f8_w8a8_channel,torch.bfloat16,88,256,4096,512,10,0,0,asm,10011+20000,1141.0054 
+gfx938,f8_w8a8_channel,torch.bfloat16,96,256,4096,512,10,0,0,asm,10011+20000,1172.3738 +gfx938,f8_w8a8_channel,torch.bfloat16,104,256,4096,512,10,0,0,asm,10011+20000,1203.7 +gfx938,f8_w8a8_channel,torch.bfloat16,112,256,4096,512,10,0,0,asm,10012+20000,1230.1001 +gfx938,f8_w8a8_channel,torch.bfloat16,128,256,4096,512,10,0,0,asm,10012+20000,1260.7946 +gfx938,f8_w8a8_channel,torch.bfloat16,144,256,4096,512,10,0,0,asm,10011+20000,1281.3336 +gfx938,f8_w8a8_channel,torch.bfloat16,160,256,4096,512,10,0,0,asm,10011+20000,1296.1208 +gfx938,f8_w8a8_channel,torch.bfloat16,192,256,4096,512,10,0,0,asm,10011+20000,1331.9944 +gfx938,f8_w8a8_channel,torch.bfloat16,224,256,4096,512,10,0,0,asm,10011+20001,1461.4509 +gfx938,f8_w8a8_channel,torch.bfloat16,256,256,4096,512,10,0,0,asm,10012+20000,1374.63 +gfx938,f8_w8a8_channel,torch.bfloat16,320,256,4096,512,10,0,0,asm,10011+20000,1409.8298 +gfx938,f8_w8a8_channel,torch.bfloat16,384,256,4096,512,10,0,0,asm,10011+20000,1436.2382 +gfx938,f8_w8a8_channel,torch.bfloat16,448,256,4096,512,10,0,0,asm,10011+20000,1479.8002 +gfx938,f8_w8a8_channel,torch.bfloat16,512,256,4096,512,10,0,0,asm,10011+20000,1510.2592 +gfx938,f8_w8a8_channel,torch.bfloat16,576,256,4096,512,10,0,0,asm,11005+21000,1584.1538 +gfx938,f8_w8a8_channel,torch.bfloat16,640,256,4096,512,10,0,0,asm,11005+21000,1550.1751 +gfx938,f8_w8a8_channel,torch.bfloat16,704,256,4096,512,10,0,0,asm,11005+21000,1570.1246 +gfx938,f8_w8a8_channel,torch.bfloat16,768,256,4096,512,10,0,0,asm,11005+21000,1586.2341 +gfx938,f8_w8a8_channel,torch.bfloat16,832,256,4096,512,10,0,0,asm,11005+21000,1601.8551 +gfx938,f8_w8a8_channel,torch.bfloat16,896,256,4096,512,10,0,0,asm,11005+21001,1655.2444 +gfx938,f8_w8a8_channel,torch.bfloat16,960,256,4096,512,10,0,0,asm,11005+21000,1640.3982 +gfx938,f8_w8a8_channel,torch.bfloat16,1024,256,4096,512,10,0,0,asm,11005+21000,1671.8927 +gfx938,f8_w8a8_channel,torch.bfloat16,1152,256,4096,512,10,0,0,asm,11005+21000,1730.5199 
+gfx938,f8_w8a8_channel,torch.bfloat16,1280,256,4096,512,10,0,0,asm,11005+21000,1781.442 +gfx938,f8_w8a8_channel,torch.bfloat16,1408,256,4096,512,10,0,0,asm,11005+21001,1842.9829 +gfx938,f8_w8a8_channel,torch.bfloat16,1536,256,4096,512,10,0,0,asm,11005+21001,1951.3363 +gfx938,f8_w8a8_channel,torch.bfloat16,1664,256,4096,512,10,0,0,asm,12001+22001,1998.9403 +gfx938,f8_w8a8_channel,torch.bfloat16,1792,256,4096,512,10,0,0,asm,12001+22001,2012.1022 +gfx938,f8_w8a8_channel,torch.bfloat16,1920,256,4096,512,10,0,0,asm,12001+22001,2040.3715 +gfx938,f8_w8a8_channel,torch.bfloat16,2048,256,4096,512,10,0,0,asm,12001+22001,2059.7652 +gfx938,f8_w8a8_channel,torch.bfloat16,2304,256,4096,512,10,0,0,asm,12001+22001,2132.1608 +gfx938,f8_w8a8_channel,torch.bfloat16,2560,256,4096,512,10,0,0,asm,12001+22001,2200.5735 +gfx938,f8_w8a8_channel,torch.bfloat16,2816,256,4096,512,10,0,0,asm,12001+22001,2329.0953 +gfx938,f8_w8a8_channel,torch.bfloat16,3072,256,4096,512,10,0,0,asm,12001+22001,2593.4905 +gfx938,f8_w8a8_channel,torch.bfloat16,3328,256,4096,512,10,0,0,asm,12001+22001,2998.7442 +gfx938,f8_w8a8_channel,torch.bfloat16,3584,256,4096,512,10,0,0,asm,13001+23001,3227.1983 +gfx938,f8_w8a8_channel,torch.bfloat16,3840,256,4096,512,10,0,0,asm,13001+23001,3274.2464 +gfx938,f8_w8a8_channel,torch.bfloat16,4096,256,4096,512,10,0,0,asm,13001+23001,3308.0736 +gfx938,f8_w8a8_channel,torch.bfloat16,4608,256,4096,512,10,0,0,asm,13001+23001,3420.7888 +gfx938,f8_w8a8_channel,torch.bfloat16,5120,256,4096,512,10,0,0,asm,13001+23001,3562.9356 +gfx938,f8_w8a8_channel,torch.bfloat16,5632,256,4096,512,10,0,0,asm,13001+23001,3679.8195 +gfx938,f8_w8a8_channel,torch.bfloat16,6144,256,4096,512,10,0,0,asm,13001+23001,4237.0553 +gfx938,f8_w8a8_channel,torch.bfloat16,6656,256,4096,512,10,0,0,asm,13001+23001,5043.5704 +gfx938,f8_w8a8_channel,torch.bfloat16,7168,256,4096,512,10,0,0,asm,12001+22001,5577.7081 +gfx938,f8_w8a8_channel,torch.bfloat16,7680,256,4096,512,10,0,0,asm,12001+22001,5867.8889 
+gfx938,f8_w8a8_channel,torch.bfloat16,8192,256,4096,512,10,0,0,asm,12001+22001,6024.8825 +gfx938,f8_w8a8_channel,torch.bfloat16,10240,256,4096,512,10,0,0,asm,13001+23001,6800.9394 +gfx938,f8_w8a8_channel,torch.bfloat16,12288,256,4096,512,10,0,0,asm,13001+23001,7508.4403 +gfx938,f8_w8a8_channel,torch.bfloat16,14336,256,4096,512,10,0,0,asm,13001+23001,9580.6614 +gfx938,f8_w8a8_channel,torch.bfloat16,16384,256,4096,512,10,0,0,asm,13001+23001,10215.7416 +gfx938,f8_w8a8_channel,torch.bfloat16,17408,256,4096,512,10,0,0,asm,13001+23001,10499.4128 +gfx938,f8_w8a8_channel,torch.bfloat16,24576,256,4096,512,10,0,0,asm,13001+23001,14172.5893 +gfx938,f8_w8a8_channel,torch.bfloat16,32768,256,4096,512,10,0,0,asm,13001+23001,18841.2993 +gfx938,f8_w8a8_channel,torch.bfloat16,40960,256,4096,512,10,0,0,asm,13001+23001,23386.5547 +gfx938,f8_w8a8_channel,torch.bfloat16,49152,256,4096,512,10,0,0,asm,13001+23001,27573.5355 +gfx938,f8_w8a8_channel,torch.bfloat16,57344,256,4096,512,10,0,0,asm,13001+23001,31773.8919 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,256,4096,512,10,0,0,asm,13001+23001,36230.8804 +gfx938,f8_w8a8_channel,torch.bfloat16,65536,256,4096,512,10,0,0,asm,13001+23001,36217.3488 +gfx936,int8_w8a8_channel,torch.float16,1,512,2048,256,8,0,0,asm,10010+20000,57.5423 +gfx936,int8_w8a8_channel,torch.float16,2,512,2048,256,8,0,0,asm,10013+20000,75.8832 +gfx936,int8_w8a8_channel,torch.float16,3,512,2048,256,8,0,0,asm,10013+20000,103.3274 +gfx936,int8_w8a8_channel,torch.float16,4,512,2048,256,8,0,0,asm,10013+20000,118.0053 +gfx936,int8_w8a8_channel,torch.float16,5,512,2048,256,8,0,0,asm,10013+20000,140.1358 +gfx936,int8_w8a8_channel,torch.float16,6,512,2048,256,8,0,0,asm,10013+20000,153.1881 +gfx936,int8_w8a8_channel,torch.float16,7,512,2048,256,8,0,0,asm,10013+20000,181.3652 +gfx936,int8_w8a8_channel,torch.float16,8,512,2048,256,8,0,0,asm,10013+20000,192.2788 +gfx936,int8_w8a8_channel,torch.float16,9,512,2048,256,8,0,0,asm,10013+20000,208.2451 
+gfx936,int8_w8a8_channel,torch.float16,10,512,2048,256,8,0,0,asm,10008+20000,228.9861 +gfx936,int8_w8a8_channel,torch.float16,11,512,2048,256,8,0,0,asm,10013+20000,240.826 +gfx936,int8_w8a8_channel,torch.float16,12,512,2048,256,8,0,0,asm,10013+20000,251.5291 +gfx936,int8_w8a8_channel,torch.float16,13,512,2048,256,8,0,0,asm,10011+20000,273.2555 +gfx936,int8_w8a8_channel,torch.float16,14,512,2048,256,8,0,0,asm,10013+20000,285.9122 +gfx936,int8_w8a8_channel,torch.float16,15,512,2048,256,8,0,0,asm,10013+20000,297.1289 +gfx936,int8_w8a8_channel,torch.float16,16,512,2048,256,8,0,0,asm,10008+20000,309.1964 +gfx936,int8_w8a8_channel,torch.float16,17,512,2048,256,8,0,0,asm,10009+20000,312.5196 +gfx936,int8_w8a8_channel,torch.float16,18,512,2048,256,8,0,0,asm,10013+20000,320.166 +gfx936,int8_w8a8_channel,torch.float16,20,512,2048,256,8,0,0,asm,10008+20000,349.8501 +gfx936,int8_w8a8_channel,torch.float16,24,512,2048,256,8,0,0,asm,10009+20000,390.6669 +gfx936,int8_w8a8_channel,torch.float16,28,512,2048,256,8,0,0,asm,10013+20000,454.9896 +gfx936,int8_w8a8_channel,torch.float16,32,512,2048,256,8,0,0,asm,11005+20000,482.9222 +gfx936,int8_w8a8_channel,torch.float16,34,512,2048,256,8,0,0,asm,11005+20000,468.7803 +gfx936,int8_w8a8_channel,torch.float16,36,512,2048,256,8,0,0,asm,10011+20000,496.3086 +gfx936,int8_w8a8_channel,torch.float16,40,512,2048,256,8,0,0,asm,10011+20000,516.0896 +gfx936,int8_w8a8_channel,torch.float16,44,512,2048,256,8,0,0,asm,10011+20000,536.3675 +gfx936,int8_w8a8_channel,torch.float16,48,512,2048,256,8,0,0,asm,10008+20000,550.9443 +gfx936,int8_w8a8_channel,torch.float16,56,512,2048,256,8,0,0,asm,10011+20000,577.7821 +gfx936,int8_w8a8_channel,torch.float16,64,512,2048,256,8,0,0,asm,10008+20000,597.9084 +gfx936,int8_w8a8_channel,torch.float16,68,512,2048,256,8,0,0,asm,10008+20000,601.681 +gfx936,int8_w8a8_channel,torch.float16,72,512,2048,256,8,0,0,asm,10008+20000,615.7609 +gfx936,int8_w8a8_channel,torch.float16,80,512,2048,256,8,0,0,asm,10011+20000,636.763 
+gfx936,int8_w8a8_channel,torch.float16,88,512,2048,256,8,0,0,asm,10011+20000,650.9019 +gfx936,int8_w8a8_channel,torch.float16,96,512,2048,256,8,0,0,asm,10011+20000,655.6177 +gfx936,int8_w8a8_channel,torch.float16,104,512,2048,256,8,0,0,asm,10011+20000,665.5714 +gfx936,int8_w8a8_channel,torch.float16,112,512,2048,256,8,0,0,asm,10011+20000,670.8683 +gfx936,int8_w8a8_channel,torch.float16,128,512,2048,256,8,0,0,asm,10011+20000,685.1923 +gfx936,int8_w8a8_channel,torch.float16,144,512,2048,256,8,0,0,asm,10008+20001,725.1081 +gfx936,int8_w8a8_channel,torch.float16,160,512,2048,256,8,0,0,asm,10011+20000,696.4429 +gfx936,int8_w8a8_channel,torch.float16,192,512,2048,256,8,0,0,asm,10011+20000,700.8724 +gfx936,int8_w8a8_channel,torch.float16,224,512,2048,256,8,0,0,asm,10011+20000,711.4071 +gfx936,int8_w8a8_channel,torch.float16,256,512,2048,256,8,0,0,asm,10011+20000,716.0387 +gfx936,int8_w8a8_channel,torch.float16,320,512,2048,256,8,0,0,asm,11007+21000,774.5733 +gfx936,int8_w8a8_channel,torch.float16,384,512,2048,256,8,0,0,asm,10011+20000,747.0113 +gfx936,int8_w8a8_channel,torch.float16,448,512,2048,256,8,0,0,asm,11005+21000,773.7396 +gfx936,int8_w8a8_channel,torch.float16,512,512,2048,256,8,0,0,asm,11005+21000,787.6848 +gfx936,int8_w8a8_channel,torch.float16,576,512,2048,256,8,0,0,asm,12005+22000,861.8404 +gfx936,int8_w8a8_channel,torch.float16,640,512,2048,256,8,0,0,asm,11005+21000,809.0491 +gfx936,int8_w8a8_channel,torch.float16,704,512,2048,256,8,0,0,asm,11007+21000,826.4215 +gfx936,int8_w8a8_channel,torch.float16,768,512,2048,256,8,0,0,asm,11005+21000,838.3794 +gfx936,int8_w8a8_channel,torch.float16,832,512,2048,256,8,0,0,asm,11005+21000,849.7562 +gfx936,int8_w8a8_channel,torch.float16,896,512,2048,256,8,0,0,asm,12005+22000,884.7455 +gfx936,int8_w8a8_channel,torch.float16,960,512,2048,256,8,0,0,asm,12001+22000,867.4824 +gfx936,int8_w8a8_channel,torch.float16,1024,512,2048,256,8,0,0,asm,12005+22000,888.8297 
+gfx936,int8_w8a8_channel,torch.float16,1152,512,2048,256,8,0,0,asm,12005+22000,941.4443 +gfx936,int8_w8a8_channel,torch.float16,1280,512,2048,256,8,0,0,asm,12001+22000,928.9812 +gfx936,int8_w8a8_channel,torch.float16,1408,512,2048,256,8,0,0,asm,12001+22000,947.9622 +gfx936,int8_w8a8_channel,torch.float16,1536,512,2048,256,8,0,0,asm,12005+22000,980.9053 +gfx936,int8_w8a8_channel,torch.float16,1664,512,2048,256,8,0,0,asm,12001+22000,999.9199 +gfx936,int8_w8a8_channel,torch.float16,1792,512,2048,256,8,0,0,asm,12001+22000,1022.5051 +gfx936,int8_w8a8_channel,torch.float16,1920,512,2048,256,8,0,0,asm,12001+22000,1070.6567 +gfx936,int8_w8a8_channel,torch.float16,2048,512,2048,256,8,0,0,asm,12001+22001,1153.3089 +gfx936,int8_w8a8_channel,torch.float16,2304,512,2048,256,8,0,0,asm,13001+23001,1182.4963 +gfx936,int8_w8a8_channel,torch.float16,2560,512,2048,256,8,0,0,asm,13001+23001,1224.7614 +gfx936,int8_w8a8_channel,torch.float16,2816,512,2048,256,8,0,0,asm,13001+23001,1245.3172 +gfx936,int8_w8a8_channel,torch.float16,3072,512,2048,256,8,0,0,asm,13001+23001,1278.1086 +gfx936,int8_w8a8_channel,torch.float16,3328,512,2048,256,8,0,0,asm,13001+23001,1336.2475 +gfx936,int8_w8a8_channel,torch.float16,3584,512,2048,256,8,0,0,asm,13001+23001,1381.6115 +gfx936,int8_w8a8_channel,torch.float16,3840,512,2048,256,8,0,0,asm,13001+23001,1493.1565 +gfx936,int8_w8a8_channel,torch.float16,4096,512,2048,256,8,0,0,asm,13001+23001,1657.3499 +gfx936,int8_w8a8_channel,torch.float16,4608,512,2048,256,8,0,0,asm,13001+23001,1993.45 +gfx936,int8_w8a8_channel,torch.float16,5120,512,2048,256,8,0,0,asm,12001+22001,2101.3907 +gfx936,int8_w8a8_channel,torch.float16,5632,512,2048,256,8,0,0,asm,13001+23001,2184.7083 +gfx936,int8_w8a8_channel,torch.float16,6144,512,2048,256,8,0,0,asm,13001+23001,2237.3144 +gfx936,int8_w8a8_channel,torch.float16,6656,512,2048,256,8,0,0,asm,13001+23001,2297.6341 +gfx936,int8_w8a8_channel,torch.float16,7168,512,2048,256,8,0,0,asm,13001+23001,2372.7578 
+gfx936,int8_w8a8_channel,torch.float16,7680,512,2048,256,8,0,0,asm,13001+23001,2503.2248 +gfx936,int8_w8a8_channel,torch.float16,8192,512,2048,256,8,0,0,asm,13001+23001,2796.6807 +gfx936,int8_w8a8_channel,torch.float16,10240,512,2048,256,8,0,0,asm,13001+23001,3349.3024 +gfx936,int8_w8a8_channel,torch.float16,12288,512,2048,256,8,0,0,asm,13001+23001,3896.5094 +gfx936,int8_w8a8_channel,torch.float16,14336,512,2048,256,8,0,0,asm,13001+23001,4486.15 +gfx936,int8_w8a8_channel,torch.float16,16384,512,2048,256,8,0,0,asm,13001+23001,5067.0073 +gfx936,int8_w8a8_channel,torch.float16,17408,512,2048,256,8,0,0,asm,13001+23001,5469.187 +gfx936,int8_w8a8_channel,torch.float16,24576,512,2048,256,8,0,0,asm,13001+23001,7372.5919 +gfx936,int8_w8a8_channel,torch.float16,32768,512,2048,256,8,0,0,asm,13001+23001,11965.4224 +gfx936,int8_w8a8_channel,torch.float16,40960,512,2048,256,8,0,0,asm,13001+23001,14939.3742 +gfx936,int8_w8a8_channel,torch.float16,49152,512,2048,256,8,0,0,asm,13001+23001,17880.1629 +gfx936,int8_w8a8_channel,torch.float16,57344,512,2048,256,8,0,0,asm,13001+23001,20746.1576 +gfx936,int8_w8a8_channel,torch.float16,65536,512,2048,256,8,0,0,asm,13001+22001,24014.33 +gfx936,int8_w8a8_channel,torch.float16,65536,512,2048,256,8,0,0,asm,13001+23001,23307.1407 +gfx936,int8_w8a8_channel,torch.bfloat16,1,512,2048,256,8,0,0,asm,10010+20000,66.1234 +gfx936,int8_w8a8_channel,torch.bfloat16,2,512,2048,256,8,0,0,asm,10013+20000,87.4538 +gfx936,int8_w8a8_channel,torch.bfloat16,3,512,2048,256,8,0,0,asm,10010+20000,114.2326 +gfx936,int8_w8a8_channel,torch.bfloat16,4,512,2048,256,8,0,0,asm,10013+20000,130.999 +gfx936,int8_w8a8_channel,torch.bfloat16,5,512,2048,256,8,0,0,asm,10013+20000,155.5547 +gfx936,int8_w8a8_channel,torch.bfloat16,6,512,2048,256,8,0,0,asm,10013+20000,167.4367 +gfx936,int8_w8a8_channel,torch.bfloat16,7,512,2048,256,8,0,0,asm,10013+20000,202.2999 +gfx936,int8_w8a8_channel,torch.bfloat16,8,512,2048,256,8,0,0,asm,10013+20000,208.5902 
+gfx936,int8_w8a8_channel,torch.bfloat16,9,512,2048,256,8,0,0,asm,10013+20000,226.6451 +gfx936,int8_w8a8_channel,torch.bfloat16,10,512,2048,256,8,0,0,asm,10013+20000,253.2217 +gfx936,int8_w8a8_channel,torch.bfloat16,11,512,2048,256,8,0,0,asm,10013+20000,260.186 +gfx936,int8_w8a8_channel,torch.bfloat16,12,512,2048,256,8,0,0,asm,10013+20000,270.1901 +gfx936,int8_w8a8_channel,torch.bfloat16,13,512,2048,256,8,0,0,asm,10013+20000,296.2028 +gfx936,int8_w8a8_channel,torch.bfloat16,14,512,2048,256,8,0,0,asm,10013+20000,309.7185 +gfx936,int8_w8a8_channel,torch.bfloat16,15,512,2048,256,8,0,0,asm,10013+20000,318.2238 +gfx936,int8_w8a8_channel,torch.bfloat16,16,512,2048,256,8,0,0,asm,10013+20000,340.7668 +gfx936,int8_w8a8_channel,torch.bfloat16,17,512,2048,256,8,0,0,asm,10013+20000,335.9133 +gfx936,int8_w8a8_channel,torch.bfloat16,18,512,2048,256,8,0,0,asm,10013+20000,342.3722 +gfx936,int8_w8a8_channel,torch.bfloat16,20,512,2048,256,8,0,0,asm,10011+20000,377.9258 +gfx936,int8_w8a8_channel,torch.bfloat16,24,512,2048,256,8,0,0,asm,10011+20000,422.1447 +gfx936,int8_w8a8_channel,torch.bfloat16,28,512,2048,256,8,0,0,asm,10013+20000,491.0148 +gfx936,int8_w8a8_channel,torch.bfloat16,32,512,2048,256,8,0,0,asm,10011+20000,519.6043 +gfx936,int8_w8a8_channel,torch.bfloat16,34,512,2048,256,8,0,0,asm,10011+20000,508.8813 +gfx936,int8_w8a8_channel,torch.bfloat16,36,512,2048,256,8,0,0,asm,10011+20000,523.6266 +gfx936,int8_w8a8_channel,torch.bfloat16,40,512,2048,256,8,0,0,asm,10011+20000,552.1233 +gfx936,int8_w8a8_channel,torch.bfloat16,44,512,2048,256,8,0,0,asm,10011+20000,567.8369 +gfx936,int8_w8a8_channel,torch.bfloat16,48,512,2048,256,8,0,0,asm,10011+20000,582.3801 +gfx936,int8_w8a8_channel,torch.bfloat16,56,512,2048,256,8,0,0,asm,10011+20000,605.1421 +gfx936,int8_w8a8_channel,torch.bfloat16,64,512,2048,256,8,0,0,asm,10011+20000,631.66 +gfx936,int8_w8a8_channel,torch.bfloat16,68,512,2048,256,8,0,0,asm,10011+20000,638.2958 
+gfx936,int8_w8a8_channel,torch.bfloat16,72,512,2048,256,8,0,0,asm,10011+20000,648.8725 +gfx936,int8_w8a8_channel,torch.bfloat16,80,512,2048,256,8,0,0,asm,10011+20000,670.4557 +gfx936,int8_w8a8_channel,torch.bfloat16,88,512,2048,256,8,0,0,asm,10011+20000,685.4114 +gfx936,int8_w8a8_channel,torch.bfloat16,96,512,2048,256,8,0,0,asm,10011+20000,690.6072 +gfx936,int8_w8a8_channel,torch.bfloat16,104,512,2048,256,8,0,0,asm,10011+20000,682.2114 +gfx936,int8_w8a8_channel,torch.bfloat16,112,512,2048,256,8,0,0,asm,10011+20000,707.9545 +gfx936,int8_w8a8_channel,torch.bfloat16,128,512,2048,256,8,0,0,asm,10011+20000,720.4093 +gfx936,int8_w8a8_channel,torch.bfloat16,144,512,2048,256,8,0,0,asm,10011+20000,711.2387 +gfx936,int8_w8a8_channel,torch.bfloat16,160,512,2048,256,8,0,0,asm,10011+20000,730.3039 +gfx936,int8_w8a8_channel,torch.bfloat16,192,512,2048,256,8,0,0,asm,10011+20000,739.9207 +gfx936,int8_w8a8_channel,torch.bfloat16,224,512,2048,256,8,0,0,asm,10012+20000,746.9103 +gfx936,int8_w8a8_channel,torch.bfloat16,256,512,2048,256,8,0,0,asm,10011+20000,755.8366 +gfx936,int8_w8a8_channel,torch.bfloat16,320,512,2048,256,8,0,0,asm,10011+20000,780.4092 +gfx936,int8_w8a8_channel,torch.bfloat16,384,512,2048,256,8,0,0,asm,10011+20000,794.2703 +gfx936,int8_w8a8_channel,torch.bfloat16,448,512,2048,256,8,0,0,asm,11005+21000,800.1734 +gfx936,int8_w8a8_channel,torch.bfloat16,512,512,2048,256,8,0,0,asm,11005+21000,841.2765 +gfx936,int8_w8a8_channel,torch.bfloat16,576,512,2048,256,8,0,0,asm,11007+21000,863.8701 +gfx936,int8_w8a8_channel,torch.bfloat16,640,512,2048,256,8,0,0,asm,11005+21000,836.1818 +gfx936,int8_w8a8_channel,torch.bfloat16,704,512,2048,256,8,0,0,asm,11005+21000,858.7079 +gfx936,int8_w8a8_channel,torch.bfloat16,768,512,2048,256,8,0,0,asm,11005+21000,897.6973 +gfx936,int8_w8a8_channel,torch.bfloat16,832,512,2048,256,8,0,0,asm,11005+21000,929.8656 +gfx936,int8_w8a8_channel,torch.bfloat16,896,512,2048,256,8,0,0,asm,12001+22000,944.9813 
+gfx936,int8_w8a8_channel,torch.bfloat16,960,512,2048,256,8,0,0,asm,12001+22000,955.5665 +gfx936,int8_w8a8_channel,torch.bfloat16,1024,512,2048,256,8,0,0,asm,12001+22000,965.9664 +gfx936,int8_w8a8_channel,torch.bfloat16,1152,512,2048,256,8,0,0,asm,12005+22000,1028.9894 +gfx936,int8_w8a8_channel,torch.bfloat16,1280,512,2048,256,8,0,0,asm,12001+22000,999.2968 +gfx936,int8_w8a8_channel,torch.bfloat16,1408,512,2048,256,8,0,0,asm,12001+22001,1032.0463 +gfx936,int8_w8a8_channel,torch.bfloat16,1536,512,2048,256,8,0,0,asm,12001+22000,1052.8631 +gfx936,int8_w8a8_channel,torch.bfloat16,1664,512,2048,256,8,0,0,asm,12001+22001,1086.3956 +gfx936,int8_w8a8_channel,torch.bfloat16,1792,512,2048,256,8,0,0,asm,12001+22001,1132.8207 +gfx936,int8_w8a8_channel,torch.bfloat16,1920,512,2048,256,8,0,0,asm,12001+22001,1190.7238 +gfx936,int8_w8a8_channel,torch.bfloat16,2048,512,2048,256,8,0,0,asm,12005+22001,1308.5508 +gfx936,int8_w8a8_channel,torch.bfloat16,2304,512,2048,256,8,0,0,asm,13001+23001,1372.2055 +gfx936,int8_w8a8_channel,torch.bfloat16,2560,512,2048,256,8,0,0,asm,13001+23001,1401.258 +gfx936,int8_w8a8_channel,torch.bfloat16,2816,512,2048,256,8,0,0,asm,13001+23001,1442.5548 +gfx936,int8_w8a8_channel,torch.bfloat16,3072,512,2048,256,8,0,0,asm,13001+23001,1476.5842 +gfx936,int8_w8a8_channel,torch.bfloat16,3328,512,2048,256,8,0,0,asm,13001+23001,1541.9145 +gfx936,int8_w8a8_channel,torch.bfloat16,3584,512,2048,256,8,0,0,asm,13001+23001,1587.8681 +gfx936,int8_w8a8_channel,torch.bfloat16,3840,512,2048,256,8,0,0,asm,13001+23001,1753.4172 +gfx936,int8_w8a8_channel,torch.bfloat16,4096,512,2048,256,8,0,0,asm,13001+23001,2012.5745 +gfx936,int8_w8a8_channel,torch.bfloat16,4608,512,2048,256,8,0,0,asm,12001+22001,2424.8344 +gfx936,int8_w8a8_channel,torch.bfloat16,5120,512,2048,256,8,0,0,asm,12001+22001,2563.3436 +gfx936,int8_w8a8_channel,torch.bfloat16,5632,512,2048,256,8,0,0,asm,12001+22001,2694.5769 +gfx936,int8_w8a8_channel,torch.bfloat16,6144,512,2048,256,8,0,0,asm,13001+23001,2759.1072 
+gfx936,int8_w8a8_channel,torch.bfloat16,6656,512,2048,256,8,0,0,asm,13001+23001,2816.3026 +gfx936,int8_w8a8_channel,torch.bfloat16,7168,512,2048,256,8,0,0,asm,13001+23001,2898.4665 +gfx936,int8_w8a8_channel,torch.bfloat16,7680,512,2048,256,8,0,0,asm,13001+23001,3065.6827 +gfx936,int8_w8a8_channel,torch.bfloat16,8192,512,2048,256,8,0,0,asm,13001+23001,3465.1172 +gfx936,int8_w8a8_channel,torch.bfloat16,10240,512,2048,256,8,0,0,asm,13001+23001,4148.9045 +gfx936,int8_w8a8_channel,torch.bfloat16,12288,512,2048,256,8,0,0,asm,13001+23001,4845.3656 +gfx936,int8_w8a8_channel,torch.bfloat16,14336,512,2048,256,8,0,0,asm,13001+23001,5592.5893 +gfx936,int8_w8a8_channel,torch.bfloat16,16384,512,2048,256,8,0,0,asm,13001+23001,6308.3012 +gfx936,int8_w8a8_channel,torch.bfloat16,17408,512,2048,256,8,0,0,asm,13001+23001,6826.1859 +gfx936,int8_w8a8_channel,torch.bfloat16,24576,512,2048,256,8,0,0,asm,13001+23001,9185.3035 +gfx936,int8_w8a8_channel,torch.bfloat16,32768,512,2048,256,8,0,0,asm,13001+23001,12019.847 +gfx936,int8_w8a8_channel,torch.bfloat16,40960,512,2048,256,8,0,0,asm,13001+23001,14983.3494 +gfx936,int8_w8a8_channel,torch.bfloat16,49152,512,2048,256,8,0,0,asm,13001+23001,17938.1767 +gfx936,int8_w8a8_channel,torch.bfloat16,57344,512,2048,256,8,0,0,asm,13001+23001,20809.5385 +gfx936,int8_w8a8_channel,torch.bfloat16,65536,512,2048,256,8,0,0,asm,13001+23001,23354.4759 +gfx936,int8_w8a8_channel,torch.bfloat16,65536,512,2048,256,8,0,0,asm,13001+23001,23370.3628 +gfx938,int8_w8a8_channel,torch.bfloat16,1,512,4096,256,6,0,0,asm,10008+20000,78.0137 +gfx938,int8_w8a8_channel,torch.bfloat16,2,512,4096,256,6,0,0,asm,10011+20000,104.7423 +gfx938,int8_w8a8_channel,torch.bfloat16,3,512,4096,256,6,0,0,asm,10002+20000,148.4811 +gfx938,int8_w8a8_channel,torch.bfloat16,4,512,4096,256,6,0,0,asm,10008+20000,174.1736 +gfx938,int8_w8a8_channel,torch.bfloat16,5,512,4096,256,6,0,0,asm,10011+20000,193.5926 +gfx938,int8_w8a8_channel,torch.bfloat16,6,512,4096,256,6,0,0,asm,10002+20000,246.7968 
+gfx938,int8_w8a8_channel,torch.bfloat16,7,512,4096,256,6,0,0,asm,10011+20000,268.0261 +gfx938,int8_w8a8_channel,torch.bfloat16,8,512,4096,256,6,0,0,asm,10011+20000,281.0701 +gfx938,int8_w8a8_channel,torch.bfloat16,9,512,4096,256,6,0,0,asm,10002+20000,336.7837 +gfx938,int8_w8a8_channel,torch.bfloat16,10,512,4096,256,6,0,0,asm,10011+20000,349.3478 +gfx938,int8_w8a8_channel,torch.bfloat16,11,512,4096,256,6,0,0,asm,10011+20000,362.3499 +gfx938,int8_w8a8_channel,torch.bfloat16,12,512,4096,256,6,0,0,asm,10011+20000,368.5898 +gfx938,int8_w8a8_channel,torch.bfloat16,13,512,4096,256,6,0,0,asm,10011+20000,384.8931 +gfx938,int8_w8a8_channel,torch.bfloat16,14,512,4096,256,6,0,0,asm,10011+20000,435.6717 +gfx938,int8_w8a8_channel,torch.bfloat16,15,512,4096,256,6,0,0,asm,10011+20000,443.6802 +gfx938,int8_w8a8_channel,torch.bfloat16,16,512,4096,256,6,0,0,asm,10011+20000,449.9033 +gfx938,int8_w8a8_channel,torch.bfloat16,17,512,4096,256,6,0,0,asm,10011+20000,456.8477 +gfx938,int8_w8a8_channel,torch.bfloat16,18,512,4096,256,6,0,0,asm,10011+20000,500.16550000000007 +gfx938,int8_w8a8_channel,torch.bfloat16,20,512,4096,256,6,0,0,asm,10011+20000,524.7801 +gfx938,int8_w8a8_channel,torch.bfloat16,24,512,4096,256,6,0,0,asm,10011+20000,603.8881 +gfx938,int8_w8a8_channel,torch.bfloat16,28,512,4096,256,6,0,0,asm,10011+20000,697.4143 +gfx938,int8_w8a8_channel,torch.bfloat16,32,512,4096,256,6,0,0,asm,10011+20000,773.7342 +gfx938,int8_w8a8_channel,torch.bfloat16,34,512,4096,256,6,0,0,asm,10011+20000,775.1206 +gfx938,int8_w8a8_channel,torch.bfloat16,36,512,4096,256,6,0,0,asm,10011+20000,823.769 +gfx938,int8_w8a8_channel,torch.bfloat16,40,512,4096,256,6,0,0,asm,10011+20000,865.4699 +gfx938,int8_w8a8_channel,torch.bfloat16,44,512,4096,256,6,0,0,asm,10011+20000,937.0485 +gfx938,int8_w8a8_channel,torch.bfloat16,48,512,4096,256,6,0,0,asm,10011+20000,988.0631 +gfx938,int8_w8a8_channel,torch.bfloat16,56,512,4096,256,6,0,0,asm,10011+20000,1047.9872 
+gfx938,int8_w8a8_channel,torch.bfloat16,64,512,4096,256,6,0,0,asm,10011+20000,1105.1827 +gfx938,int8_w8a8_channel,torch.bfloat16,68,512,4096,256,6,0,0,asm,10011+20000,1135.1615 +gfx938,int8_w8a8_channel,torch.bfloat16,72,512,4096,256,6,0,0,asm,10011+20000,1172.1466 +gfx938,int8_w8a8_channel,torch.bfloat16,80,512,4096,256,6,0,0,asm,10011+20000,1195.0182 +gfx938,int8_w8a8_channel,torch.bfloat16,88,512,4096,256,6,0,0,asm,10011+20000,1236.5169 +gfx938,int8_w8a8_channel,torch.bfloat16,96,512,4096,256,6,0,0,asm,10011+20000,1262.0663 +gfx938,int8_w8a8_channel,torch.bfloat16,104,512,4096,256,6,0,0,asm,10011+20000,1281.8557 +gfx938,int8_w8a8_channel,torch.bfloat16,112,512,4096,256,6,0,0,asm,10011+20000,1292.9798 +gfx938,int8_w8a8_channel,torch.bfloat16,128,512,4096,256,6,0,0,asm,10011+20000,1330.4197 +gfx938,int8_w8a8_channel,torch.bfloat16,144,512,4096,256,6,0,0,asm,10011+20000,1346.0662 +gfx938,int8_w8a8_channel,torch.bfloat16,160,512,4096,256,6,0,0,asm,10011+20000,1386.0409 +gfx938,int8_w8a8_channel,torch.bfloat16,192,512,4096,256,6,0,0,asm,10011+20000,1398.8493 +gfx938,int8_w8a8_channel,torch.bfloat16,224,512,4096,256,6,0,0,asm,10011+20000,1416.8366 +gfx938,int8_w8a8_channel,torch.bfloat16,256,512,4096,256,6,0,0,asm,10011+20000,1422.0661 +gfx938,int8_w8a8_channel,torch.bfloat16,320,512,4096,256,6,0,0,asm,10011+20000,1439.3039 +gfx938,int8_w8a8_channel,torch.bfloat16,384,512,4096,256,6,0,0,asm,11005+21000,1519.3457 +gfx938,int8_w8a8_channel,torch.bfloat16,448,512,4096,256,6,0,0,asm,10011+20000,1480.4069 +gfx938,int8_w8a8_channel,torch.bfloat16,512,512,4096,256,6,0,0,asm,11005+21000,1508.4069 +gfx938,int8_w8a8_channel,torch.bfloat16,576,512,4096,256,6,0,0,asm,11005+21000,1582.9583 +gfx938,int8_w8a8_channel,torch.bfloat16,640,512,4096,256,6,0,0,asm,11005+21000,1542.1839 +gfx938,int8_w8a8_channel,torch.bfloat16,704,512,4096,256,6,0,0,asm,11005+21000,1553.5608 +gfx938,int8_w8a8_channel,torch.bfloat16,768,512,4096,256,6,0,0,asm,11005+21000,1569.9736 
+gfx938,int8_w8a8_channel,torch.bfloat16,832,512,4096,256,6,0,0,asm,11005+21000,1596.6598 +gfx938,int8_w8a8_channel,torch.bfloat16,896,512,4096,256,6,0,0,asm,11005+21000,1617.6449 +gfx938,int8_w8a8_channel,torch.bfloat16,960,512,4096,256,6,0,0,asm,11005+21000,1632.1626 +gfx938,int8_w8a8_channel,torch.bfloat16,1024,512,4096,256,6,0,0,asm,11005+21000,1645.7204 +gfx938,int8_w8a8_channel,torch.bfloat16,1152,512,4096,256,6,0,0,asm,12001+22001,1750.0232 +gfx938,int8_w8a8_channel,torch.bfloat16,1280,512,4096,256,6,0,0,asm,12001+22000,1721.5347 +gfx938,int8_w8a8_channel,torch.bfloat16,1408,512,4096,256,6,0,0,asm,12001+22000,1741.7703 +gfx938,int8_w8a8_channel,torch.bfloat16,1536,512,4096,256,6,0,0,asm,12001+22001,1789.3573 +gfx938,int8_w8a8_channel,torch.bfloat16,1664,512,4096,256,6,0,0,asm,12001+22000,1804.4477 +gfx938,int8_w8a8_channel,torch.bfloat16,1792,512,4096,256,6,0,0,asm,12001+22000,1817.0034 +gfx938,int8_w8a8_channel,torch.bfloat16,1920,512,4096,256,6,0,0,asm,12001+22000,1861.1633 +gfx938,int8_w8a8_channel,torch.bfloat16,2048,512,4096,256,6,0,0,asm,12001+22000,1889.8789 +gfx938,int8_w8a8_channel,torch.bfloat16,2304,512,4096,256,6,0,0,asm,12001+22000,1970.5355 +gfx938,int8_w8a8_channel,torch.bfloat16,2560,512,4096,256,6,0,0,asm,12001+22001,2170.8128 +gfx938,int8_w8a8_channel,torch.bfloat16,2816,512,4096,256,6,0,0,asm,13001+23001,2504.1004 +gfx938,int8_w8a8_channel,torch.bfloat16,3072,512,4096,256,6,0,0,asm,13001+23001,2511.8814 +gfx938,int8_w8a8_channel,torch.bfloat16,3328,512,4096,256,6,0,0,asm,13001+23001,2566.197 +gfx938,int8_w8a8_channel,torch.bfloat16,3584,512,4096,256,6,0,0,asm,13001+23001,2613.4642 +gfx938,int8_w8a8_channel,torch.bfloat16,3840,512,4096,256,6,0,0,asm,13001+23001,2658.1883 +gfx938,int8_w8a8_channel,torch.bfloat16,4096,512,4096,256,6,0,0,asm,13001+23001,2694.4411 +gfx938,int8_w8a8_channel,torch.bfloat16,4608,512,4096,256,6,0,0,asm,13001+23001,2836.3946 +gfx938,int8_w8a8_channel,torch.bfloat16,5120,512,4096,256,6,0,0,asm,13001+22001,3390.4479 
+gfx938,int8_w8a8_channel,torch.bfloat16,5632,512,4096,256,6,0,0,asm,13001+23001,4003.1115 +gfx938,int8_w8a8_channel,torch.bfloat16,6144,512,4096,256,6,0,0,asm,12001+22001,4569.3499 +gfx938,int8_w8a8_channel,torch.bfloat16,6656,512,4096,256,6,0,0,asm,12001+22001,4721.6274 +gfx938,int8_w8a8_channel,torch.bfloat16,7168,512,4096,256,6,0,0,asm,12001+22001,4877.1131 +gfx938,int8_w8a8_channel,torch.bfloat16,7680,512,4096,256,6,0,0,asm,12001+22001,5094.0808 +gfx938,int8_w8a8_channel,torch.bfloat16,8192,512,4096,256,6,0,0,asm,13001+23001,5156.8006 +gfx938,int8_w8a8_channel,torch.bfloat16,10240,512,4096,256,6,0,0,asm,13001+23001,5702.4324 +gfx938,int8_w8a8_channel,torch.bfloat16,12288,512,4096,256,6,0,0,asm,13001+23001,7434.4938 +gfx938,int8_w8a8_channel,torch.bfloat16,14336,512,4096,256,6,0,0,asm,13001+23001,7808.6131 +gfx938,int8_w8a8_channel,torch.bfloat16,16384,512,4096,256,6,0,0,asm,13001+23001,8942.8737 +gfx938,int8_w8a8_channel,torch.bfloat16,17408,512,4096,256,6,0,0,asm,13001+23001,9827.4874 +gfx938,int8_w8a8_channel,torch.bfloat16,24576,512,4096,256,6,0,0,asm,13001+23001,12834.5617 +gfx938,int8_w8a8_channel,torch.bfloat16,32768,512,4096,256,6,0,0,asm,13001+23001,16783.0803 +gfx938,int8_w8a8_channel,torch.bfloat16,40960,512,4096,256,6,0,0,asm,13001+23001,20690.7507 +gfx938,int8_w8a8_channel,torch.bfloat16,49152,512,4096,256,6,0,0,asm,13001+23001,24586.4703 +gfx938,int8_w8a8_channel,torch.bfloat16,57344,512,4096,256,6,0,0,asm,13001+23001,28541.35 +gfx938,int8_w8a8_channel,torch.bfloat16,65536,512,4096,256,6,0,0,asm,13001+23001,32417.998199999998 +gfx938,int8_w8a8_channel,torch.bfloat16,1,256,4096,256,6,0,0,asm,10002+20001,59.4876 +gfx938,int8_w8a8_channel,torch.bfloat16,2,256,4096,256,6,0,0,asm,10008+20000,79.9253 +gfx938,int8_w8a8_channel,torch.bfloat16,3,256,4096,256,6,0,0,asm,10011+20000,102.9991 +gfx938,int8_w8a8_channel,torch.bfloat16,4,256,4096,256,6,0,0,asm,10011+20000,109.1716 
+gfx938,int8_w8a8_channel,torch.bfloat16,5,256,4096,256,6,0,0,asm,10011+20001,121.6347 +gfx938,int8_w8a8_channel,torch.bfloat16,6,256,4096,256,6,0,0,asm,10002+20001,154.6619 +gfx938,int8_w8a8_channel,torch.bfloat16,7,256,4096,256,6,0,0,asm,10008+20000,164.1441 +gfx938,int8_w8a8_channel,torch.bfloat16,8,256,4096,256,6,0,0,asm,10008+20001,171.8326 +gfx938,int8_w8a8_channel,torch.bfloat16,9,256,4096,256,6,0,0,asm,10011+20001,189.1714 +gfx938,int8_w8a8_channel,torch.bfloat16,10,256,4096,256,6,0,0,asm,10011+20000,194.6872 +gfx938,int8_w8a8_channel,torch.bfloat16,11,256,4096,256,6,0,0,asm,10011+20001,200.6829 +gfx938,int8_w8a8_channel,torch.bfloat16,12,256,4096,256,6,0,0,asm,10011+20001,208.1187 +gfx938,int8_w8a8_channel,torch.bfloat16,13,256,4096,256,6,0,0,asm,10011+20000,215.2429 +gfx938,int8_w8a8_channel,torch.bfloat16,14,256,4096,256,6,0,0,asm,10002+20000,256.8849 +gfx938,int8_w8a8_channel,torch.bfloat16,15,256,4096,256,6,0,0,asm,10008+20000,262.7795 +gfx938,int8_w8a8_channel,torch.bfloat16,16,256,4096,256,6,0,0,asm,10008+20000,264.8932 +gfx938,int8_w8a8_channel,torch.bfloat16,17,256,4096,256,6,0,0,asm,10011+20000,259.7367 +gfx938,int8_w8a8_channel,torch.bfloat16,18,256,4096,256,6,0,0,asm,10011+20000,260.9324 +gfx938,int8_w8a8_channel,torch.bfloat16,20,256,4096,256,6,0,0,asm,10011+20000,276.4524 +gfx938,int8_w8a8_channel,torch.bfloat16,24,256,4096,256,6,0,0,asm,10011+20000,338.6502 +gfx938,int8_w8a8_channel,torch.bfloat16,28,256,4096,256,6,0,0,asm,10011+20000,374.6445 +gfx938,int8_w8a8_channel,torch.bfloat16,32,256,4096,256,6,0,0,asm,10011+20000,434.6781 +gfx938,int8_w8a8_channel,torch.bfloat16,34,256,4096,256,6,0,0,asm,10011+20000,419.3826 +gfx938,int8_w8a8_channel,torch.bfloat16,36,256,4096,256,6,0,0,asm,10011+20000,429.2436 +gfx938,int8_w8a8_channel,torch.bfloat16,40,256,4096,256,6,0,0,asm,10011+20000,451.4667 +gfx938,int8_w8a8_channel,torch.bfloat16,44,256,4096,256,6,0,0,asm,10011+20000,508.71270000000004 
+gfx938,int8_w8a8_channel,torch.bfloat16,48,256,4096,256,6,0,0,asm,10011+20000,514.7422 +gfx938,int8_w8a8_channel,torch.bfloat16,56,256,4096,256,6,0,0,asm,10011+20000,538.018 +gfx938,int8_w8a8_channel,torch.bfloat16,64,256,4096,256,6,0,0,asm,10011+20000,586.3377 +gfx938,int8_w8a8_channel,torch.bfloat16,68,256,4096,256,6,0,0,asm,10011+20000,592.6621 +gfx938,int8_w8a8_channel,torch.bfloat16,72,256,4096,256,6,0,0,asm,10011+20000,598.0852 +gfx938,int8_w8a8_channel,torch.bfloat16,80,256,4096,256,6,0,0,asm,10011+20000,606.6492 +gfx938,int8_w8a8_channel,torch.bfloat16,88,256,4096,256,6,0,0,asm,10011+20000,653.2175 +gfx938,int8_w8a8_channel,torch.bfloat16,96,256,4096,256,6,0,0,asm,10011+20000,659.6933 +gfx938,int8_w8a8_channel,torch.bfloat16,104,256,4096,256,6,0,0,asm,10011+20000,666.8344 +gfx938,int8_w8a8_channel,torch.bfloat16,112,256,4096,256,6,0,0,asm,10011+20000,670.4216 +gfx938,int8_w8a8_channel,torch.bfloat16,128,256,4096,256,6,0,0,asm,10011+20000,679.7943 +gfx938,int8_w8a8_channel,torch.bfloat16,144,256,4096,256,6,0,0,asm,10011+20000,686.0091 +gfx938,int8_w8a8_channel,torch.bfloat16,160,256,4096,256,6,0,0,asm,10011+20001,731.4153 +gfx938,int8_w8a8_channel,torch.bfloat16,192,256,4096,256,6,0,0,asm,10011+20000,713.9669 +gfx938,int8_w8a8_channel,torch.bfloat16,224,256,4096,256,6,0,0,asm,10011+20000,721.7312 +gfx938,int8_w8a8_channel,torch.bfloat16,256,256,4096,256,6,0,0,asm,10011+20000,729.8406 +gfx938,int8_w8a8_channel,torch.bfloat16,320,256,4096,256,6,0,0,asm,10011+20000,749.0827 +gfx938,int8_w8a8_channel,torch.bfloat16,384,256,4096,256,6,0,0,asm,10011+20000,765.891 +gfx938,int8_w8a8_channel,torch.bfloat16,448,256,4096,256,6,0,0,asm,11005+21001,786.75 +gfx938,int8_w8a8_channel,torch.bfloat16,512,256,4096,256,6,0,0,asm,10011+20001,804.5941 +gfx938,int8_w8a8_channel,torch.bfloat16,576,256,4096,256,6,0,0,asm,11005+21001,882.0003 +gfx938,int8_w8a8_channel,torch.bfloat16,640,256,4096,256,6,0,0,asm,11005+21001,832.0973 
+gfx938,int8_w8a8_channel,torch.bfloat16,704,256,4096,256,6,0,0,asm,11005+21001,843.4405 +gfx938,int8_w8a8_channel,torch.bfloat16,768,256,4096,256,6,0,0,asm,11003+21001,908.7708 +gfx938,int8_w8a8_channel,torch.bfloat16,832,256,4096,256,6,0,0,asm,11005+21001,868.4593 +gfx938,int8_w8a8_channel,torch.bfloat16,896,256,4096,256,6,0,0,asm,11005+21001,914.3624 +gfx938,int8_w8a8_channel,torch.bfloat16,960,256,4096,256,6,0,0,asm,11005+21001,924.1559 +gfx938,int8_w8a8_channel,torch.bfloat16,1024,256,4096,256,6,0,0,asm,11005+21001,928.5264 +gfx938,int8_w8a8_channel,torch.bfloat16,1152,256,4096,256,6,0,0,asm,11005+21000,1027.9198 +gfx938,int8_w8a8_channel,torch.bfloat16,1280,256,4096,256,6,0,0,asm,12001+22001,1040.2229 +gfx938,int8_w8a8_channel,torch.bfloat16,1408,256,4096,256,6,0,0,asm,12003+22001,1065.9996 +gfx938,int8_w8a8_channel,torch.bfloat16,1536,256,4096,256,6,0,0,asm,12001+22001,1136.1046 +gfx938,int8_w8a8_channel,torch.bfloat16,1664,256,4096,256,6,0,0,asm,12005+22001,1125.452 +gfx938,int8_w8a8_channel,torch.bfloat16,1792,256,4096,256,6,0,0,asm,12001+22001,1115.6584 +gfx938,int8_w8a8_channel,torch.bfloat16,1920,256,4096,256,6,0,0,asm,12001+22001,1177.1741 +gfx938,int8_w8a8_channel,torch.bfloat16,2048,256,4096,256,6,0,0,asm,12005+22001,1201.3592 +gfx938,int8_w8a8_channel,torch.bfloat16,2304,256,4096,256,6,0,0,asm,12001+22001,1261.3253 +gfx938,int8_w8a8_channel,torch.bfloat16,2560,256,4096,256,6,0,0,asm,12001+22001,1404.8618 +gfx938,int8_w8a8_channel,torch.bfloat16,2816,256,4096,256,6,0,0,asm,12001+22001,1625.1307 +gfx938,int8_w8a8_channel,torch.bfloat16,3072,256,4096,256,6,0,0,asm,13001+23001,1689.2738 +gfx938,int8_w8a8_channel,torch.bfloat16,3328,256,4096,256,6,0,0,asm,13001+23001,1716.6084 +gfx938,int8_w8a8_channel,torch.bfloat16,3584,256,4096,256,6,0,0,asm,13001+23001,1742.6125 +gfx938,int8_w8a8_channel,torch.bfloat16,3840,256,4096,256,6,0,0,asm,13001+23001,1771.8165 +gfx938,int8_w8a8_channel,torch.bfloat16,4096,256,4096,256,6,0,0,asm,13001+23001,1793.0459 
+gfx938,int8_w8a8_channel,torch.bfloat16,4608,256,4096,256,6,0,0,asm,13001+23001,1943.1845 +gfx938,int8_w8a8_channel,torch.bfloat16,5120,256,4096,256,6,0,0,asm,13001+23001,2196.4471 +gfx938,int8_w8a8_channel,torch.bfloat16,5632,256,4096,256,6,0,0,asm,13001+23001,2693.1279 +gfx938,int8_w8a8_channel,torch.bfloat16,6144,256,4096,256,6,0,0,asm,12001+22001,3018.0198 +gfx938,int8_w8a8_channel,torch.bfloat16,6656,256,4096,256,6,0,0,asm,12001+22001,3114.0279 +gfx938,int8_w8a8_channel,torch.bfloat16,7168,256,4096,256,6,0,0,asm,12001+22001,3249.5137 +gfx938,int8_w8a8_channel,torch.bfloat16,7680,256,4096,256,6,0,0,asm,12001+22001,3395.9636 +gfx938,int8_w8a8_channel,torch.bfloat16,8192,256,4096,256,6,0,0,asm,13001+23001,3485.6473 +gfx938,int8_w8a8_channel,torch.bfloat16,10240,256,4096,256,6,0,0,asm,13001+23001,3930.8417 +gfx938,int8_w8a8_channel,torch.bfloat16,12288,256,4096,256,6,0,0,asm,13001+23001,5137.4904 +gfx938,int8_w8a8_channel,torch.bfloat16,14336,256,4096,256,6,0,0,asm,13001+23001,5479.5268 +gfx938,int8_w8a8_channel,torch.bfloat16,16384,256,4096,256,6,0,0,asm,13001+23001,6239.061 +gfx938,int8_w8a8_channel,torch.bfloat16,17408,256,4096,256,6,0,0,asm,13001+23001,6896.6919 +gfx938,int8_w8a8_channel,torch.bfloat16,24576,256,4096,256,6,0,0,asm,13001+23001,8995.4132 +gfx938,int8_w8a8_channel,torch.bfloat16,32768,256,4096,256,6,0,0,asm,13001+23001,11788.1793 +gfx938,int8_w8a8_channel,torch.bfloat16,40960,256,4096,256,6,0,0,asm,13001+23000,14816.1534 +gfx938,int8_w8a8_channel,torch.bfloat16,49152,256,4096,256,6,0,0,asm,13001+23001,17277.1915 +gfx938,int8_w8a8_channel,torch.bfloat16,57344,256,4096,256,6,0,0,asm,13001+23001,20062.2133 +gfx938,int8_w8a8_channel,torch.bfloat16,65536,256,4096,256,6,0,0,asm,13001+23001,22777.5632 +gfx938,int8_w8a8_channel,torch.bfloat16,65536,256,4096,256,6,0,0,asm,13001+23001,22900.0209 \ No newline at end of file diff --git a/aiter/configs/tuned_fmoe_asm_w8a8_group_shuffle.csv b/aiter/configs/tuned_fmoe_asm_w8a8_group_shuffle.csv index 
ca245efe51fbe7d9927f6e8fbef5d11bcbb6ee10..98a0a810d5f1acb264754c6e688f15f0c9cc5e6a 100644 --- a/aiter/configs/tuned_fmoe_asm_w8a8_group_shuffle.csv +++ b/aiter/configs/tuned_fmoe_asm_w8a8_group_shuffle.csv @@ -1268,3 +1268,72 @@ gfx938,f8_w8a8_block,torch.float16,14336,256,4096,256,8,0,0,asm,13001+23000,5030 gfx938,f8_w8a8_block,torch.float16,16384,256,4096,256,8,0,0,asm,13001+23000,5608.4473 gfx938,f8_w8a8_block,torch.float16,17408,256,4096,256,8,0,0,asm,13001+23000,6038.0465 gfx938,f8_w8a8_block,torch.float16,24576,256,4096,256,8,0,0,asm,13001+23000,8143.5178 +gfx938,f8_w8a8_block,torch.float16,1,512,4096,256,8,0,0,asm,10007+20000,82.9571 +gfx938,f8_w8a8_block,torch.float16,2,512,4096,256,8,0,0,asm,10001+20000,112.2287 +gfx938,f8_w8a8_block,torch.float16,4,512,4096,256,8,0,0,asm,10002+20000,168.5064 +gfx938,f8_w8a8_block,torch.float16,6,512,4096,256,8,0,0,asm,10002+20000,218.1063 +gfx938,f8_w8a8_block,torch.float16,8,512,4096,256,8,0,0,asm,10002+20000,257.9547 +gfx938,f8_w8a8_block,torch.float16,10,512,4096,256,8,0,0,asm,10002+20000,302.8811 +gfx938,f8_w8a8_block,torch.float16,12,512,4096,256,8,0,0,asm,10002+20000,333.6432 +gfx938,f8_w8a8_block,torch.float16,14,512,4096,256,8,0,0,asm,10002+20000,373.4999 +gfx938,f8_w8a8_block,torch.float16,16,512,4096,256,8,0,0,asm,10002+20000,393.4746 +gfx938,f8_w8a8_block,torch.float16,20,512,4096,256,8,0,0,asm,10002+20000,455.5854 +gfx938,f8_w8a8_block,torch.float16,24,512,4096,256,8,0,0,asm,10001+20000,514.044 +gfx938,f8_w8a8_block,torch.float16,28,512,4096,256,8,0,0,asm,10002+20000,582.8807 +gfx938,f8_w8a8_block,torch.float16,32,512,4096,256,8,0,0,asm,10002+20000,618.847 +gfx938,f8_w8a8_block,torch.float16,36,512,4096,256,8,0,0,asm,10002+20000,647.2482 +gfx938,f8_w8a8_block,torch.float16,40,512,4096,256,8,0,0,asm,10001+20000,676.0396 +gfx938,f8_w8a8_block,torch.float16,44,512,4096,256,8,0,0,asm,10002+20000,707.8039 +gfx938,f8_w8a8_block,torch.float16,48,512,4096,256,8,0,0,asm,10002+20000,718.1197 
+gfx938,f8_w8a8_block,torch.float16,56,512,4096,256,8,0,0,asm,10001+20000,757.1511 +gfx938,f8_w8a8_block,torch.float16,64,512,4096,256,8,0,0,asm,10002+20000,781.3533 +gfx938,f8_w8a8_block,torch.float16,80,512,4096,256,8,0,0,asm,10002+20000,834.0691 +gfx938,f8_w8a8_block,torch.float16,96,512,4096,256,8,0,0,asm,10002+20000,871.6102 +gfx938,f8_w8a8_block,torch.float16,112,512,4096,256,8,0,0,asm,10002+20000,885.1175 +gfx938,f8_w8a8_block,torch.float16,128,512,4096,256,8,0,0,asm,10002+20000,903.3996 +gfx938,f8_w8a8_block,torch.float16,160,512,4096,256,8,0,0,asm,10002+20000,918.0774 +gfx938,f8_w8a8_block,torch.float16,192,512,4096,256,8,0,0,asm,10001+20000,936.3676 +gfx938,f8_w8a8_block,torch.float16,224,512,4096,256,8,0,0,asm,10002+20000,943.6689 +gfx938,f8_w8a8_block,torch.float16,256,512,4096,256,8,0,0,asm,10002+20000,951.9721 +gfx938,f8_w8a8_block,torch.float16,320,512,4096,256,8,0,0,asm,10002+20000,969.4878 +gfx938,f8_w8a8_block,torch.float16,384,512,4096,256,8,0,0,asm,10006+20000,1018.5991999999999 +gfx938,f8_w8a8_block,torch.float16,448,512,4096,256,8,0,0,asm,11008+21000,1043.5172 +gfx938,f8_w8a8_block,torch.float16,512,512,4096,256,8,0,0,asm,11010+21000,1063.8287 +gfx938,f8_w8a8_block,torch.float16,576,512,4096,256,8,0,0,asm,11010+21000,1103.7024 +gfx938,f8_w8a8_block,torch.float16,640,512,4096,256,8,0,0,asm,11007+21000,1088.9825 +gfx938,f8_w8a8_block,torch.float16,704,512,4096,256,8,0,0,asm,11010+21000,1061.7066 +gfx938,f8_w8a8_block,torch.float16,768,512,4096,256,8,0,0,asm,11010+21000,1106.7677 +gfx938,f8_w8a8_block,torch.float16,832,512,4096,256,8,0,0,asm,11010+21000,1127.4246 +gfx938,f8_w8a8_block,torch.float16,896,512,4096,256,8,0,0,asm,11010+21000,1168.4432 +gfx938,f8_w8a8_block,torch.float16,960,512,4096,256,8,0,0,asm,11010+21000,1173.7484 +gfx938,f8_w8a8_block,torch.float16,1024,512,4096,256,8,0,0,asm,11010+21000,1219.298 +gfx938,f8_w8a8_block,torch.float16,1152,512,4096,256,8,0,0,asm,12002+22000,1338.2958 
+gfx938,f8_w8a8_block,torch.float16,1280,512,4096,256,8,0,0,asm,12003+22000,1279.3149 +gfx938,f8_w8a8_block,torch.float16,1408,512,4096,256,8,0,0,asm,12003+22000,1309.1085 +gfx938,f8_w8a8_block,torch.float16,1536,512,4096,256,8,0,0,asm,12002+22000,1346.0515 +gfx938,f8_w8a8_block,torch.float16,1664,512,4096,256,8,0,0,asm,12003+22000,1330.7168 +gfx938,f8_w8a8_block,torch.float16,1792,512,4096,256,8,0,0,asm,12002+22000,1433.3104 +gfx938,f8_w8a8_block,torch.float16,1920,512,4096,256,8,0,0,asm,12004+22000,1557.4367 +gfx938,f8_w8a8_block,torch.float16,2048,512,4096,256,8,0,0,asm,12003+22000,1661.2428 +gfx938,f8_w8a8_block,torch.float16,2304,512,4096,256,8,0,0,asm,12005+22000,1940.813 +gfx938,f8_w8a8_block,torch.float16,2560,512,4096,256,8,0,0,asm,13001+22000,2072.7203 +gfx938,f8_w8a8_block,torch.float16,2816,512,4096,256,8,0,0,asm,13001+22000,2095.2974 +gfx938,f8_w8a8_block,torch.float16,3072,512,4096,256,8,0,0,asm,13001+22000,2144.6107 +gfx938,f8_w8a8_block,torch.float16,3328,512,4096,256,8,0,0,asm,13001+22000,2175.5328 +gfx938,f8_w8a8_block,torch.float16,3584,512,4096,256,8,0,0,asm,13001+22000,2234.2864 +gfx938,f8_w8a8_block,torch.float16,3840,512,4096,256,8,0,0,asm,12005+22000,2454.0841 +gfx938,f8_w8a8_block,torch.float16,4096,512,4096,256,8,0,0,asm,12005+22000,2680.5851 +gfx938,f8_w8a8_block,torch.float16,4608,512,4096,256,8,0,0,asm,12005+22000,3087.0188 +gfx938,f8_w8a8_block,torch.float16,5120,512,4096,256,8,0,0,asm,12005+22000,3226.8833 +gfx938,f8_w8a8_block,torch.float16,5632,512,4096,256,8,0,0,asm,12005+22000,3405.6863 +gfx938,f8_w8a8_block,torch.float16,6144,512,4096,256,8,0,0,asm,12005+22000,3731.7076 +gfx938,f8_w8a8_block,torch.float16,6656,512,4096,256,8,0,0,asm,13001+23000,3931.2194 +gfx938,f8_w8a8_block,torch.float16,7168,512,4096,256,8,0,0,asm,13001+23000,4010.6804 +gfx938,f8_w8a8_block,torch.float16,7680,512,4096,256,8,0,0,asm,13001+23000,4195.0509 +gfx938,f8_w8a8_block,torch.float16,8192,512,4096,256,8,0,0,asm,13001+23000,4642.5692 
+gfx938,f8_w8a8_block,torch.float16,10240,512,4096,256,8,0,0,asm,13001+23000,5698.2073 +gfx938,f8_w8a8_block,torch.float16,12288,512,4096,256,8,0,0,asm,13001+23000,6601.8277 +gfx938,f8_w8a8_block,torch.float16,14336,512,4096,256,8,0,0,asm,13001+23000,7572.0575 +gfx938,f8_w8a8_block,torch.float16,16384,512,4096,256,8,0,0,asm,13001+23000,8551.2295 +gfx938,f8_w8a8_block,torch.float16,17408,512,4096,256,8,0,0,asm,13001+23000,9230.6924 +gfx938,f8_w8a8_block,torch.float16,24576,512,4096,256,8,0,0,asm,13001+23000,12357.5934 \ No newline at end of file diff --git a/aiter/dist/communication_op.py b/aiter/dist/communication_op.py index 63ea6c0aaf2255718ddc49285bbabe20afd541af..0bd07d5acd1c9a943c1032e513cbd8451ca7111d 100644 --- a/aiter/dist/communication_op.py +++ b/aiter/dist/communication_op.py @@ -19,26 +19,78 @@ from typing import Any, Dict, Optional, Union import torch import torch.distributed -from .parallel_state import get_tp_group +from .parallel_state import get_tp_group, get_custom_group, has_custom_group def tensor_model_parallel_all_reduce( - input_: torch.Tensor, open_fp8_quant: bool = False + input_: torch.Tensor, + use_new: bool = True, + open_fp8_quant: bool = False, + prefill_support: bool = False, ) -> torch.Tensor: """All-reduce the input tensor across model parallel group.""" - return get_tp_group().all_reduce(input_, open_fp8_quant) + return get_tp_group().all_reduce(input_, use_new, open_fp8_quant, prefill_support) def tensor_model_parallel_fused_allreduce_rmsnorm( - input_: torch.Tensor, residual_inp_: torch.Tensor, weight_: torch.Tensor, eps: float + input_: torch.Tensor, + residual_inp_: torch.Tensor, + weight_: torch.Tensor, + eps: float, + prefill_support: bool = False, ) -> tuple[torch.Tensor, torch.Tensor]: - return get_tp_group().fused_allreduce_rmsnorm(input_, residual_inp_, weight_, eps) + return get_tp_group().fused_allreduce_rmsnorm( + input_, residual_inp_, weight_, eps, prefill_support + ) + + +def 
tensor_model_parallel_fused_allreduce_rmsnorm_quant( + input_: torch.Tensor, + residual_inp_: torch.Tensor, + weight_: torch.Tensor, + eps: float, + prefill_support: bool = False, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + return get_tp_group().fused_allreduce_rmsnorm_quant( + input_, residual_inp_, weight_, eps, prefill_support + ) + + +def tensor_model_parallel_fused_allreduce_rmsnorm_quant_per_group( + input_: torch.Tensor, + residual_inp_: torch.Tensor, + weight_: torch.Tensor, + eps: float, + group_size: int = 128, + prefill_support: bool = False, + emit_bf16: bool = False, +): + return get_tp_group().fused_allreduce_rmsnorm_quant_per_group( + input_, residual_inp_, weight_, eps, group_size, prefill_support, emit_bf16=emit_bf16 + ) + + +def tensor_model_parallel_fused_qknorm_allreduce( + qkv_in: torch.Tensor, + q_w: torch.Tensor, + k_w: torch.Tensor, + eps: float, +): + return get_tp_group().fused_qknorm_allreduce(qkv_in, q_w, k_w, eps) def tensor_model_parallel_custom_all_gather(input_: torch.Tensor) -> torch.Tensor: return get_tp_group().custom_all_gather(input_) +def tensor_model_parallel_reduce_scatter( + input_: torch.Tensor, + use_custom: bool = True, + dim: int = 0, +) -> torch.Tensor: + return get_tp_group().reduce_scatter_tensor(input_, use_custom, dim) + + def tensor_model_parallel_all_gather( input_: torch.Tensor, use_custom: bool = False, dim: int = -1 ) -> torch.Tensor: @@ -59,3 +111,66 @@ def broadcast_tensor_dict( if not torch.distributed.is_initialized(): return tensor_dict return get_tp_group().broadcast_tensor_dict(tensor_dict, src) + + +# ============================================================ +# Custom group communication operations +# ============================================================ + + +def _assert_has_custom_group(): + assert has_custom_group(), ( + "No custom group initialized. Call ensure_model_parallel_initialized " + "with custom_group_config to initialize custom groups." 
+ ) + + +def custom_all_reduce( + input_: torch.Tensor, + use_new: bool = True, + open_fp8_quant: bool = False, + group: Optional[str] = None, +) -> torch.Tensor: + """All-reduce the input tensor across the user-specified custom group. + + Args: + group: Name of the custom group. When only one custom group is + initialized this can be omitted. When multiple groups exist, + pass the group name to select which one to use. + """ + _assert_has_custom_group() + return get_custom_group(group).all_reduce(input_, use_new, open_fp8_quant) + + +def custom_all_gather( + input_: torch.Tensor, + use_custom: bool = True, + dim: int = 0, + group: Optional[str] = None, +) -> torch.Tensor: + """All-gather the input tensor across the user-specified custom group. + + Args: + group: Name of the custom group. When only one custom group is + initialized this can be omitted. When multiple groups exist, + pass the group name to select which one to use. + """ + _assert_has_custom_group() + return get_custom_group(group).all_gather(input_, use_custom, dim) + + +def custom_reduce_scatter( + input_: torch.Tensor, + use_custom: bool = True, + dim: int = 0, + group: Optional[str] = None, +) -> torch.Tensor: + """Reduce-scatter the input tensor across the user-specified custom group. + + Args: + group: Name of the custom group. When only one custom group is + initialized this can be omitted. When multiple groups exist, + pass the group name to select which one to use. 
+ """ + _assert_has_custom_group() + return get_custom_group(group).reduce_scatter_tensor(input_, use_custom, dim) diff --git a/aiter/dist/device_communicators/communicator_cuda.py b/aiter/dist/device_communicators/communicator_cuda.py index 4c13965a9603608228fa8263c1c83c0ea6b28f2d..f982032c59f2eabe991d260d9428d661c80e98ab 100644 --- a/aiter/dist/device_communicators/communicator_cuda.py +++ b/aiter/dist/device_communicators/communicator_cuda.py @@ -2,6 +2,8 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os + import torch from torch.distributed import ProcessGroup @@ -11,6 +13,13 @@ from aiter import logger from .base_device_communicator import DeviceCommunicatorBase +def _env_flag(name: str, default: bool) -> bool: + val = os.environ.get(name) + if val is None: + return default + return val.strip().lower() in ("1", "true", "yes", "on") + + class CudaCommunicator(DeviceCommunicatorBase): def __init__( self, @@ -64,9 +73,20 @@ class CudaCommunicator(DeviceCommunicatorBase): if use_custom_allreduce and self.world_size > 1: # Initialize a custom fast all-reduce implementation. + # AITER_AR_ENABLE_REG_CAPTURE controls whether inputs captured + # inside a CUDA graph are assumed to already live in the + # pre-registered IPC buffer (True, default), or whether the + # in-graph all-reduce should fall back to the unregistered + # copy-in path (False). Set this to "0" when callers cannot + # guarantee that captured input pointers were registered via + # ``CustomAllreduce.register_buffer``. 
+ enable_register_for_capturing = _env_flag( + "AITER_AR_ENABLE_REG_CAPTURE", default=True + ) self.ca_comm = CustomAllreduce( group=self.cpu_group, device=self.device, + enable_register_for_capturing=enable_register_for_capturing, # symm_mem_enabled=( # self.symm_mem_comm is not None and not self.symm_mem_comm.disabled # ), @@ -118,7 +138,13 @@ class CudaCommunicator(DeviceCommunicatorBase): self.all2all_manager.__class__.__name__, ) - def all_reduce(self, input_, ca_fp8_quant: bool = False) -> torch.Tensor: + def all_reduce( + self, + input_, + use_new: bool = True, + ca_fp8_quant: bool = False, + prefill_support: bool = False, + ) -> torch.Tensor: # always try quick reduce first, then custom allreduce, # and then pynccl. (quick reduce just for ROCM MI3*) qr_comm = self.qr_comm @@ -137,7 +163,7 @@ class CudaCommunicator(DeviceCommunicatorBase): and not ca_comm.disabled and ca_comm.should_custom_ar(input_) ): - out = ca_comm.custom_all_reduce(input_, ca_fp8_quant) + out = ca_comm.custom_all_reduce(input_, use_new=use_new, open_fp8_quant=ca_fp8_quant) assert out is not None return out symm_mem_comm = self.symm_mem_comm @@ -159,7 +185,7 @@ class CudaCommunicator(DeviceCommunicatorBase): return out def fused_allreduce_rmsnorm( - self, input_, res_inp_, weight_, eps + self, input_, res_inp_, weight_, eps, prefill_support: bool = False ) -> tuple[torch.Tensor, torch.Tensor]: n = input_.shape[-1] can_use_fuse_ar_rms = ( @@ -174,10 +200,12 @@ class CudaCommunicator(DeviceCommunicatorBase): and ca_comm.should_custom_ar(input_) and can_use_fuse_ar_rms ): - res_out, out = ca_comm.custom_fused_ar_rms(input_, res_inp_, weight_, eps) + out, res_out = ca_comm.custom_fused_ar_rms( + input_, res_inp_, weight_, eps, use_1stage=prefill_support + ) assert out is not None assert res_out is not None - return res_out, out + return out, res_out # call split kernel ar_out = self.all_reduce(input_) out = torch.empty_like(ar_out) @@ -193,7 +221,138 @@ class 
CudaCommunicator(DeviceCommunicatorBase): eps, 0, ) - return residual_out, out + return out, residual_out + + def fused_allreduce_rmsnorm_quant( + self, + input_, + res_inp_, + weight_, + eps, + prefill_support: bool = False, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + total_bytes = input_.numel() * input_.element_size() + K = int(input_.shape[-1]) + use_1stage = total_bytes <= 128 * 1024 + # Hygon (gfx938/gfx946) kernel-level bug: the fused + # AR+RMSNorm+FP8-quant kernel produces 100% NaN for bf16 at + # K=4096 whenever use_1stage=False, regardless of which post- + # 1-stage path is dispatched (the 2-stage 128KB-512KB path AND + # the split >512KB path both fail). Empirically confirmed on + # shapes (17,4096), (32,4096), (128,4096), and (and very likely + # (512,4096)+ which we don't proceed to). The 2-stage kernel + # works for fp16 at the same K=4096, and for bf16 at K=7168 / + # K=8192; the failure is therefore K=4096-and-bf16-specific. + # Until the C++ kernel is fixed upstream, fall back to the + # Python split path (RMSNorm-only fused kernel + separate + # hip_quant) for the entire problematic configuration. 
+ problematic_bf16_non_1stage = ( + input_.dtype == torch.bfloat16 + and not use_1stage + and K == 4096 + ) + if ( + K in [512, 1024, 2048, 4096] + and total_bytes <= 4096 * 1024 + and not problematic_bf16_non_1stage + ): + out, res_out, scale_out = self.ca_comm.custom_fused_ar_rms_quant( + input_, res_inp_, weight_, eps, use_1stage + ) + else: + out_, res_out = self.fused_allreduce_rmsnorm( + input_, res_inp_, weight_, eps, prefill_support + ) + from aiter import get_hip_quant, QuantType + from aiter.utility.dtypes import fp8 + hip_quant = get_hip_quant(QuantType.per_Token) + out, scale_out = hip_quant(out_, quant_dtype=fp8) + assert out is not None + assert res_out is not None + assert scale_out is not None + return out, res_out, scale_out + + def fused_allreduce_rmsnorm_quant_per_group( + self, + input_, + res_inp_, + weight_, + eps, + group_size=128, + prefill_support: bool = False, + emit_bf16: bool = False, + ): + total_bytes = input_.numel() * input_.element_size() + K = int(input_.shape[-1]) + use_1stage = total_bytes <= 128 * 1024 + out = res_out = scale_out = bf16_out = None + fused_ok = False + # See ``fused_allreduce_rmsnorm_quant`` for context, with one + # important difference: per-token quant's custom-kernel + # whitelist is K in {512, 1024, 2048, 4096}, so larger K values + # (6144 / 7168 / 8192) always go to the Python fallback there + # and never expose the kernel bug for those K. Per-group quant + # has a much wider whitelist (any K with K % group_size == 0 + # and K <= 16384), so it surfaces the same bug at additional K + # values (K=4096 and K=6144 both empirically confirmed NaN; + # K=7168 / K=8192 untested but likely affected). Widen the + # fallback to all bf16 + non-1-stage configurations to be safe; + # the perf cost is limited since this only affects bf16 inputs + # whose total bytes exceed 128 KB (medium / large prefill). 
+ problematic_bf16_non_1stage = ( + input_.dtype == torch.bfloat16 + and not use_1stage + ) + if ( + K % group_size == 0 + and K <= 16384 + and total_bytes < 8 * 1024 * 8192 + and not problematic_bf16_non_1stage + ): + try: + result = self.ca_comm.custom_fused_ar_rms_per_group_quant( + input_, res_inp_, weight_, eps, group_size, use_1stage, + emit_bf16=emit_bf16, + ) + if emit_bf16: + out, res_out, scale_out, bf16_out = result + else: + out, res_out, scale_out = result + fused_ok = True + except Exception: + pass + if not fused_ok: + out_, res_out = self.fused_allreduce_rmsnorm( + input_, res_inp_, weight_, eps, prefill_support + ) + from aiter import get_hip_quant, QuantType + from aiter.utility.dtypes import fp8 + hip_quant = get_hip_quant(QuantType.per_1x128) + out, scale_out = hip_quant(out_, quant_dtype=fp8) + if emit_bf16: + bf16_out = out_ + assert out is not None + assert res_out is not None + assert scale_out is not None + if emit_bf16: + assert bf16_out is not None + return out, res_out, scale_out, bf16_out + return out, res_out, scale_out + + def fused_qknorm_allreduce( + self, + qkv_in, + q_w, + k_w, + eps, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + q_out, k_out, v_out = self.ca_comm.custom_fused_qknorm_ar( + qkv_in, q_w, k_w, eps + ) + assert q_out is not None + assert k_out is not None + assert v_out is not None + return q_out, k_out, v_out def reduce_scatter(self, input_: torch.Tensor, dim: int = -1): world_size = self.world_size diff --git a/aiter/dist/device_communicators/custom_all_reduce.py b/aiter/dist/device_communicators/custom_all_reduce.py index 697983ef4ecd014a1fd036424b9c8df16425356e..8835165cc2a1c09eed9b836d79fc18e6de0713f8 100644 --- a/aiter/dist/device_communicators/custom_all_reduce.py +++ b/aiter/dist/device_communicators/custom_all_reduce.py @@ -1,5 +1,6 @@ """ -* Copyright (C) 2024-2025, The vLLM team. +* Copyright (C) Advanced Micro Devices, Inc. All rights reserved. +* Copyright (C) 2024-2026, The vLLM team. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +15,9 @@ * limitations under the License. """ +import pickle from contextlib import contextmanager -from typing import Any, List, Optional, Union +from typing import Dict, List, Optional, Tuple, Union import torch import torch.distributed as dist @@ -26,6 +28,7 @@ from torch.distributed import ProcessGroup import aiter as ops from aiter.dist.parallel_state import in_the_same_node_as from aiter import logger +from aiter.utility.dtypes import fp8 try: ops.meta_size() @@ -43,6 +46,273 @@ def is_weak_contiguous(inp: torch.Tensor): ) +# Wavefront width on AMD CDNA / gfx94x / gfx950. ``__shfl_xor`` in the +# fused per-group FP8 quant epilogue is scoped to a single wavefront, so +# ``threads_per_group = group_size / PACK_SIZE`` must fit inside it. +_AITER_AR_WAVEFRONT_SIZE = 64 + + +def _validate_per_group_size(group_size: int, element_size: int, n: int) -> None: + """Validate ``group_size`` for the fused AR + RMSNorm + per-group FP8 + quant kernel. Mirrors the C++ host dispatcher checks in + ``dispatchFusedAllReduceRMSNormQuantPerGroup`` so callers fail fast + with a clear Python-level ``ValueError`` (rather than a generic + ``RuntimeError`` from the extension, which aborts CUDA-graph capture + asynchronously). + + The fused epilogue imposes five constraints on ``group_size``: + + (a) ``group_size > 0`` + (b) ``group_size % PACK_SIZE == 0`` with ``PACK_SIZE = 16 // element_size`` + (each thread owns a full 16-byte pack, so a group must be made of + whole packs). + (c) ``threads_per_group = group_size / PACK_SIZE`` must be a power of two + (butterfly ``__shfl_xor`` reduction strides ``{tpg/2, tpg/4, ..., 1}``). + (d) ``threads_per_group`` must fit inside a wavefront + (``<= 64`` on AMD CDNA); cross-warp shuffles do not exist on HIP. + (e) ``n % group_size == 0`` so ``num_groups = n / group_size`` is an + integer. 
+ """ + if not isinstance(group_size, int): + raise TypeError( + f"per-group quant group_size must be int, got {type(group_size).__name__}" + ) + if group_size <= 0: + raise ValueError( + f"per-group quant requires group_size > 0, got group_size={group_size}" + ) + if element_size <= 0 or 16 % element_size != 0: + raise ValueError( + "per-group quant requires an element_size that divides 16 " + f"(bf16/fp16: 2), got element_size={element_size}" + ) + pack_size = 16 // element_size + if group_size % pack_size != 0: + raise ValueError( + f"per-group quant requires group_size divisible by PACK_SIZE=" + f"{pack_size} (16 // element_size), got group_size={group_size}" + ) + threads_per_group = group_size // pack_size + if threads_per_group & (threads_per_group - 1) != 0: + raise ValueError( + "per-group quant requires group_size/PACK_SIZE to be a power of " + "two (butterfly __shfl_xor reduction), got " + f"group_size={group_size} PACK_SIZE={pack_size} " + f"threads_per_group={threads_per_group}" + ) + if threads_per_group > _AITER_AR_WAVEFRONT_SIZE: + raise ValueError( + "per-group quant requires group_size/PACK_SIZE <= wavefront size " + f"({_AITER_AR_WAVEFRONT_SIZE}), got group_size={group_size} " + f"PACK_SIZE={pack_size} threads_per_group={threads_per_group}" + ) + if n % group_size != 0: + raise ValueError( + f"per-group quant requires n divisible by group_size, " + f"got n={n} group_size={group_size}" + ) + + +class IPCBuffer: + """A single IPC-accessible device buffer. + + Pure data container — owns a pre-allocated GPU allocation with a fixed + device address. All IPC handle / broadcast / registration logic lives + in IPCBufferPool. + + When *uncached* is False (default), memory is allocated through PyTorch's + caching allocator (torch.empty). When True, memory is allocated via + hipExtMallocWithFlags with hipDeviceMallocUncached, bypassing the cache. 
+ Uncached buffers are suitable for cross-GPU synchronization metadata and + signal buffers where cache coherence overhead is undesirable. + """ + + def __init__( + self, + size: int, + device: torch.device, + uncached: bool = False, + ): + self._size = size + self._uncached = uncached + if uncached: + self._buffer = None + self._raw_ptr = ops.allocate_meta_buffer(size) + else: + self._buffer = torch.empty(size, dtype=torch.uint8, device=device) + self._raw_ptr = self._buffer.data_ptr() + + @property + def data_ptr(self) -> int: + return self._raw_ptr + + @property + def tensor(self) -> torch.Tensor: + if self._buffer is None: + raise RuntimeError( + "Uncached IPCBuffer has no backing tensor; use .data_ptr" + ) + return self._buffer + + @property + def max_size(self) -> int: + return self._size + + @property + def uncached(self) -> bool: + return self._uncached + + def __del__(self): + if self._uncached and self._raw_ptr: + try: + ops.free_meta_buffer(self._raw_ptr) + except (AttributeError, TypeError): + pass + self._raw_ptr = 0 + + +class IPCBufferPool: + """Manages a collection of named IPCBuffers and provides IPC broadcast + infrastructure for cross-GPU communication. + + Buffers are stored in an internal dict and accessed by string key. + + Two sets of operations: + + Eager mode (named internal buffers): + create(key, size) allocates a buffer and stores it under *key*. + get_ipc_meta(key) broadcasts IPC handles for that buffer. + + Graph mode (arbitrary external tensors): + get_external_ipc_meta(tensor) broadcasts IPC handles for any tensor. + flush_graph_buffers(ar_ptr) batch-registers addresses that the C++ + backend collected during CUDA graph capture. 
+ """ + + _pool_seq: int = 0 + + def __init__(self, device: torch.device, group: ProcessGroup): + self._device = device + self._group = group + self._rank = dist.get_rank(group=group) + self._world_size = dist.get_world_size(group=group) + self._buffers: Dict[str, IPCBuffer] = {} + + self._store = dist.distributed_c10d._get_default_store() + self._assert_pure_tcp_store(self._store) + + ranks_tag = "_".join(map(str, sorted(dist.get_process_group_ranks(group)))) + self._store_key_prefix = f"aiter_ipc/p{IPCBufferPool._pool_seq}/g{ranks_tag}" + IPCBufferPool._pool_seq += 1 + self._ipc_seq = 0 + + @staticmethod + def _assert_pure_tcp_store(store) -> None: + """Verify the store is a pure-TCP KV store, free from any collective + communication backend (RCCL / gloo / MPI). + Emits a warning rather than aborting to allow non-TCP store setups.""" + s = store + while isinstance(s, dist.PrefixStore): + s = s.underlying_store + if not isinstance(s, dist.TCPStore): + logger.warning( + "IPC metadata exchange prefers a pure-TCP KV store " + "(torch.distributed.TCPStore), got %s. " + "If IPC handle exchange fails, ensure MASTER_ADDR/MASTER_PORT " + "are set and the process group is initialised with a TCPStore.", + type(s).__name__, + ) + + # ---- Buffer lifecycle ---- + + def create(self, key: str, size: int, uncached: bool = False) -> IPCBuffer: + """Allocate a new IPCBuffer and store it under *key*. + + Args: + key: unique name for this buffer in the pool. + size: buffer size in bytes. + uncached: if True, allocate via hipMalloc (uncached); + if False (default), allocate via torch.empty (cached). 
+ """ + if key in self._buffers: + raise KeyError(f"IPCBuffer '{key}' already exists in the pool") + buf = IPCBuffer(size, self._device, uncached=uncached) + self._buffers[key] = buf + return buf + + def __getitem__(self, key: str) -> IPCBuffer: + return self._buffers[key] + + def __contains__(self, key: str) -> bool: + return key in self._buffers + + # ---- Eager mode: named buffer IPC meta ---- + + def get_ipc_meta(self, key: str) -> Tuple[List, List]: + """Broadcast IPC handles for the named buffer across all ranks.""" + buf = self._buffers[key] + return self._broadcast_ipc(buf.data_ptr) + + # ---- Graph mode: external buffer IPC meta ---- + + def get_external_ipc_meta(self, tensor: torch.Tensor) -> Tuple[List, List]: + """Broadcast IPC handles for an arbitrary external tensor.""" + return self._broadcast_ipc(tensor.data_ptr()) + + def flush_graph_buffers(self, ar_ptr): + """Batch-register buffer addresses collected during CUDA graph capture. + + During graph capture the C++ backend records addresses of buffers that + are not yet IPC-registered. After capture ends this method exchanges + their IPC handles across all ranks and completes registration. 
+ """ + count = ops.get_graph_buffer_count(ar_ptr) + if count == 0: + return + handle_sz = 64 # sizeof(hipIpcMemHandle_t) + handle = torch.empty(count * handle_sz, dtype=torch.uint8) + offset = torch.empty(count, dtype=torch.int64) + ops.get_graph_buffer_ipc_meta(ar_ptr, handle.data_ptr(), offset.data_ptr()) + handles, offsets = self._gather_ipc_meta((handle, offset)) + logger.info("Registering %d cuda graph addresses", count) + ops.register_graph_buffers( + ar_ptr, + [h.data_ptr() for h in handles], + [o.data_ptr() for o in offsets], + ) + + # ---- Private IPC primitives ---- + + def _broadcast_ipc(self, data_ptr: int) -> Tuple[List, List]: + """Get IPC handle for *data_ptr* and broadcast across all ranks.""" + handle = torch.empty(64, dtype=torch.uint8) # sizeof(hipIpcMemHandle_t) + ops.get_meta_buffer_ipc_handle(data_ptr, handle.data_ptr()) + return self._gather_ipc_meta((handle, 0)) + + def _gather_ipc_meta(self, shard_data) -> Tuple[List, List]: + """Exchange IPC metadata (handle + offset) across all ranks via TCP store. + + Each rank writes its serialised *shard_data* under a unique key, then + reads every other rank's data. ``store.get()`` blocks until the key + is available, providing natural barrier semantics without involving any + collective communication backend. 
+ """ + seq = self._ipc_seq + self._ipc_seq += 1 + prefix = f"{self._store_key_prefix}/{seq}" + + self._store.set(f"{prefix}/r{self._rank}", pickle.dumps(shard_data)) + + handles = [] + offsets = [] + for r in range(self._world_size): + raw = self._store.get(f"{prefix}/r{r}") + h, o = pickle.loads(raw) + handles.append(h) + offsets.append(o) + return handles, offsets + + class CustomAllreduce: _SUPPORTED_WORLD_SIZES = [2, 4, 6, 8] @@ -52,7 +322,8 @@ class CustomAllreduce: self, group: ProcessGroup, device: Union[int, str, torch.device], - max_size=8192 * 1024 * 8, + max_size=1024 * 1024 * 1024, # 2GB bf16/half + enable_register_for_capturing: bool = True, ) -> None: """ Args: @@ -136,7 +407,7 @@ class CustomAllreduce: # test P2P capability, this checks software/cudaruntime support # this is expensive to compute at the first time # then we cache the result - # On hygon GPU, p2p is always enabled between XGMI connected GPUs + # On AMD GPU, p2p is always enabled between XGMI connected GPUs # if not current_platform.is_rocm() and not _can_p2p(rank, world_size): # logger.warning( # "Custom allreduce is disabled because your platform lacks " @@ -145,15 +416,7 @@ class CustomAllreduce: # return self.disabled = False - # buffers memory are owned by this Python class and passed to C++ - # meta data composes of two parts: meta data for synchronization - # (256 bytes) and a temporary buffer for storing intermediate - # allreduce results. - # if current_platform.is_rocm(): - self.meta = ops.allocate_meta_buffer(ops.meta_size() + max_size) - # This is a pre-registered IPC buffer. In eager mode, input tensors - # are first copied into this buffer before allreduce is performed - self.buffer = torch.empty(max_size, dtype=torch.uint8, device=self.device) + self.enable_register_for_capturing = enable_register_for_capturing # This is a buffer for storing the tuples of pointers pointing to # IPC buffers from all ranks. 
Each registered tuple has size of # 8*world_size bytes where world_size is at most 8. Allocating 8MB @@ -165,24 +428,64 @@ class CustomAllreduce: self.max_size = max_size self.rank = rank self.world_size = world_size - handle = ops.get_meta_buffer_ipc_handle(self.meta) - shard_data = ( - handle, # ipc handle to base ptr - 0, # offset of base ptr - ) - handles, offsets = self._gather_ipc_meta(shard_data) + + # Use a gloo-based barrier to synchronise init status across all ranks. + # If any rank fails (e.g. allocate_meta_buffer throws on platforms that + # do not support hipDeviceMallocUncached), the barrier ensures all other + # ranks learn about the failure instead of hanging forever in + # _gather_ipc_meta's store.get(). + init_ok = torch.ones(1, dtype=torch.int32) + try: + # Create IPC buffer pool and allocate all named buffers. + # "meta" uses hipAlloc (uncached) for synchronization metadata + + # intermediate allreduce temp storage. + # "input" uses torchAlloc (cached) for D2D relay in eager mode. + self._pool = IPCBufferPool(self.device, self.group) + self._pool.create("meta", ops.meta_size() + max_size * 2, uncached=True) + self._pool.create("input", max_size) + except Exception as e: + init_ok[0] = 0 + logger.warning( + "CustomAllreduce IPC buffer allocation failed (rank %d): %s. " + "Custom allreduce will be disabled.", + rank, e, + ) + + # All ranks must agree on whether init succeeded before proceeding to + # _gather_ipc_meta (which would hang if a peer rank is absent). 
+ dist.all_reduce(init_ok, op=dist.ReduceOp.MIN, group=self.group) + if init_ok[0] == 0: + self.disabled = True + return + + # Exchange meta buffer IPC handles to initialize C++ backend + handles, offsets = self._pool.get_ipc_meta("meta") self.fully_connected = fully_connected self._ptr = ops.init_custom_ar( - self.meta, self.rank_data, handles, offsets, rank, self.fully_connected + self._pool["meta"].data_ptr, + self.rank_data.data_ptr(), + self.rank_data.numel(), + [h.data_ptr() for h in handles], + offsets, + rank, + self.fully_connected, + ) + + # Register input IPC buffer with the C++ backend + handles, offsets = self._pool.get_ipc_meta("input") + ops.register_input_buffer( + self._ptr, + self._pool["input"].data_ptr, + [h.data_ptr() for h in handles], + offsets, ) - self.register_buffer(self.buffer) @contextmanager def capture(self): """ The main responsibility of this context manager is the - `register_graph_buffers` call at the end of the context. + flush_graph_buffers call at the end of the context. It records all the buffer addresses used in the CUDA graph. 
""" try: @@ -191,61 +494,27 @@ class CustomAllreduce: finally: self._IS_CAPTURING = False if not self.disabled: - self.register_graph_buffers() - - def _get_ipc_meta(self, inp: torch.Tensor): - # if current_platform.is_rocm(): - if 1: - # _share_cuda_() doesn't accept meta buffer not allocated from - # PyTorch cache allocator, use direct HIP call to get IPC handle - handle = ops.get_meta_buffer_ipc_handle(inp) - shard_data = ( - handle, # ipc handle to base ptr - 0, # offset of base ptr - ) - else: - data = inp.untyped_storage()._share_cuda_() - shard_data = ( - data[1], # ipc handle to base ptr - data[3], # offset of base ptr - ) - return self._gather_ipc_meta(shard_data) - - def _gather_ipc_meta(self, shard_data): - # Note: don't use `[[None]] * self.world_size` here - # because it will create a list of the same reference - all_data: List[Optional[Any]] = [[None] for i in range(self.world_size)] - all_data[self.rank][0] = shard_data - - ranks = dist.get_process_group_ranks(group=self.group) - ranks.sort() - for i, rank in enumerate(ranks): - dist.broadcast_object_list( - all_data[i], src=rank, group=self.group, device="cpu" - ) - - # we cannot directly use `dist.all_gather_object` here - # because it is incompatible with `gloo` backend under inference mode. - # see https://github.com/pytorch/pytorch/issues/126032 for details. 
+ self._pool.flush_graph_buffers(self._ptr) - handles = [] - offsets = [] - for i in range(len(all_data)): - handles.append(all_data[i][0][0]) # type: ignore - offsets.append(all_data[i][0][1]) # type: ignore - return handles, offsets + def register_input_buffer(self, inp: torch.Tensor): + """Register an external tensor as an IPC input buffer.""" + handles, offsets = self._pool.get_external_ipc_meta(inp) + ops.register_input_buffer( + self._ptr, inp.data_ptr(), [h.data_ptr() for h in handles], offsets + ) - def register_buffer(self, inp: torch.Tensor): - handles, offsets = self._get_ipc_meta(inp) - ops.register_buffer(self._ptr, inp, handles, offsets) + def register_output_buffer(self, out: torch.Tensor): + """Register an external tensor as an IPC output buffer.""" + handles, offsets = self._pool.get_external_ipc_meta(out) + ops.register_output_buffer( + self._ptr, out.data_ptr(), [h.data_ptr() for h in handles], offsets + ) def register_graph_buffers(self): - handle, offset = ops.get_graph_buffer_ipc_meta(self._ptr) - handles, offsets = self._gather_ipc_meta((handle, offset)) - logger.info("Registering %d cuda graph addresses", len(offset)) - ops.register_graph_buffers(self._ptr, handles, offsets) + """Batch-register graph-captured buffer addresses.""" + self._pool.flush_graph_buffers(self._ptr) - def should_custom_ar(self, inp: torch.Tensor): + def should_custom_ar(self, inp: torch.Tensor, prefill_support: bool = False): if self.disabled: return False inp_size = inp.numel() * inp.element_size() @@ -256,8 +525,28 @@ class CustomAllreduce: return False # for 4 or more non NVLink-capable GPUs, custom allreduce provides # little performance improvement over NCCL. 
+ # In allreduce 2stage writemode, use 2x tmp buffer if self.world_size == 2 or self.fully_connected: - return inp_size <= self.max_size + # decode + if not prefill_support: + return inp_size <= 8192 * 8192 + # prefill + else: + return inp_size <= (self.max_size / 2) + return False + + def should_custom_ag(self, inp: torch.Tensor): + if self.disabled: + return False + inp_size = inp.numel() * inp.element_size() + if inp_size % 16 != 0: + return False + if not is_weak_contiguous(inp): + return False + # all_gather output = input * world_size, so the per-rank input + # must fit within max_size / world_size + if self.world_size == 2 or self.fully_connected: + return inp_size <= (self.max_size / (self.world_size * 2)) return False def all_reduce( @@ -265,8 +554,9 @@ class CustomAllreduce: inp: torch.Tensor, *, out: Optional[torch.Tensor] = None, + use_new: bool = True, open_fp8_quant: bool = False, - registered: bool = False, + registered_input: bool = False, ): """Performs an out-of-place all reduce. 
@@ -276,17 +566,22 @@ class CustomAllreduce: """ if out is None: out = torch.empty_like(inp) + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + reg_inp = 0 if registered_input else self._pool["input"].data_ptr + reg_inp_bytes = 0 if registered_input else self._pool["input"].max_size ops.all_reduce( self._ptr, inp, out, + use_new, open_fp8_quant, - None if registered else self.buffer, + reg_inp, + reg_inp_bytes, ) return out def custom_all_reduce( - self, input: torch.Tensor, open_fp8_quant: bool = False + self, input: torch.Tensor, use_new: bool = True, open_fp8_quant: bool = False ) -> Optional[torch.Tensor]: # when custom allreduce is disabled, this will be None if self.disabled or not self.should_custom_ar(input): @@ -294,7 +589,10 @@ class CustomAllreduce: if self._IS_CAPTURING: if torch.cuda.is_current_stream_capturing(): return self.all_reduce( - input, open_fp8_quant=open_fp8_quant, registered=True + input, + use_new=use_new, + open_fp8_quant=open_fp8_quant, + registered_input=self.enable_register_for_capturing, ) else: # if warm up, mimic the allocation pattern @@ -306,34 +604,100 @@ class CustomAllreduce: # be small(<=1% of overall latency) compared to the performance # gains of using custom kernels return self.all_reduce( - input, open_fp8_quant=open_fp8_quant, registered=False + input, + use_new=use_new, + open_fp8_quant=open_fp8_quant, + registered_input=False, ) - def all_gather_reg(self, inp: torch.Tensor, out: torch.Tensor = None): + def reduce_scatter( + self, + inp: torch.Tensor, + out: torch.Tensor, + *, + registered: bool = False, + ): + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + reg = 0 if registered else self._pool["input"].data_ptr + reg_bytes = 0 if registered else self._pool["input"].max_size + ops.reduce_scatter( + self._ptr, + inp, + out, + reg, + reg_bytes, + ) + + def custom_reduce_scatter( + self, input: torch.Tensor, output: torch.Tensor + ) -> Optional[torch.Tensor]: + # when 
custom allreduce is disabled, this will be None + if self.disabled or not self.should_custom_ar(input): + return None + if self._IS_CAPTURING: + if torch.cuda.is_current_stream_capturing(): + return self.reduce_scatter(input, output, registered=True) + else: + return self.reduce_scatter(input, output, registered=False) + + def _allgather_out_shape(self, inp: torch.Tensor, dim: int): + ndim = inp.dim() + if dim == 0: + return (inp.shape[0] * self.world_size,) + inp.shape[1:] + if dim == -1 or dim == ndim - 1: + return inp.shape[:-1] + (inp.shape[-1] * self.world_size,) + print( + f"[aiter] allgather does not support dim={dim}, falling back to 1-D output" + ) + return (inp.numel() * self.world_size,) + + def all_gather_reg(self, inp: torch.Tensor, out: torch.Tensor = None, dim: int = 0): if out is None: out = torch.empty( - inp.numel() * self.world_size, dtype=inp.dtype, device=inp.device + self._allgather_out_shape(inp, dim), + dtype=inp.dtype, + device=inp.device, ) - ops.all_gather_reg(self._ptr, inp, out) + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + ops.all_gather_reg( + self._ptr, + inp, + out, + dim, + ) return out - def all_gather_unreg(self, inp: torch.Tensor, out: torch.Tensor = None): + def all_gather_unreg( + self, inp: torch.Tensor, out: torch.Tensor = None, dim: int = 0 + ): if out is None: out = torch.empty( - inp.numel() * self.world_size, dtype=inp.dtype, device=inp.device + self._allgather_out_shape(inp, dim), + dtype=inp.dtype, + device=inp.device, ) - ops.all_gather_unreg(self._ptr, inp, self.buffer, out) + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + ops.all_gather_unreg( + self._ptr, + inp, + self._pool["input"].data_ptr, + out, + self._pool["input"].max_size, + dim, + ) return out - def custom_all_gather(self, inp: torch.Tensor) -> Optional[torch.Tensor]: + def custom_all_gather( + self, inp: torch.Tensor, dim: int = 0 + ) -> Optional[torch.Tensor]: if self._IS_CAPTURING: if 
torch.cuda.is_current_stream_capturing(): - return self.all_gather_reg(inp) + return self.all_gather_reg(inp, dim=dim) else: print("allgather capture hipgraph error") return torch.zeros_like(inp) else: - return self.all_gather_unreg(inp) + return self.all_gather_unreg(inp, dim=dim) def fused_ar_rms( self, @@ -342,51 +706,314 @@ class CustomAllreduce: *, res_out: Optional[torch.Tensor] = None, out: Optional[torch.Tensor] = None, + scale_out: Optional[torch.Tensor] = None, w: torch.Tensor, eps: float, registered: bool = False, + use_1stage: bool = False, + post_per_token_quant: bool = False, ): - if out is None: - out = torch.empty_like(inp) if res_out is None: res_out = torch.empty_like(inp) - ops.fused_allreduce_rmsnorm( + reg = 0 if registered else self._pool["input"].data_ptr + reg_bytes = 0 if registered else self._pool["input"].max_size + if not post_per_token_quant: + if out is None: + out = torch.empty_like(inp) + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + ops.fused_allreduce_rmsnorm( + self._ptr, + inp, + res_inp, + res_out, + out, + w, + eps, + reg, + reg_bytes, + use_1stage, + ) + return out, res_out + else: + if out is None: + out = torch.empty(inp.shape, dtype=fp8, device=inp.device) + assert is_weak_contiguous(out), "output tensor is not weak-contiguous" + if scale_out is None: + scale_out = torch.empty( + inp.shape[:-1] + (1,), dtype=torch.float32, device=inp.device + ) + ops.fused_allreduce_rmsnorm_quant( + self._ptr, + inp, + res_inp, + res_out, + out, + scale_out, + w, + eps, + reg, + reg_bytes, + use_1stage, + ) + return out, res_out, scale_out + + def custom_fused_ar_rms( + self, + input: torch.Tensor, + residual_inp: torch.Tensor, + weight: torch.Tensor, + eps: float, + use_1stage: bool = False, + ) -> Optional[torch.Tensor]: + # when custom allreduce is disabled, this will be None + if self.disabled or not self.should_custom_ar(input): + return None + if self._IS_CAPTURING: + if 
torch.cuda.is_current_stream_capturing(): + return self.fused_ar_rms( + input, + residual_inp, + w=weight, + eps=eps, + registered=True, + use_1stage=use_1stage, + ) + else: + return torch.zeros_like(input), torch.zeros_like(input) + else: + return self.fused_ar_rms( + input, + residual_inp, + w=weight, + eps=eps, + registered=False, + use_1stage=use_1stage, + ) + + def custom_fused_ar_rms_quant( + self, + input: torch.Tensor, + residual_inp: torch.Tensor, + weight: torch.Tensor, + eps: float, + use_1stage: bool = False, + ): + # when custom allreduce is disabled, this will be None + if self.disabled or not self.should_custom_ar(input): + return None + if self._IS_CAPTURING: + if torch.cuda.is_current_stream_capturing(): + return self.fused_ar_rms( + input, + residual_inp, + w=weight, + eps=eps, + registered=True, + use_1stage=use_1stage, + post_per_token_quant=True, + ) + else: + dummy_out = torch.zeros(input.shape, dtype=fp8, device=input.device) + dummy_scale_out = torch.zeros( + input.shape[:-1] + (1,), dtype=torch.float32, device=input.device + ) + return dummy_out, torch.zeros_like(input), dummy_scale_out + else: + return self.fused_ar_rms( + input, + residual_inp, + w=weight, + eps=eps, + registered=False, + use_1stage=use_1stage, + post_per_token_quant=True, + ) + + def fused_ar_rms_per_group_quant( + self, + inp: torch.Tensor, + res_inp: torch.Tensor, + *, + w: torch.Tensor, + eps: float, + group_size: int = 128, + registered: bool = False, + use_1stage: bool = False, + emit_bf16: bool = False, + ): + K = inp.shape[-1] + # Fail fast on bad ``group_size`` at the Python boundary. Mirrors + # the C++ host dispatcher checks; catching it here surfaces a + # synchronous ``ValueError`` instead of a post-launch + # ``RuntimeError`` that would only fire at CUDA-graph replay and + # would be much harder to attribute to the offending call site. 
+ _validate_per_group_size(group_size, inp.element_size(), K) + res_out = torch.empty_like(inp) + num_groups = K // group_size + out = torch.empty(inp.shape, dtype=fp8, device=inp.device) + scale_out = torch.empty( + inp.shape[:-1] + (num_groups,), dtype=torch.float32, device=inp.device + ) + # Optional bf16/fp16 mirror of the pre-quantization normed output. + # Requested by GDN-style layers that also need an unquantized view + # (e.g. Qwen3.5 in_proj_ba). Zero-overhead when not requested + # because the kernel branches on the pointer being non-null. + bf16_out = None + bf16_ptr = 0 + if emit_bf16: + bf16_out = torch.empty_like(inp) + bf16_ptr = int(bf16_out.data_ptr()) + reg = 0 if registered else self._pool["input"].data_ptr + reg_bytes = 0 if registered else self._pool["input"].max_size + ops.fused_allreduce_rmsnorm_quant_per_group( self._ptr, inp, res_inp, res_out, out, + scale_out, w, eps, - None if registered else self.buffer, + group_size, + reg, + reg_bytes, + use_1stage, + bf16_ptr, ) - return res_out, out + if emit_bf16: + return out, res_out, scale_out, bf16_out + return out, res_out, scale_out + + def fused_qknorm_ar( + self, + qkv_in: torch.Tensor, + q_w: torch.Tensor, + k_w: torch.Tensor, + eps: float, + registered: bool = False, + ): + dtype = qkv_in.dtype + device = qkv_in.device + hidden_dim_q = q_w.shape[-1] + hidden_dim_k = k_w.shape[-1] + token_num = qkv_in.shape[0] + hidden_dim_v = qkv_in.shape[1] - (hidden_dim_q + hidden_dim_k) + q_out = torch.empty((token_num, hidden_dim_q), dtype=dtype, device=device) + k_out = torch.empty((token_num, hidden_dim_k), dtype=dtype, device=device) + v_out = torch.empty((token_num, hidden_dim_v), dtype=dtype, device=device) + reg = 0 if registered else self._pool["input"].data_ptr + reg_bytes = 0 if registered else self._pool["input"].max_size + ops.fused_qknorm_allreduce( + self._ptr, + qkv_in, + q_w, + k_w, + q_out, + k_out, + v_out, + eps, + reg, + reg_bytes, + ) + return q_out, k_out, v_out + + def 
custom_fused_qknorm_ar( + self, + qkv_in: torch.Tensor, + q_w: torch.Tensor, + k_w: torch.Tensor, + eps: float, + ) -> [torch.Tensor, torch.Tensor, torch.Tensor]: + dtype = qkv_in.dtype + if self.disabled: + return ( + torch.empty((qkv_in.shape[0], q_w.shape[-1]), dtype=dtype, device=qkv_in.device), + torch.empty((qkv_in.shape[0], k_w.shape[-1]), dtype=dtype, device=qkv_in.device), + torch.empty((qkv_in.shape[0], qkv_in.shape[1] - q_w.shape[-1] - k_w.shape[-1]), dtype=dtype, device=qkv_in.device) + ) + if self._IS_CAPTURING: + if torch.cuda.is_current_stream_capturing(): + return self.fused_qknorm_ar( + qkv_in, + q_w, + k_w, + eps, + registered=True, + ) + else: + return ( + torch.empty((qkv_in.shape[0], q_w.shape[-1]), dtype=dtype, device=qkv_in.device), + torch.empty((qkv_in.shape[0], k_w.shape[-1]), dtype=dtype, device=qkv_in.device), + torch.empty((qkv_in.shape[0], qkv_in.shape[1] - q_w.shape[-1] - k_w.shape[-1]), dtype=dtype, device=qkv_in.device) + ) + else: + return self.fused_qknorm_ar( + qkv_in, + q_w, + k_w, + eps, + registered=False, + ) - def custom_fused_ar_rms( + def custom_fused_ar_rms_per_group_quant( self, input: torch.Tensor, residual_inp: torch.Tensor, weight: torch.Tensor, eps: float, - ) -> Optional[torch.Tensor]: - # when custom allreduce is disabled, this will be None + group_size: int = 128, + use_1stage: bool = False, + emit_bf16: bool = False, + ): if self.disabled or not self.should_custom_ar(input): return None if self._IS_CAPTURING: if torch.cuda.is_current_stream_capturing(): - return self.fused_ar_rms( - input, residual_inp, w=weight, eps=eps, registered=True + return self.fused_ar_rms_per_group_quant( + input, + residual_inp, + w=weight, + eps=eps, + group_size=group_size, + registered=True, + use_1stage=use_1stage, + emit_bf16=emit_bf16, ) else: - return torch.zeros_like(input), torch.zeros_like(input) + K = input.shape[-1] + num_groups = K // group_size + dummy_out = torch.zeros(input.shape, dtype=fp8, device=input.device) + 
dummy_scale = torch.zeros( + input.shape[:-1] + (num_groups,), + dtype=torch.float32, + device=input.device, + ) + if emit_bf16: + return ( + dummy_out, + torch.zeros_like(input), + dummy_scale, + torch.zeros_like(input), + ) + return dummy_out, torch.zeros_like(input), dummy_scale else: - return self.fused_ar_rms( - input, residual_inp, w=weight, eps=eps, registered=False + return self.fused_ar_rms_per_group_quant( + input, + residual_inp, + w=weight, + eps=eps, + group_size=group_size, + registered=False, + use_1stage=use_1stage, + emit_bf16=emit_bf16, ) def close(self): - if not self.disabled and self._ptr: - ops.dispose(self._ptr) + if not self.disabled and getattr(self, "_ptr", 0): + try: + ops.dispose(self._ptr) + except (AttributeError, TypeError): + pass self._ptr = 0 def __del__(self): diff --git a/aiter/dist/parallel_state.py b/aiter/dist/parallel_state.py index 2ddc3c2ee620b5459f4f3f40d4b7003510c7f340..40600069484d31edf49640a0bf06e7a6fceaae44 100644 --- a/aiter/dist/parallel_state.py +++ b/aiter/dist/parallel_state.py @@ -319,7 +319,11 @@ class GroupCoordinator: yield graph_capture_context def all_reduce( - self, input_: torch.Tensor, ca_fp8_quant: bool = False + self, + input_: torch.Tensor, + use_new: bool = True, + open_fp8_quant: bool = False, + prefill_support: bool = False, ) -> torch.Tensor: """ User-facing all-reduce function before we actually call the @@ -340,7 +344,7 @@ class GroupCoordinator: return input_ return all_reduce_( - input_, group_name=self.unique_name, ca_fp8_quant=ca_fp8_quant + input_, group_name=self.unique_name, ca_fp8_quant=open_fp8_quant ) def _all_reduce_out_place( @@ -348,7 +352,7 @@ class GroupCoordinator: ) -> torch.Tensor: if self.device_communicator is None: raise ValueError("No device communicator found") - return self.device_communicator.all_reduce(input_, ca_fp8_quant) + return self.device_communicator.all_reduce(input_, ca_fp8_quant=ca_fp8_quant) def fused_allreduce_rmsnorm( self, @@ -356,11 +360,54 @@ class 
GroupCoordinator: residual_inp_: torch.Tensor, weight_: torch.Tensor, eps: float, + prefill_support: bool = False, ) -> tuple[torch.Tensor, torch.Tensor]: return fused_allreduce_rmsnorm_( input_, residual_inp_, weight_, eps, group_name=self.unique_name ) + def fused_allreduce_rmsnorm_quant( + self, + input_: torch.Tensor, + residual_inp_: torch.Tensor, + weight_: torch.Tensor, + eps: float, + prefill_support: bool = False, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + if self.device_communicator is None: + raise ValueError("No device communicator found") + return self.device_communicator.fused_allreduce_rmsnorm_quant( + input_, residual_inp_, weight_, eps, prefill_support + ) + + def fused_allreduce_rmsnorm_quant_per_group( + self, + input_: torch.Tensor, + residual_inp_: torch.Tensor, + weight_: torch.Tensor, + eps: float, + group_size: int = 128, + prefill_support: bool = False, + emit_bf16: bool = False, + ): + if self.device_communicator is None: + raise ValueError("No device communicator found") + return self.device_communicator.fused_allreduce_rmsnorm_quant_per_group( + input_, residual_inp_, weight_, eps, group_size, prefill_support, + emit_bf16=emit_bf16, + ) + + def fused_qknorm_allreduce( + self, + qkv_in: torch.Tensor, + q_w: torch.Tensor, + k_w: torch.Tensor, + eps: float, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + if self.device_communicator is None: + raise ValueError("No device communicator found") + return self.device_communicator.fused_qknorm_allreduce(qkv_in, q_w, k_w, eps) + def _fused_allreduce_rmsnorm_out_place( self, input_: torch.Tensor, @@ -375,12 +422,31 @@ class GroupCoordinator: ) def _all_gather_out_place(self, input_: torch.Tensor) -> torch.Tensor: - ca_comm = self.device_communicator.ca_comm - assert ca_comm is not None - assert not ca_comm.disabled - out = ca_comm.custom_all_gather(input_) - assert out is not None - return out + ca_comm = ( + self.device_communicator.ca_comm + if self.device_communicator is 
not None + else None + ) + if ca_comm is not None and not ca_comm.disabled: + out = ca_comm.custom_all_gather(input_) + assert out is not None + return out + # Fallback: ca_comm unavailable (e.g. non-tp custom group). + # Try pynccl first (graph-safe), then torch.distributed. + world_size = self.world_size + out_shape = (input_.shape[0] * world_size,) + input_.shape[1:] + output_tensor = torch.empty( + out_shape, dtype=input_.dtype, device=input_.device + ) + if self.device_communicator is not None: + pynccl_comm = self.device_communicator.pynccl_comm + if pynccl_comm is not None and not pynccl_comm.disabled: + pynccl_comm.all_gather(output_tensor, input_) + return output_tensor + torch.distributed.all_gather_into_tensor( + output_tensor, input_, group=self.device_group + ) + return output_tensor def custom_all_gather(self, input_: torch.Tensor) -> torch.Tensor: return outplace_all_gather(input_, group_name=self.unique_name) @@ -390,6 +456,33 @@ class GroupCoordinator: raise ValueError("No device communicator found") return self.device_communicator.reduce_scatter(input_, dim) + def reduce_scatter_tensor( + self, + input_: torch.Tensor, + use_custom: bool = True, + dim: int = 0, + ): + world_size = self.world_size + assert world_size > 1, "error! 
world_size = 1" + assert ( + input_.numel() % world_size == 0 + ), "input shape error, input.numel() % world_size should equals to 0" + if input_.shape[0] % world_size == 0: + out_dim0 = input_.shape[0] // world_size + out_shape = (out_dim0,) + input_.shape[1:] + else: + out_shape = (input_.numel() // world_size,) + + if use_custom and self.device_communicator is not None: + return self.device_communicator.reduce_scatter(input_, dim) + else: + output_ = torch.empty( + out_shape, dtype=input_.dtype, device=input_.device + ) + torch.distributed.reduce_scatter_tensor( + output_, input_, group=self.device_group + ) + return output_ def all_gather( self, input_: torch.Tensor, use_custom: bool = False, dim: int = -1 @@ -897,6 +990,71 @@ def get_ep_group() -> GroupCoordinator: return _EP +_CUSTOM: Dict[str, GroupCoordinator] = {} + + +def has_custom_group() -> bool: + """Return whether any custom group is initialized.""" + return bool(_CUSTOM) + + +def get_custom_group( + name: Optional[str] = None, +) -> "Union[GroupCoordinator, Dict[str, GroupCoordinator]]": + """Get custom group coordinator(s). + + - If only one custom group is initialized, returns the GroupCoordinator + instance directly (name is optional). + - If multiple custom groups are initialized and name is None, returns the + full dict so the caller can select by name. + - If name is given, returns that specific GroupCoordinator. + """ + assert _CUSTOM, "custom allreduce group is not initialized" + if name is not None: + assert name in _CUSTOM, ( + f"custom group '{name}' not found, " + f"available: {list(_CUSTOM.keys())}" + ) + return _CUSTOM[name] + if len(_CUSTOM) == 1: + return next(iter(_CUSTOM.values())) + return dict(_CUSTOM) + + +class CustomGroupConfig: + """Configuration builder for custom communication groups. + + Each group is defined by a rank list that can be: + - 1D List[int]: all ranks form a single communication group, + e.g. 
[0,1,2,3,4,5,6,7] → one TP8 group + - 2D List[List[int]]: multiple independent subgroups, + e.g. [[0,1,2,3],[4,5,6,7]] → two independent TP4 groups + + Usage: + config = CustomGroupConfig() + config.add_group("tp_group", [[0,1,2,3],[4,5,6,7]]) + ensure_model_parallel_initialized(..., custom_group_config=config.data()) + + Or pass a raw dict directly: + ensure_model_parallel_initialized(..., custom_group_config={ + "tp_group": [[0,1,2,3],[4,5,6,7]], + }) + """ + + def __init__(self): + self._groups: Dict[str, List] = {} + + def add_group(self, name: str, ranks: List) -> "CustomGroupConfig": + assert name not in self._groups, f"custom group '{name}' already exists" + assert ranks, f"custom group '{name}': ranks list must not be empty" + self._groups[name] = ranks + return self + + def data(self) -> Dict[str, List]: + assert self._groups, "no custom groups have been added" + return dict(self._groups) + + # kept for backward compatibility get_pipeline_model_parallel_group = get_pp_group @@ -996,6 +1154,7 @@ def initialize_model_parallel( # decode_context_model_parallel_size: Optional[int] = 1, backend: Optional[str] = None, data_parallel_size: int = 1, + custom_group_config: Optional[Dict[str, List]] = None, ) -> None: """ Initialize model parallel groups. @@ -1006,6 +1165,12 @@ def initialize_model_parallel( pipeline_model_parallel_size: number of GPUs used for pipeline model parallelism. backend: name of torch distributed communication backend. + custom_group_config: optional dict mapping group names to rank lists. + Each value can be: + - 1D List[int]: all ranks form a single group, + e.g. [0,1,2,3,4,5,6,7] + - 2D List[List[int]]: multiple independent subgroups, + e.g. [[0,1,2,3],[4,5,6,7]] Let's say we have a total of 8 GPUs denoted by g0 ... 
g7 and we use 2 GPUs to parallelize the model tensor, and 4 GPUs to parallelize @@ -1109,6 +1274,55 @@ def initialize_model_parallel( group_ranks, get_world_group().local_rank, backend, group_name="ep" ) + # Build the custom allreduce group(s) (optional). + global _CUSTOM + assert not _CUSTOM, "custom allreduce group is already initialized" + if custom_group_config is not None: + for gname, ranks in custom_group_config.items(): + assert ( + isinstance(ranks, list) and len(ranks) > 0 + ), f"custom group '{gname}': value must be a non-empty list" + + if all(isinstance(r, int) for r in ranks): + group_ranks = [ranks] + elif all(isinstance(g, list) for g in ranks): + group_ranks = ranks + subgroup_size = len(group_ranks[0]) + for g in group_ranks: + assert len(g) == subgroup_size, ( + f"custom group '{gname}': all subgroups must " + f"have the same size, expected {subgroup_size} " + f"but got {len(g)}" + ) + assert all(isinstance(r, int) for r in g), ( + f"custom group '{gname}': subgroup elements " + f"must be integers" + ) + else: + raise AssertionError( + f"custom group '{gname}': value must be List[int] " + f"(1D) or List[List[int]] (2D)" + ) + + all_ranks_flat = [r for g in group_ranks for r in g] + assert len(all_ranks_flat) == world_size, ( + f"custom group '{gname}': total ranks " + f"({len(all_ranks_flat)}) must equal world_size ({world_size})" + ) + assert len(set(all_ranks_flat)) == world_size, ( + f"custom group '{gname}': contains duplicate ranks" + ) + assert set(all_ranks_flat) == set(range(world_size)), ( + f"custom group '{gname}': must cover all ranks 0..{world_size - 1}" + ) + + _CUSTOM[gname] = init_model_parallel_group( + group_ranks, + get_world_group().local_rank, + backend, + group_name=f"custom_{gname}", + ) + logger.info( "rank %s in world size %s is assigned as " "DP rank %s, PP rank %s, TP rank %s, EP rank %s", @@ -1126,6 +1340,7 @@ def ensure_model_parallel_initialized( pipeline_model_parallel_size: int, backend: Optional[str] = None, 
data_parallel_size: int = 1, + custom_group_config: Optional[Dict[str, List]] = None, ) -> None: """Helper to initialize model parallel groups if they are not initialized, or ensure tensor-parallel and pipeline-parallel sizes are equal to expected @@ -1138,6 +1353,7 @@ def ensure_model_parallel_initialized( pipeline_model_parallel_size, backend, data_parallel_size, + custom_group_config, ) return @@ -1209,6 +1425,11 @@ def destroy_model_parallel(): _PP.destroy() _PP = None + global _CUSTOM + for g in _CUSTOM.values(): + g.destroy() + _CUSTOM.clear() + def destroy_distributed_environment(): global _WORLD diff --git a/aiter/fused_moe.py b/aiter/fused_moe.py index 85604fd8b688a1655dcf3944fb5d5d3fea5272ba..fccc5f898a18494fda775a0186c0c348bbba8839 100644 --- a/aiter/fused_moe.py +++ b/aiter/fused_moe.py @@ -733,11 +733,11 @@ def torch_moe_blockscale( # [expert, model_dim/blk_m, inter_dim/blk_k] fc2_scale=None, expert_mask=None, + computeType=torch.float32, ): - computeType = dtypes.fp32 - hidden_states = hidden_states.to(computeType) - w1 = w1.to(computeType) - w2 = w2.to(computeType) + hidden_states = hidden_states.float().to(computeType) + w1 = w1.float().to(computeType) + w2 = w2.float().to(computeType) token_num, topk = topk_ids.shape expert, model_dim, inter_dim = w2.shape B, D = hidden_states.shape @@ -767,9 +767,8 @@ def torch_moe_blockscale( nblk_n = inter_dim // blk_n nblk_k = model_dim // blk_k if fc1_scale is not None: - # gose to quant D_w8a8/w8a8 - # blk_n, blk_k = scale_blks - # expert, nblk_n, nblk_k = fc1_scale.shape + fc1_scale = fc1_scale.to(computeType) + fc2_scale = fc2_scale.to(computeType) fc1_scale = rearrange( fc1_scale.view(-1, 1) .repeat(1, blk_n * blk_k) diff --git a/aiter/fused_moe_asm_wna16.py b/aiter/fused_moe_asm_wna16.py index 085e2f65b998c31fa5f8195c708216094102fe02..1bb5bf19bfd9192983054bf45b726ebfafec9f12 100644 --- a/aiter/fused_moe_asm_wna16.py +++ b/aiter/fused_moe_asm_wna16.py @@ -12,13 +12,13 @@ from aiter import logger from aiter 
import per_token_quant_hip, per_block_quant_wrapper, get_hip_quant from aiter import ActivationType, QuantType, dtypes from aiter import silu_and_mul,gelu_and_mul -from aiter.ops.triton.fused_moe import ( - triton_moe_sum, - triton_silu_and_mul, - triton_gelu_and_mul, - triton_relu2, +from aiter.ops.triton.fused_moe import triton_moe_sum +from aiter.ops.triton.moe_activation import ( + _normalize_activation_and_gate, + _apply_activation, ) + from aiter.jit.core import AITER_ROOT_DIR # from vllm.model_executor.layers.fused_moe.fused_moe import moe_align_block_size # from vllm.model_executor.layers.quantization.utils.int8_utils import ( @@ -111,6 +111,7 @@ def run_fused_experts_asm_impl(hidden_states: torch.Tensor, dtype, inplace, activation, + None, # is_gated use_fp8_w8a8, use_int8_w8a8, use_int8_w4a8, @@ -181,6 +182,7 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, dtype: torch.dtype, inplace: bool = False, activation: str = "silu", + is_gated: Optional[bool] = None, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w4a8: bool = False, @@ -200,7 +202,12 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, persist_cu: Optional[int] = 0, use_shuffle: Optional[int] = 0, solution_id: Optional[str] = None, - routed_scaling_factor: Optional[float] = 1.0)-> torch.Tensor: + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None)-> torch.Tensor: + + + activation, is_gated = _normalize_activation_and_gate(activation, is_gated) # Check constraints. if use_int8_w4a8: assert block_shape[0] == 0 and block_shape[1] == 64, "[ERROR]ASM Fused MoE only support w4a8 block_shape=64 now." 
@@ -342,14 +349,14 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, 2, config["SOL_ID1"], config["BLOCK_SIZE_M"]) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) if dtype == torch.bfloat16: if block_shape is not None and block_shape[1] == 32: aiter.asm_fmoe_stage2(d_w2_out, @@ -442,14 +449,15 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, odtype, config["PERSIST_GROUP1"], use_shuffle) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) bridge_q,bridge_scale = per_token_quant_hip(d_silu) #bridge_q,bridge_scale = per_token_quant_int8(d_silu) @@ -515,14 +523,15 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, config["SOL_ID1"], odtype, config["PERSIST_GROUP1"]) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) 
#quant_func = get_hip_quant(QuantType.per_1x64) #bridge_q,bridge_scale = quant_func(d_silu, quant_dtype=dtypes.i8) @@ -586,14 +595,15 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, odtype, config["PERSIST_GROUP1"], use_shuffle) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) #FIXME: aiter quant method performance is little worse than triton. Change it latter!! bridge_q, bridge_scale = per_block_quant_wrapper((1,block_shape[1]))(per_token_quant_hip)(d_silu, quant_dtype=torch.int8) @@ -657,14 +667,15 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, odtype, config["PERSIST_GROUP1"], use_shuffle) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) bridge_q,bridge_scale= per_token_quant_hip(d_silu, quant_dtype=torch.float8_e4m3fn) aiter.asm_fmoe_a8(d_w2_out, @@ -726,14 +737,15 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, odtype, config["PERSIST_GROUP1"], use_shuffle) - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe 
activation: {activation}") + + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) bridge_q,bridge_scale = per_block_quant_wrapper((1,block_shape[1]))(per_token_quant_hip)(d_silu, quant_dtype=torch.float8_e4m3fn) aiter.asm_fmoe_a8(d_w2_out, @@ -795,16 +807,14 @@ def fused_experts_asm_impl(hidden_states: torch.Tensor, config["PERSIST_GROUP1"], use_shuffle) #return d_w1_out - if activation == "silu": - triton_silu_and_mul(d_silu,d_w1_out) - # silu_and_mul(d_silu,d_w1_out) - elif activation == "gelu": - triton_gelu_and_mul(d_silu,d_w1_out) - # gelu_and_mul(d_silu,d_w1_out) - elif activation == "relu2": - triton_relu2(d_silu,d_w1_out) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=d_silu, + ffn1_out_2d=d_w1_out.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) aiter.asm_fmoe_a8(d_w2_out, d_silu, @@ -1136,4 +1146,4 @@ def calculate_persist_groups(persist_cu, config, quant_type): if config[f"SOL_ID{i}"] in sol_id_table: config[f"PERSIST_GROUP{i}"] = persist_cu * sol_id_table[config[f'SOL_ID{i}']] else: - config[f"PERSIST_GROUP{i}"] = persist_cu \ No newline at end of file + config[f"PERSIST_GROUP{i}"] = persist_cu diff --git a/aiter/fused_moe_c.py b/aiter/fused_moe_c.py index e86a082f4b829788e0a30890cb154151ed5591a3..329aefe47f6b66d25034cb1b6410e1c6398999c9 100644 --- a/aiter/fused_moe_c.py +++ b/aiter/fused_moe_c.py @@ -17,10 +17,14 @@ import time from aiter.test_common import perftest import aiter from aiter import dtypes -from aiter import moe_c_silu_and_mul,moe_c_moe_sum, per_token_quant_hip +from aiter import moe_c_silu_and_mul,moe_c_moe_sum_opt_v2, per_token_quant_hip from aiter.jit.utils.torch_guard import torch_compile_guard from aiter.ops.triton.fused_moe import triton_moe_sum from 
triton.language.extra import libdevice +from aiter.ops.triton.moe_activation import ( + _normalize_activation_and_gate, + _apply_activation, +) logger = logging.getLogger(__name__) @@ -1960,6 +1964,82 @@ def invoke_fused_moe_kernel(A: torch.Tensor, **config, ) +_FLOAT_MOE_DTYPES = (torch.float16, torch.bfloat16, torch.float32) +_QUANTIZED_ACTIVATION_DTYPES = (torch.int8, torch.float8_e4m3fn) + + +def _is_prequantized_activation( + hidden_states_dtype: torch.dtype, + a_scale: Optional[torch.Tensor], +) -> bool: + return ( + a_scale is not None + or hidden_states_dtype in _QUANTIZED_ACTIVATION_DTYPES + ) + + +def _resolve_moe_compute_dtype( + hidden_states_dtype: torch.dtype, + compute_dtype: Optional[torch.dtype] = None, + *, + prequantized: bool = False, +) -> torch.dtype: + """Resolve fp16/bf16/fp32 dtype for GEMM outputs, caches, and compute_type.""" + if hidden_states_dtype in _FLOAT_MOE_DTYPES: + return hidden_states_dtype + if prequantized or hidden_states_dtype in _QUANTIZED_ACTIVATION_DTYPES: + if compute_dtype is not None: + assert compute_dtype in _FLOAT_MOE_DTYPES, ( + f"compute_dtype must be fp16/bf16/fp32, got {compute_dtype}") + return compute_dtype + return torch.bfloat16 + raise ValueError( + f"Unsupported hidden_states dtype: {hidden_states_dtype}") + + +def _torch_dtype_to_triton(dtype: torch.dtype): + if dtype == torch.bfloat16: + return tl.bfloat16 + if dtype == torch.float16: + return tl.float16 + if dtype == torch.float32: + return tl.float32 + raise ValueError(f"Unsupported compute dtype for triton: {dtype}") + + +def _is_marlin_tensorwise_scale( + B_scale: Optional[torch.Tensor], + num_experts: int, +) -> bool: + # Marlin W8A8 tensorwise path expects one scale per expert, + # represented as (E, 1, 1) to stay compatible with existing 3D scale checks. 
+ return B_scale is not None and B_scale.shape == (num_experts, 1, 1) + + +def _validate_prequant_marlin_activation( + A: torch.Tensor, + A_scale: torch.Tensor, + expected_dtype, + block_shape: Optional[List[int]], + B: torch.Tensor, + B_scale: torch.Tensor, +) -> None: + """Validate pre-quantized activation (A, A_scale) for marlin MoE kernels.""" + assert A_scale is not None + allowed = (expected_dtype,) if isinstance(expected_dtype, torch.dtype) else tuple(expected_dtype) + assert A.dtype in allowed, ( + f"pre-quantized A must be one of {allowed}, got {A.dtype}") + if block_shape is None: + assert A_scale.shape[-1] == 1, ( + f"per-token A_scale last dim must be 1, got shape {A_scale.shape}") + else: + assert len(block_shape) == 2 + block_n, block_k = block_shape[0], block_shape[1] + assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1] + assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2] + assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1] + + def invoke_fused_moe_kernel_marlin(A: torch.Tensor, B: torch.Tensor, C: torch.Tensor, @@ -1983,6 +2063,7 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, use_int4_w4a16: bool, use_int4_w4a16_base: bool, is_bottom: bool, + key_selected:int, block_shape: Optional[List[int]] = None) -> None: find_best = os.getenv("WHICH_TO_TEST") assert topk_weights.stride(1) == 1 @@ -1991,12 +2072,13 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, if use_fp8_w8a8: assert B_scale is not None - if block_shape is None: - - A, A_scale = per_token_quant_hip(A,quant_dtype=torch.float8_e4m3fn) - - - + if A_scale is not None: + # Pre-quantized fp8 activation; skip internal quantization. 
+ _validate_prequant_marlin_activation( + A, A_scale, (torch.float8_e4m3fn, torch.int8), + block_shape, B, B_scale) + elif block_shape is None: + A, A_scale = per_token_quant_hip(A, quant_dtype=torch.float8_e4m3fn) else: assert len(block_shape) == 2 block_n, block_k = block_shape[0], block_shape[1] @@ -2006,7 +2088,11 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1] elif use_int8_w8a8: assert B_scale is not None - if block_shape is None: + if A_scale is not None: + # Pre-quantized int8 activation; skip internal quantization. + _validate_prequant_marlin_activation( + A, A_scale, torch.int8, block_shape, B, B_scale) + elif block_shape is None: A, A_scale = moe_kernel_prepare_input( A=A, B=B, @@ -2019,7 +2105,6 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, per_channel_quant=True, block_shape=None ) - else: assert len(block_shape) == 2 block_n, block_k = block_shape[0], block_shape[1] @@ -2029,7 +2114,11 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1] elif use_int8_w4a8: assert B_scale is not None - if block_shape is None: + if A_scale is not None: + # Pre-quantized int8 activation; skip internal quantization. 
+ _validate_prequant_marlin_activation( + A, A_scale, torch.int8, block_shape, B, B_scale) + elif block_shape is None: A, A_scale = moe_kernel_prepare_input( A=A, B=B, @@ -2043,7 +2132,6 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, per_channel_quant=True, block_shape=None ) - else: assert len(block_shape) == 2 block_n, block_k = block_shape[0], block_shape[1] @@ -2135,15 +2223,24 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, else : bit = 4 if (use_int4_w4a16 or use_int4_w4a16_base) else 8 - # print("calling wna16 awq -------") if (bit == 8 and use_int8_w8a16) : - # print("calling w8a16 awq -------") - aiter.moe_c_moe_w8a16_gemm_awq(A, C, B, B_scale, B_zp, - topk_weights if mul_routed_weight else None, - sorted_token_ids, expert_ids, - num_tokens_post_padded, top_k, - config["BLOCK_SIZE_M"], config["BLOCK_SIZE_N"], - config["BLOCK_SIZE_K"], bit) + B = B.view(torch.uint32) + if is_bottom: + # print("calling w8a16 awq gemm2-------") + aiter.moe_c_moe_gemm_marlin_w8a16(A, B , C, B_scale, topk_weights, #B处应该传shuffle权重 待修改 + sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], 1) + else: + # print("calling w8a16 awq gemm1-------") + aiter.moe_c_moe_gemm_marlin_w8a16(A, B, C, B_scale, None, #B处应该传shuffle权重 待修改 + sorted_token_ids, expert_ids, num_tokens_post_padded,8, config["MODE"], 1) + # print("calling wna16 awq -------") + # # print("calling w8a16 awq -------") + # aiter.moe_c_moe_w8a16_gemm_awq(A, C, B, B_scale, B_zp, + # topk_weights if mul_routed_weight else None, + # sorted_token_ids, expert_ids, + # num_tokens_post_padded, top_k, + # config["BLOCK_SIZE_M"], config["BLOCK_SIZE_N"], + # config["BLOCK_SIZE_K"], bit) return elif use_int4_w4a16_base : @@ -2185,19 +2282,29 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, # print("calling w4a16 awq end-------") return elif (use_int8_w8a8 and block_shape == None): - if is_bottom: - # print("B.shape",B.shape) - assert B.shape[1] in [7168,6144,4096,3072,2048] , f" K = {B.shape[1]} 
is not in support" - assert B.shape[2] in [128,256,384,768,2048] , f" N = {B.shape[2]} is not in support" - # print("calling w8a8 channel wise -------") - aiter.moe_c_moe_gemm_marlin_w8a8(A, B, C, A_scale, B_scale,topk_weights, - sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k) - - - else : - # print("calling w8a8 channel wise -------") - aiter.moe_c_moe_gemm_marlin_w8a8(A, B, C, A_scale, B_scale, None, - sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k) + if _is_marlin_tensorwise_scale(B_scale, B.shape[0]): + if is_bottom: + # print("calling w8a8 tensor wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8_tensorwise(A, B, C, A_scale, B_scale, topk_weights, + sorted_token_ids, expert_ids, num_tokens_post_padded, 1, config["MODE"], top_k, key_selected) + else: + # print("calling w8a8 tensor wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8_tensorwise(A, B, C, A_scale, B_scale, None, + sorted_token_ids, expert_ids, num_tokens_post_padded, top_k, config["MODE"], top_k, key_selected) + else: + if is_bottom: + # print("B.shape",B.shape) + assert B.shape[1] in [7168,6144,4096,3072,2048] , f" K = {B.shape[1]} is not in support" + assert B.shape[2] in [128,256,384,512,768,1024,2048] , f" N = {B.shape[2]} is not in support" + # print("calling w8a8 channel wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8(A, B, C, A_scale, B_scale,topk_weights, + sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k,key_selected) + + + else : + # print("calling w8a8 channel wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8(A, B, C, A_scale, B_scale, None, + sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k,key_selected) return elif (use_int8_w4a8 and block_shape == None): if is_bottom: @@ -2212,7 +2319,7 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, aiter.moe_c_moe_gemm_marlin_w4a8(A, B, C, A_scale, B_scale,topk_weights, - sorted_token_ids, expert_ids, 
num_tokens_post_padded,1, config["MODE"],top_k) + sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k,key_selected) # end_event.record() # end_event.synchronize() @@ -2231,7 +2338,7 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, # start_event.record() aiter.moe_c_moe_gemm_marlin_w4a8(A, B, C, A_scale, B_scale, None, - sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"],top_k) + sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k,key_selected) # end_event.record() @@ -2242,16 +2349,28 @@ def invoke_fused_moe_kernel_marlin(A: torch.Tensor, return elif (use_fp8_w8a8 and block_shape == None): - if is_bottom: - # print("calling w8a8 channel wise -------") - aiter.moe_c_moe_gemm_marlin_w8a8_fp8(A, B, C, A_scale, B_scale,topk_weights, - sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k) - - - else : - # print("calling w8a8 channel wise -------") - aiter.moe_c_moe_gemm_marlin_w8a8_fp8(A, B, C, A_scale, B_scale, None, - sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k) + if _is_marlin_tensorwise_scale(B_scale, B.shape[0]): + if is_bottom: + # print("calling fp8 w8a8 tensor wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8_fp8_tensorwise(A, B, C, A_scale, B_scale,topk_weights, + sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k,key_selected) + + + else : + # print("calling fp8 w8a8 tensor wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8_fp8_tensorwise(A, B, C, A_scale, B_scale, None, + sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k,key_selected) + else: + if is_bottom: + # print("calling w8a8 channel wise -------") + aiter.moe_c_moe_gemm_marlin_w8a8_fp8(A, B, C, A_scale, B_scale,topk_weights, + sorted_token_ids, expert_ids, num_tokens_post_padded,1, config["MODE"], top_k,key_selected) + + + else : + # print("calling w8a8 channel wise -------") + 
aiter.moe_c_moe_gemm_marlin_w8a8_fp8(A, B, C, A_scale, B_scale, None, + sorted_token_ids, expert_ids, num_tokens_post_padded,top_k, config["MODE"], top_k,key_selected) return @@ -2591,7 +2710,10 @@ def try_get_optimal_moe_config_marlin( if configs: # If an optimal configuration map has been found, look up the # optimal config - config = configs[min(configs.keys(), key=lambda x: abs(x - M))] + key_selected = min(configs.keys(), key=lambda x: abs(x - M)) + config = configs[key_selected] + config["key_selected"] = key_selected + else: # Else use the default config config = get_default_config(M, E, N, w1_shape[2], top_k, dtype, @@ -2756,6 +2878,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor, kloops2: int, nloops2: int, activation: Optional[str] = None, + is_gated: Optional[bool] = None, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w4a8: bool = False, @@ -2771,23 +2894,27 @@ def inplace_fused_experts(hidden_states: torch.Tensor, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, - routed_scaling_factor: Optional[float] = 1.0) -> None: + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None, + compute_dtype: Optional[torch.dtype] = None) -> None: if activation is None: activation = "silu" - if (use_int4_w4a16 or (use_int8_w8a8 and block_shape == None) or (use_fp8_w8a8 and block_shape == None) or (use_int8_w4a8 and block_shape == None) ): + if (use_int4_w4a16 or (use_int8_w8a8 and block_shape == None) or (use_fp8_w8a8 and block_shape == None) or (use_int8_w8a16 and block_shape == None) or (use_int8_w4a8 and block_shape == None) ): fused_experts_impl_marlin(hidden_states, w1, w2, topk_weights, topk_ids, MODE1, MODE2, BM, - True, activation, use_fp8_w8a8, use_int8_w8a8,use_int8_w4a8, use_int8_w8a16, + True, activation,is_gated, use_fp8_w8a8, use_int8_w8a8,use_int8_w4a8, use_int8_w8a16, use_int4_w4a16, 
use_int4_w4a16_base, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, - a2_scale, block_shape, routed_scaling_factor) + a2_scale, block_shape, routed_scaling_factor,gemm1_alpha,gemm1_limit, + compute_dtype=compute_dtype) else: fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids ,BM,BN,BK,kloops, nloops,BN2, BK2,kloops2,nloops2,True, - activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, + activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, use_int4_w4a16_base, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, block_shape, routed_scaling_factor) @@ -2836,6 +2963,7 @@ def outplace_fused_experts( kloops2: int, nloops2: int, activation: Optional[str] = None, + is_gated: Optional[bool] = None, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w4a8: bool = False, @@ -2851,17 +2979,21 @@ def outplace_fused_experts( a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, - routed_scaling_factor: Optional[float] = 1.0) -> torch.Tensor: + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None, + compute_dtype: Optional[torch.dtype] = None) -> torch.Tensor: if activation is None: activation = "silu" - if (use_int4_w4a16 or (use_int8_w8a8 and block_shape == None) or (use_fp8_w8a8 and block_shape == None) or (use_int8_w4a8 and block_shape == None) ): + if (use_int4_w4a16 or (use_int8_w8a8 and block_shape == None) or (use_fp8_w8a8 and block_shape == None) or (use_int8_w8a16 and block_shape == None) or (use_int8_w4a8 and block_shape == None) ): return fused_experts_impl_marlin(hidden_states, w1, w2, topk_weights, topk_ids, MODE1, MODE2, BM, - False, activation, use_fp8_w8a8, use_int8_w8a8,use_int8_w4a8, use_int8_w8a16, + False, activation,is_gated, use_fp8_w8a8, use_int8_w8a8,use_int8_w4a8, use_int8_w8a16, use_int4_w4a16, 
use_int4_w4a16_base, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, - a2_scale, block_shape, routed_scaling_factor) + a2_scale, block_shape, routed_scaling_factor,gemm1_alpha,gemm1_limit, + compute_dtype=compute_dtype) return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids,BM,BN,BK,kloops,nloops,BN2, @@ -2981,6 +3113,7 @@ def moe_c_fused_experts(hidden_states: torch.Tensor, nloops2: int = 1, inplace: bool = False, activation: Optional[str] = None, + is_gated: Optional[bool] = None, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w4a8: bool = False, @@ -2996,7 +3129,11 @@ def moe_c_fused_experts(hidden_states: torch.Tensor, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, - routed_scaling_factor: Optional[float] = 1.0) -> torch.Tensor: + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None, + compute_dtype: Optional[torch.dtype] = None, + ) -> torch.Tensor: # assert not (use_int8_w4a8 and hidden_states.shape[0] < 1024) , "only support M >= 1024" @@ -3007,19 +3144,21 @@ def moe_c_fused_experts(hidden_states: torch.Tensor, inplace_fused_experts( hidden_states, w1, w2, topk_weights, topk_ids,MODE1,MODE2,BM,BN,BK,kloops,nloops,BN2, - BK2,kloops2,nloops2,activation, + BK2,kloops2,nloops2,activation,is_gated, use_fp8_w8a8, use_int8_w8a8, use_int8_w4a8,use_int8_w8a16, use_int4_w4a16, use_int4_w4a16_base, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, - block_shape, routed_scaling_factor) + block_shape, routed_scaling_factor,gemm1_alpha,gemm1_limit, + compute_dtype=compute_dtype) # print("hidden_states",hidden_states) return hidden_states else: return outplace_fused_experts( hidden_states, w1, w2, topk_weights, topk_ids,MODE1,MODE2,BM,BN,BK,kloops,nloops,BN2, - BK2,kloops2,nloops2,activation, + 
BK2,kloops2,nloops2,activation,is_gated, use_fp8_w8a8, use_int8_w8a8,use_int8_w4a8, use_int8_w8a16, use_int4_w4a16, use_int4_w4a16_base, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, - block_shape, routed_scaling_factor) + block_shape, routed_scaling_factor,gemm1_alpha,gemm1_limit, + compute_dtype=compute_dtype) @@ -3175,6 +3314,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, BM: int, inplace: bool = False, activation: str = "silu", + is_gated: Optional[bool] = None, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w4a8: bool = False, @@ -3190,7 +3330,29 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, - routed_scaling_factor: Optional[float] = 1.0): + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None, + compute_dtype: Optional[torch.dtype] = None): + + activation, is_gated = _normalize_activation_and_gate(activation, is_gated) + + prequantized_input = _is_prequantized_activation( + hidden_states.dtype, a1_scale) + if prequantized_input: + assert use_fp8_w8a8 or use_int8_w8a8 or use_int8_w4a8, ( + "pre-quantized activation requires use_fp8_w8a8, " + "use_int8_w8a8, or use_int8_w4a8") + else: + assert hidden_states.dtype in _FLOAT_MOE_DTYPES, ( + f"hidden_states must be fp16/bf16/fp32, got {hidden_states.dtype}") + + compute_dtype = _resolve_moe_compute_dtype( + hidden_states.dtype, + compute_dtype, + prequantized=prequantized_input, + ) + # Check constraints. 
if use_int4_w4a16 or use_int8_w4a8: assert hidden_states.shape[1] // 2 == w1.shape[ @@ -3202,9 +3364,6 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, assert hidden_states.is_contiguous(), "Hidden_states must be contiguous" assert w1.stride(-1) == 1, "Stride of last dimension must be 1" assert w2.stride(-1) == 1, "Stride of last dimension must be 1" - assert hidden_states.dtype in [ - torch.float32, torch.float16, torch.bfloat16 - ] num_tokens, _ = hidden_states.shape E, N, _ = w1.shape @@ -3220,7 +3379,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, use_int8_w4a8=use_int8_w4a8, use_int8_w8a16=use_int8_w8a16, use_int4_w4a16=use_int4_w4a16, - dtype=hidden_states.dtype) + dtype=compute_dtype) get_config_func = functools.partial( try_get_optimal_moe_config_marlin, @@ -3244,7 +3403,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, # cache3, we're done with cache1 cache13 = torch.empty(M * top_k_num * max(N, w2.shape[1]), device=hidden_states.device, - dtype=hidden_states.dtype) + dtype=compute_dtype) intermediate_cache1 = cache13[:M * top_k_num * N].view( (M, topk_ids.shape[1], N)) intermediate_cache3 = cache13[:M * top_k_num * w2.shape[1]].view( @@ -3253,25 +3412,21 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, # This needs separate memory since it's used concurrently with cache1 intermediate_cache2 = torch.empty((M * top_k_num, N // 2), device=hidden_states.device, - dtype=hidden_states.dtype) + dtype=compute_dtype) - if hidden_states.dtype == torch.bfloat16: - compute_type = tl.bfloat16 - elif hidden_states.dtype == torch.float16: - compute_type = tl.float16 - elif hidden_states.dtype == torch.float32: - compute_type = tl.float32 - else: - raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}") - - - + compute_type = _torch_dtype_to_triton(compute_dtype) if inplace: + assert not prequantized_input, ( + "inplace is not supported when hidden_states is pre-quantized") + assert hidden_states.dtype 
== compute_dtype out_hidden_states = hidden_states - # out_hidden_states = torch.empty_like(hidden_states) else: - out_hidden_states = torch.empty_like(hidden_states) + out_hidden_states = torch.empty( + (num_tokens, hidden_states.shape[1]), + device=hidden_states.device, + dtype=compute_dtype, + ) @@ -3298,11 +3453,14 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, curr_topk_ids = topk_ids[begin_chunk_idx:end_chunk_idx] curr_topk_weights = topk_weights[begin_chunk_idx:end_chunk_idx] - + curr_a1_scale = (a1_scale[begin_chunk_idx:end_chunk_idx] + if a1_scale is not None else None) + curr_a2_scale = (a2_scale[begin_chunk_idx:end_chunk_idx] + if a2_scale is not None else None) find_best = os.environ.get("WHICH_TO_TEST") if(find_best): - if(use_int4_w4a16 or use_int8_w4a8): + if(use_int4_w4a16 or use_int8_w4a8 or use_int8_w8a16 ): sorted_token_ids, expert_ids, num_tokens_post_padded = ( moe_align_block_size(curr_topk_ids, BM, global_num_experts, expert_map)) @@ -3312,13 +3470,13 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, topk_weights, global_num_experts, 7168, - hidden_states.dtype, + compute_dtype, BM, expert_mask=None, ) else: - if(use_int4_w4a16 or use_int8_w4a8): + if(use_int4_w4a16 or use_int8_w4a8 or use_int8_w8a16 ): # print("*****************************",config["BLOCK_SIZE_M"]) sorted_token_ids, expert_ids, num_tokens_post_padded = ( moe_align_block_size(curr_topk_ids, config["BLOCK_SIZE_M"], @@ -3333,7 +3491,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, topk_weights, global_num_experts, 7168, - hidden_states.dtype, + compute_dtype, config["BLOCK_SIZE_M"], expert_mask=None, ) @@ -3350,7 +3508,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, invoke_fused_moe_kernel_marlin(curr_hidden_states, w1, intermediate_cache1, - a1_scale, + curr_a1_scale, w1_scale, w1_zp, curr_topk_weights, @@ -3370,16 +3528,24 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, use_int4_w4a16=use_int4_w4a16, 
use_int4_w4a16_base=use_int4_w4a16_base, is_bottom = False, + key_selected=config["key_selected"], block_shape=block_shape) if activation == "silu": - moe_c_silu_and_mul(intermediate_cache2, - intermediate_cache1.view(-1, N)) - # elif activation == "gelu": - # torch.ops._C.gelu_and_mul(intermediate_cache2, - # intermediate_cache1.view(-1, N)) + rows_per_block,vec_size =aiter.load_silu_tune_config(M * top_k_num,N // 2) + + moe_c_silu_and_mul(intermediate_cache2, + intermediate_cache1.view(-1, N),rows_per_block,vec_size) else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=intermediate_cache2, + ffn1_out_2d=intermediate_cache1.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) + # use_moe_wna16_cuda = should_moe_wna16_use_cuda( # num_valid_tokens=topk_ids.numel(), @@ -3400,7 +3566,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, invoke_fused_moe_kernel_marlin(intermediate_cache2, w2, intermediate_cache3, - a2_scale, + curr_a2_scale, w2_scale, w2_zp, curr_topk_weights, @@ -3420,6 +3586,7 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, use_int4_w4a16=use_int4_w4a16, use_int4_w4a16_base=use_int4_w4a16_base, is_bottom = True, + key_selected=config["key_selected"] , block_shape=block_shape) mode_use_triton_moe_sum = out_hidden_states.dtype == torch.float16 or \ out_hidden_states.dtype == torch.bfloat16 or \ @@ -3432,8 +3599,8 @@ def fused_experts_impl_marlin(hidden_states: torch.Tensor, # out_hidden_states[begin_chunk_idx:end_chunk_idx]) triton_moe_sum(intermediate_cache3.view(*intermediate_cache3.shape), out_hidden_states[begin_chunk_idx:end_chunk_idx] , routed_scaling_factor) else: - moe_c_moe_sum(intermediate_cache3.view(*intermediate_cache3.shape), - out_hidden_states[begin_chunk_idx:end_chunk_idx],curr_topk_ids) + moe_c_moe_sum_opt_v2(intermediate_cache3.view(*intermediate_cache3.shape), + 
out_hidden_states[begin_chunk_idx:end_chunk_idx],routed_scaling_factor) diff --git a/aiter/jit/core.py b/aiter/jit/core.py index d276d5ebf50a57a9c8287aeac02b15db5edb1e36..b334deeb7b850323cfca0f9a44346f895631a3a2 100644 --- a/aiter/jit/core.py +++ b/aiter/jit/core.py @@ -552,7 +552,7 @@ def build_module( "-Wno-vla-cxx-extension", "-Wno-undefined-func-template", "-Wno-macro-redefined", - "-Wno-missing-template-arg-list-after-template-kw", + # "-Wno-missing-template-arg-list-after-template-kw", "-fgpu-flush-denormals-to-zero", ] @@ -794,6 +794,7 @@ def compile_ops( fc_name: Optional[str] = None, gen_func: Optional[Callable[..., dict[str, Any]]] = None, gen_fake: Optional[Callable[..., Any]] = None, + develop: bool = False, ): def decorator(func): func.arg_checked = False @@ -897,12 +898,18 @@ def compile_ops( doc_str = re.sub(pattern, r"Optional[\1]", doc_str) for el in enum_types: doc_str = re.sub(f" aiter.*{el} ", f" {el} ", doc_str) + try: + from ..utility.aiter_types import aiter_tensor_t as _aiter_tensor_t + except ImportError: + _aiter_tensor_t = None namespace = { "List": List, "Optional": Optional, "torch": torch, "typing": typing, } + if _aiter_tensor_t is not None: + namespace["aiter_tensor_t"] = _aiter_tensor_t exec( f"from aiter import*\ndef {doc_str}: pass", @@ -955,13 +962,34 @@ def compile_ops( return True if not func.arg_checked: - func.arg_checked = check_args() + if develop: + func.arg_checked = True # skip type-check when develop=True; tensors are converted below + else: + func.arg_checked = check_args() if AITER_LOG_MORE == 2: from ..test_common import log_args log_args(func, *args, **kwargs) + # develop=True: convert torch.Tensor → pybind aiter_tensor_t and inject HIP stream. + # develop=False (default): all existing ops pass through unchanged. 
+ if develop: + import torch + from ..utility.dtypes import torch_to_aiter_pybind + + args = tuple( + torch_to_aiter_pybind(a) if isinstance(a, torch.Tensor) else a + for a in args + ) + kwargs = { + k: (torch_to_aiter_pybind(v) if isinstance(v, torch.Tensor) else v) + for k, v in kwargs.items() + } + module._set_current_hip_stream( + torch.cuda.current_stream().cuda_stream + ) + return op(*args, **kwargs) @torch_compile_guard(device="cuda", gen_fake=gen_fake, calling_func_=func) diff --git a/aiter/jit/optCompilerConfig.json b/aiter/jit/optCompilerConfig.json index cecfcacb7d8bc6fb2ed8c977e9faa140536a179e..7fca18496a42a170fb91dbae423fb717f713cb96 100644 --- a/aiter/jit/optCompilerConfig.json +++ b/aiter/jit/optCompilerConfig.json @@ -80,6 +80,41 @@ "hipify": "True", "blob_gen_cmd": "''" }, + "module_grouped_gemm": { + "srcs": [ + "f'{AITER_CSRC_DIR}/pybind/grouped_gemm_ck_pybind.cu'", + "f'{AITER_CSRC_DIR}/py_itfs_ck/grouped_gemm_kernels.cu'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/grouped_gemm.cpp'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/instances/grouped_gemm_fp16.cpp'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/instances/grouped_gemm_bf16.cpp'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/instances/grouped_gemm_fp8.cpp'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/instances/grouped_gemm_int8.cpp'" + ], + "flags_extra_cc": [ + "'-DCK_TILE_GROUPED_GEMM_FAST_BUILD'", + "'-DCK_TILE_GROUPED_GEMM_FAST_FP16'", + "'-DCK_TILE_GROUPED_GEMM_FAST_BF16'", + "'-DCK_TILE_GROUPED_GEMM_FAST_FP8'", + "'-DCK_TILE_GROUPED_GEMM_FAST_INT8'", + "'-DCK_TILE_GROUPED_GEMM_FAST_RC_ONLY'" + ], + "flags_extra_hip": [ + "'-DCK_TILE_GROUPED_GEMM_FAST_BUILD'", + "'-DCK_TILE_GROUPED_GEMM_FAST_FP16'", + "'-DCK_TILE_GROUPED_GEMM_FAST_BF16'", + "'-DCK_TILE_GROUPED_GEMM_FAST_FP8'", + "'-DCK_TILE_GROUPED_GEMM_FAST_INT8'", + "'-DCK_TILE_GROUPED_GEMM_FAST_RC_ONLY'" + ], + "extra_ldflags": "None", + "extra_include": [ + 
"f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm'", + "f'{CK_DIR}/example_hcu/ck_tile/19_grouped_gemm/instances'" + ], + "verbose": "False", + "hipify": "False", + "blob_gen_cmd": "''" + }, "module_moe_utils":{ "srcs": [ "f'{AITER_CSRC_DIR}/pybind/moe_utils_pybind.cu'", @@ -351,7 +386,7 @@ "f'{MOE_C_DIR}/csrc_for_aiter'", "f'{AITER_CSRC_DIR}/py_itfs_moe_c/moe_c.cu'" ], - "flags_extra_cc": ["' -mllvm -support-768-vgprs=true -mllvm -disable-machine-sink '" + "flags_extra_cc": ["' -mllvm -support-768-vgprs=true -mllvm -disable-machine-sink -w'" ], "flags_extra_hip": [], "extra_ldflags": "None", @@ -386,5 +421,17 @@ "verbose": "False", "hipify": "True", "blob_gen_cmd": "''" + }, + "module_mhc": { + "srcs": [ + "f'{AITER_CSRC_DIR}/pybind/mhc_pybind.cu'", + "f'{AITER_CSRC_DIR}/kernels/mhc_kernels.cu'" + ], + "flags_extra_cc": [], + "flags_extra_hip": [], + "extra_ldflags": "None", + "extra_include": [], + "verbose": "False", + "blob_gen_cmd": "''" } } diff --git a/aiter/moe.py b/aiter/moe.py index e4d39673bb9769897dd41a7b8c7d0670593cd38a..0ed2fffb222d2cbebe50e948126595e39684df16 100644 --- a/aiter/moe.py +++ b/aiter/moe.py @@ -22,9 +22,11 @@ class MoeQuantType: """Quantization types supported by get_aiter_moe_config / aiter_moe.""" W16A16 = "w16a16" W4A16 = "w4a16" - W8A8 = "w8a8" - FP8_W8A8 = "fp8_w8a8" W4A8 = "w4a8" + W8A8 = "int8_w8a8" + FP8_W8A8 = "fp8_w8a8" + INT8_W8A16 = "int8_w8a16" + FP8_W8A16 = "fp8_w8a16" @dataclass @@ -90,6 +92,16 @@ def _try_get_moe_c_config( is_bottom=False, use_moe_wna16_cuda=True, ) + elif quant_type == MoeQuantType.INT8_W8A16: + configs = get_moe_configs_marlin( + E=e, + N=n, + dtype="int8_w8a16", + block_n=0, + block_k=block_size if block_size else 0, + is_bottom=False, + use_moe_wna16_cuda=True, + ) else: return None @@ -139,7 +151,7 @@ def _try_get_asm_config( if quant_type == MoeQuantType.W8A8: from .fused_moe_asm_wna16 import decode_sol_0 - + asm_quant_type = AsmMoeQuantType.INT8_W8A8_C if (block_size == 0 or block_size is None) else 
AsmMoeQuantType.INT8_W8A8 solution = get_moe_asm_solution( arch=arch, token=m, @@ -147,7 +159,7 @@ def _try_get_asm_config( model_dim=k, expert=e, topk=top_k, - quant_type=AsmMoeQuantType.INT8_W8A8, + quant_type=asm_quant_type, ) if solution == "default": return None @@ -155,7 +167,7 @@ def _try_get_asm_config( if quant_type == MoeQuantType.FP8_W8A8: from .fused_moe_asm_wna16 import decode_sol_0 - + asm_quant_type = AsmMoeQuantType.F8_W8A8_C if (block_size == 0 or block_size is None) else AsmMoeQuantType.F8_W8A8 solution = get_moe_asm_solution( arch=arch, token=m, @@ -163,7 +175,7 @@ def _try_get_asm_config( model_dim=k, expert=e, topk=top_k, - quant_type=AsmMoeQuantType.F8_W8A8, + quant_type=asm_quant_type, ) if solution == "default": return None @@ -208,6 +220,7 @@ def _try_get_triton_config( MoeQuantType.W4A16: "int4_w4a16", MoeQuantType.W8A8: "int8_w8a8", MoeQuantType.FP8_W8A8: "fp8_w8a8", + MoeQuantType.INT8_W8A16: "int8_w8a16", }.get(quant_type) if dtype_name is None: return None @@ -238,18 +251,22 @@ def _try_get_ck_config( block_shape: Optional[List[int]], ) -> Optional[Dict[str, Any]]: try: - if quant_type not in (MoeQuantType.W8A8, MoeQuantType.FP8_W8A8): - return None - from .fused_moe_ck import get_moe_ck_solution_id, MoeQuantType as CkMoeQuantType from .jit.utils.chip_info import get_gfx + + if quant_type == MoeQuantType.W16A16: + ck_quant_type = CkMoeQuantType.NO_QUANT + elif quant_type == MoeQuantType.W8A8 or quant_type == MoeQuantType.FP8_W8A8: + ck_quant_type = CkMoeQuantType.INT8_W8A8 + else: + return None arch = get_gfx() q_size_n = block_shape[0] if block_shape is not None else 0 q_size_k = block_shape[1] if block_shape is not None else 0 solution_id = get_moe_ck_solution_id( arch, - CkMoeQuantType.INT8_W8A8, + ck_quant_type, m, n, k, @@ -274,7 +291,7 @@ def get_aiter_moe_config( block_size: int, dtype: torch.dtype, quant_type: str, - activation: str = "silu", # "silu"/"gelu"/"relu2"/... 
+ activation: str = "silu", # "silu"/"gelu"/"relu2"/"swigluoai"/"swiglustep"... gated: Optional[bool] = None, # True=GLU-gated (N1=2*inter), False=non-gated (N1=inter); None=auto from activation ) -> Tuple[bool, AiterMoeConfig]: """Get the best backend config for a MOE problem. @@ -285,6 +302,7 @@ def get_aiter_moe_config( - ``MoeQuantType.W8A8`` (int8) - ``MoeQuantType.FP8_W8A8`` (fp8) - ``MoeQuantType.W4A8`` + - ``MoeQuantType.INT8_W8A16`` (int8 weight, fp16/bf16 activation) Backend priority: - ``w16a16``: asm > triton @@ -292,6 +310,8 @@ def get_aiter_moe_config( - ``w8a8``: asm > moe_c > triton > ck - ``fp8_w8a8``: asm > moe_c > triton > ck - ``w4a8``: moe_c + - ``int8_w8a16``: moe_c > triton (ASM kernel not available) + - ``fp8_w8a16``: not yet implemented (raises NotImplementedError) For non-gated MOE (e.g. Nemotron with ReLU² activation), pass ``gated=False`` (or let it auto-detect from ``activation="relu2"``) @@ -299,7 +319,7 @@ def get_aiter_moe_config( """ # Determine gating: explicit > auto-detect from activation if gated is None: - gated = activation in ("silu", "gelu") + gated = activation in ("silu", "gelu", "swigluoai", "swiglustep") # For gated (GLU): N1 = 2 * intermediate_size, n = N1 // 2 # For non-gated: N1 = intermediate_size, n = N1 @@ -310,17 +330,20 @@ def get_aiter_moe_config( if dtype == torch.float16: candidates = [ (MoeSolutionType.MOE_C, lambda: _try_get_moe_c_config(quant_type, M, E, n, block_size)), + (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), ] elif dtype == torch.bfloat16: candidates = [ (MoeSolutionType.ASM, lambda: _try_get_asm_config(quant_type, M, E, n, K, top_k, block_size)), (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), + (MoeSolutionType.MOE_C, lambda: _try_get_moe_c_config(quant_type, M, E, n, block_size)), ] else: raise ValueError(f"Unsupported dtype: {dtype}") elif quant_type in (MoeQuantType.W8A8, MoeQuantType.FP8_W8A8): - if 
block_size == 0: # Channel wise choose MOE_C + if block_size is None or block_size == 0: # Channel wise candidates = [ + (MoeSolutionType.ASM, lambda: _try_get_asm_config(quant_type, M, E, n, K, top_k, block_size)), (MoeSolutionType.MOE_C, lambda: _try_get_moe_c_config(quant_type, M, E, n, block_size)), (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), # (MoeSolutionType.CK, lambda: _try_get_ck_config(quant_type, M, E, n, K, top_k, block_shape)), @@ -328,17 +351,31 @@ def get_aiter_moe_config( else: # Block wise choose ASM candidates = [ (MoeSolutionType.ASM, lambda: _try_get_asm_config(quant_type, M, E, n, K, top_k, block_size)), + (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), ] elif quant_type == MoeQuantType.W4A8: candidates = [ (MoeSolutionType.MOE_C, lambda: _try_get_moe_c_config(quant_type, M, E, n, block_size)), + (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), # (MoeSolutionType.ASM, lambda: _try_get_asm_config(quant_type, M, E, n, K, top_k)), ] + elif quant_type == MoeQuantType.INT8_W8A16: + # ASM backend currently has no W8A16 kernel/CSV; skip ASM and use moe_c -> triton. + candidates = [ + (MoeSolutionType.MOE_C, lambda: _try_get_moe_c_config(quant_type, M, E, n, block_size)), + (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), + ] + elif quant_type == MoeQuantType.FP8_W8A16: + # No backend currently implements FP8 weight + 16-bit activation MoE. + raise NotImplementedError( + "MoeQuantType.FP8_W8A16 is not yet supported by any aiter MOE backend (asm/moe_c/triton)." 
+ ) elif quant_type == MoeQuantType.W16A16: candidates = [ (MoeSolutionType.ASM, lambda: _try_get_asm_config(quant_type, M, E, n, K, top_k, None)), (MoeSolutionType.TRITON, lambda: _try_get_triton_config(quant_type, M, E, n, block_size)), + # (MoeSolutionType.CK, lambda: _try_get_ck_config(quant_type, M, E, n, K, top_k, block_shape)), ] else: raise ValueError(f"Unsupported quant_type: {quant_type}") @@ -374,6 +411,10 @@ def aiter_moe( global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, routed_scaling_factor: Optional[float] = 1.0, + use_weight_shuffle: bool = False, + output_dtype: Optional[torch.dtype] = None, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None ) -> torch.Tensor: """Execute MOE using the backend and quant type described by *moe_config*.""" if moe_config.solution_type is None or moe_config.quant_type is None: @@ -381,11 +422,15 @@ def aiter_moe( "moe_config has no valid solution_type/quant_type. " "Call get_aiter_moe_config first and check the status." 
) + + if output_dtype is None: + output_dtype = hidden_states.dtype use_int4_w4a16 = moe_config.quant_type == MoeQuantType.W4A16 use_int8_w8a8 = moe_config.quant_type == MoeQuantType.W8A8 use_fp8_w8a8 = moe_config.quant_type == MoeQuantType.FP8_W8A8 use_int8_w4a8 = moe_config.quant_type == MoeQuantType.W4A8 + use_int8_w8a16 = moe_config.quant_type == MoeQuantType.INT8_W8A16 if moe_config.solution_type == MoeSolutionType.MOE_C: from .fused_moe_c import moe_c_fused_experts @@ -401,6 +446,7 @@ def aiter_moe( use_int8_w8a8=use_int8_w8a8, use_fp8_w8a8=use_fp8_w8a8, use_int8_w4a8=use_int8_w4a8, + use_int8_w8a16=use_int8_w8a16, activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, @@ -411,12 +457,14 @@ def aiter_moe( a1_scale=a1_scale, a2_scale=a2_scale, block_shape=block_shape, - routed_scaling_factor=routed_scaling_factor + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit ) if moe_config.solution_type == MoeSolutionType.ASM: from .fused_moe_asm_wna16 import fused_experts_asm_impl - + per_channel_quant = True if block_shape is None else False cfg = moe_config.config solution_id = f"{cfg['SOL_ID1']}+{cfg['SOL_ID2']}" return fused_experts_asm_impl( @@ -425,12 +473,13 @@ def aiter_moe( w2, topk_weights, topk_ids, - dtype=hidden_states.dtype, + dtype=output_dtype, inplace=inplace, use_int4_w4a16=use_int4_w4a16, use_int8_w8a8=use_int8_w8a8, use_fp8_w8a8=use_fp8_w8a8, activation=activation, + per_channel_quant = per_channel_quant, global_num_experts=global_num_experts, expert_map=expert_map, w1_scale=w1_scale, @@ -440,15 +489,18 @@ def aiter_moe( a1_scale=a1_scale, a2_scale=a2_scale, block_shape=block_shape, + use_shuffle=use_weight_shuffle, solution_id=solution_id, - routed_scaling_factor=routed_scaling_factor + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit ) if moe_config.solution_type == MoeSolutionType.TRITON: from .ops.triton.fused_moe import 
fused_experts_impl - # W8A8 channel-wise (block_shape=None) requires per_channel_quant=True - per_channel_quant = (use_int8_w8a8 or use_fp8_w8a8) and block_shape is None + # W8A8 / W8A16 channel-wise (block_shape=None) requires per_channel_quant=True + per_channel_quant = (use_int8_w8a8 or use_fp8_w8a8 or use_int8_w8a16) and block_shape is None return fused_experts_impl( hidden_states, @@ -456,11 +508,12 @@ def aiter_moe( w2, topk_weights, topk_ids, - odtype=hidden_states.dtype, + output_dtype=output_dtype, inplace=inplace, use_int4_w4a16=use_int4_w4a16, use_int8_w8a8=use_int8_w8a8, use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a16=use_int8_w8a16, activation=activation, per_channel_quant=per_channel_quant, global_num_experts=global_num_experts, @@ -472,7 +525,9 @@ def aiter_moe( a1_scale=a1_scale, a2_scale=a2_scale, block_shape=block_shape, - routed_scaling_factor=routed_scaling_factor + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit ) if moe_config.solution_type == MoeSolutionType.CK: @@ -485,7 +540,7 @@ def aiter_moe( w2, topk_weights, topk_ids, - odtype=hidden_states.dtype, + odtype=output_dtype, inplace=inplace, use_int8_w8a8=use_int8_w8a8, use_fp8_w8a8=use_fp8_w8a8, @@ -499,6 +554,7 @@ def aiter_moe( a1_scale=a1_scale, a2_scale=a2_scale, block_shape=block_shape, + use_shuffle=use_weight_shuffle, routed_scaling_factor=routed_scaling_factor, solution_id=solution_id, ) diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + 
}, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, 
+ "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + 
"BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + 
"MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git 
a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- 
/dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 
1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 
16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + 
"32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 
16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=1024,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json 
b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ 
b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + 
"BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 
+ }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + 
"BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=2048,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + 
"MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git 
a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=64,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- 
/dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=72,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + 
}, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + 
"MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx936,num_cus=80,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + 
"BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 
2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=64,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json 
b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ 
b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=72,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..773cf9c3cc5df1cf1616176b3b13cf13cdc19d38 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 
16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=128,N=768,gfx_version=gfx938,num_cus=80,dtype=int8_w8a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + 
"BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { 
"BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + 
"1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { 
+ "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } 
+ +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json 
b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ 
b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + 
"BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + 
"MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + 
"BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { 
"BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + 
"BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=1024,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, 
+ "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + 
"BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 
32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline 
at end of file diff --git a/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=128,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json 
b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ 
b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + 
"BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + 
"MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + 
"BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { 
"BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + 
"BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, 
+ "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + 
"BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + 
"BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git 
a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json 
b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=2048,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json index 
9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + 
"BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": 
{ + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - 
"MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=256,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 
16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + 
"128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..5068778e637bf38724ade92bfca0795954345d31 --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + 
"BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + 
"8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..5068778e637bf38724ade92bfca0795954345d31 --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + } + +} \ No 
newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json 
b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 
0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ 
b/aiter/moe_c_configs/E=256,N=384,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { 
"1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + 
"7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx936,num_cus=80,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + 
"MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + 
"BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 
121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json index 9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + 
"MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + 
"8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No 
newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json 
b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json index 
9d15ed418196961cbdcf8956dcc02dd4f9fe8613..16da22707b54ffff4414c654da5dddfd17ef0510 100644 --- a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w4a8.json @@ -1,87 +1,87 @@ { "1": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "2": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "3": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "4": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "5": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "6": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "7": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "8": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "9": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "10": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "11": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "12": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "13": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "14": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "15": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "16": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "32": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "64": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "128": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "256": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "512": { "BLOCK_SIZE_M": 16, - "MODE": 16 + "MODE": 124 }, "1024": { "BLOCK_SIZE_M": 48, diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { 
+ "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=256,N=512,gfx_version=gfx938,num_cus=72,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + 
"5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ca347575b6ed628e97c08699b2d0f892efeff4 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "7": { + "BLOCK_SIZE_M": 16, + 
"MODE": 43 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 165 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..86dc74c8453cc7727b20ec9e010ac1e022c79c47 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 92 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 75 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 72 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "11": { + 
"BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 81 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 69 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 162 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ca347575b6ed628e97c08699b2d0f892efeff4 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + 
"14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 165 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..86dc74c8453cc7727b20ec9e010ac1e022c79c47 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=320,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 92 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 75 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 72 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "32": { + "BLOCK_SIZE_M": 16, + 
"MODE": 125 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 81 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 69 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 162 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ca347575b6ed628e97c08699b2d0f892efeff4 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "256": { + 
"BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 165 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..95feeae51689accaede53316becf8bcd92d1a2a7 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 92 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 75 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 72 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 81 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 69 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "2048": { + "BLOCK_SIZE_M": 48, + 
"MODE": 162 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..d3ca347575b6ed628e97c08699b2d0f892efeff4 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 39 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 165 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 168 + }, + "16384": { + 
"BLOCK_SIZE_M": 96, + "MODE": 169 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 169 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..95feeae51689accaede53316becf8bcd92d1a2a7 --- /dev/null +++ b/aiter/moe_c_configs/E=288,N=640,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 92 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 75 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 72 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 62 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 77 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 125 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 81 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 69 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 159 + }, + "2048": { + "BLOCK_SIZE_M": 48, + "MODE": 162 + }, + "4096": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "8192": { + "BLOCK_SIZE_M": 80, + "MODE": 165 + }, + "16384": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + }, + "32768": { + "BLOCK_SIZE_M": 96, + "MODE": 166 + } + +} \ No newline at end of file diff --git 
a/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json 
b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 
0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ 
b/aiter/moe_c_configs/E=36,N=1280,gfx_version=gfx938,num_cus=72,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16,is_bottom=True.json b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..079e76c1e08c35744b248905dae6fcf9d4de2eb5 --- /dev/null +++ b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + 
"BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16.json b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=64,dtype=int4_w4a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + 
}, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16,is_bottom=True.json b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..079e76c1e08c35744b248905dae6fcf9d4de2eb5 --- /dev/null +++ b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16,is_bottom=True.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "13": { + "BLOCK_SIZE_M": 
16, + "MODE": 2 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 2 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16.json b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16.json new file mode 100644 index 0000000000000000000000000000000000000000..2f659fc79e6082c02425c13db583b31ff266a481 --- /dev/null +++ b/aiter/moe_c_configs/E=384,N=1024,gfx_version=gfx938,num_cus=72,dtype=int4_w4a16.json @@ -0,0 +1,102 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 0 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + 
"256": { + "BLOCK_SIZE_M": 16, + "MODE": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "2048": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "4096": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + }, + "8192": { + "BLOCK_SIZE_M": 16, + "MODE": 5 + } +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": 
{ + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=fp8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at 
end of file diff --git a/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..7fc799e8113b4b4c65d04ef461d4a33432ba6cdb --- /dev/null +++ b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,112 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 42 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 38 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 46 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 43 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 86 + + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json 
b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..499fc19617d2de1f0938bd950bb1865a74a56a3a --- /dev/null +++ b/aiter/moe_c_configs/E=512,N=128,gfx_version=gfx938,num_cus=64,dtype=int8_w8a8.json @@ -0,0 +1,111 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "2": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "3": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "4": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "5": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "6": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "7": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "8": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "9": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "10": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "11": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "12": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "13": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "14": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "15": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "16": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "32": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "64": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "128": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "256": { + "BLOCK_SIZE_M": 16, + "MODE": 121 + }, + "512": { + "BLOCK_SIZE_M": 16, + "MODE": 98 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "MODE": 183 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "MODE": 146 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "8192": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "16384": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + }, + "32768": { + "BLOCK_SIZE_M": 32, + "MODE": 160 + } + +} \ No newline at end of file diff --git a/aiter/moe_c_configs/silu_configs/silu_config_summary.json b/aiter/moe_c_configs/silu_configs/silu_config_summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f4320184a2addca7b972afe7a86f8b7f1e715ada --- /dev/null +++ 
b/aiter/moe_c_configs/silu_configs/silu_config_summary.json @@ -0,0 +1,152922 @@ +{ + "meta": { + "dtype": "bf16", + "m_min": 1, + "m_max": 327680, + "m_list": [ + 1, + 2, + 4, + 8, + 16, + 32, + 64, + 128, + 256, + 512, + 1024, + 2048, + 3072, + 4096, + 5120, + 6144, + 7168, + 8192, + 9216, + 10240, + 11264, + 12288, + 13312, + 14336, + 15360, + 16384, + 17408, + 18432, + 19456, + 20480, + 21504, + 22528, + 23552, + 24576, + 25600, + 26624, + 27648, + 28672, + 29696, + 30720, + 31744, + 32768, + 33792, + 34816, + 35840, + 36864, + 37888, + 38912, + 39936, + 40960, + 41984, + 43008, + 44032, + 45056, + 46080, + 47104, + 48128, + 49152, + 50176, + 51200, + 52224, + 53248, + 54272, + 55296, + 56320, + 57344, + 58368, + 59392, + 60416, + 61440, + 62464, + 63488, + 64512, + 65536, + 66560, + 67584, + 68608, + 69632, + 70656, + 71680, + 72704, + 73728, + 74752, + 75776, + 76800, + 77824, + 78848, + 79872, + 80896, + 81920, + 82944, + 83968, + 84992, + 86016, + 87040, + 88064, + 89088, + 90112, + 91136, + 92160, + 93184, + 94208, + 95232, + 96256, + 97280, + 98304, + 99328, + 100352, + 101376, + 102400, + 103424, + 104448, + 105472, + 106496, + 107520, + 108544, + 109568, + 110592, + 111616, + 112640, + 113664, + 114688, + 115712, + 116736, + 117760, + 118784, + 119808, + 120832, + 121856, + 122880, + 123904, + 124928, + 125952, + 126976, + 128000, + 129024, + 130048, + 131072, + 132096, + 133120, + 134144, + 135168, + 136192, + 137216, + 138240, + 139264, + 140288, + 141312, + 142336, + 143360, + 144384, + 145408, + 146432, + 147456, + 148480, + 149504, + 150528, + 151552, + 152576, + 153600, + 154624, + 155648, + 156672, + 157696, + 158720, + 159744, + 160768, + 161792, + 162816, + 163840, + 164864, + 165888, + 166912, + 167936, + 168960, + 169984, + 171008, + 172032, + 173056, + 174080, + 175104, + 176128, + 177152, + 178176, + 179200, + 180224, + 181248, + 182272, + 183296, + 184320, + 185344, + 186368, + 187392, + 188416, + 189440, + 190464, + 191488, + 192512, + 
193536, + 194560, + 195584, + 196608, + 197632, + 198656, + 199680, + 200704, + 201728, + 202752, + 203776, + 204800, + 205824, + 206848, + 207872, + 208896, + 209920, + 210944, + 211968, + 212992, + 214016, + 215040, + 216064, + 217088, + 218112, + 219136, + 220160, + 221184, + 222208, + 223232, + 224256, + 225280, + 226304, + 227328, + 228352, + 229376, + 230400, + 231424, + 232448, + 233472, + 234496, + 235520, + 236544, + 237568, + 238592, + 239616, + 240640, + 241664, + 242688, + 243712, + 244736, + 245760, + 246784, + 247808, + 248832, + 249856, + 250880, + 251904, + 252928, + 253952, + 254976, + 256000, + 257024, + 258048, + 259072, + 260096, + 261120, + 262144, + 263168, + 264192, + 265216, + 266240, + 267264, + 268288, + 269312, + 270336, + 271360, + 272384, + 273408, + 274432, + 275456, + 276480, + 277504, + 278528, + 279552, + 280576, + 281600, + 282624, + 283648, + 284672, + 285696, + 286720, + 287744, + 288768, + 289792, + 290816, + 291840, + 292864, + 293888, + 294912, + 295936, + 296960, + 297984, + 299008, + 300032, + 301056, + 302080, + 303104, + 304128, + 305152, + 306176, + 307200, + 308224, + 309248, + 310272, + 311296, + 312320, + 313344, + 314368, + 315392, + 316416, + 317440, + 318464, + 319488, + 320512, + 321536, + 322560, + 323584, + 324608, + 325632, + 326656, + 327680 + ], + "n_list": [ + 128, + 160, + 192, + 256, + 320, + 384, + 480, + 512, + 576, + 640, + 768, + 800, + 896, + 960, + 1024, + 1120, + 1152, + 1280, + 1344, + 1408, + 1440, + 1536, + 1600, + 1664, + 1728, + 1760, + 1792, + 1920, + 2048, + 2080, + 2240, + 2400, + 2560 + ], + "rows_candidates": [ + 1, + 2, + 3, + 4, + 5, + 6, + 8, + 10 + ], + "vec_candidates": [ + 1, + 2, + 4 + ], + "warmup": 5, + "iters": 20, + "device": "BW1000B" + }, + "cases": { + "M=1,N=128": { + "file": "silu_config_M1_N128.json", + "M": 1, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=1,N=160": { + "file": "silu_config_M1_N160.json", + "M": 1, + "N": 160, 
+ "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=1,N=192": { + "file": "silu_config_M1_N192.json", + "M": 1, + "N": 192, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + "M=1,N=256": { + "file": "silu_config_M1_N256.json", + "M": 1, + "N": 256, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=1,N=320": { + "file": "silu_config_M1_N320.json", + "M": 1, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=1,N=384": { + "file": "silu_config_M1_N384.json", + "M": 1, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + "M=1,N=480": { + "file": "silu_config_M1_N480.json", + "M": 1, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=1,N=512": { + "file": "silu_config_M1_N512.json", + "M": 1, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=1,N=576": { + "file": "silu_config_M1_N576.json", + "M": 1, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=1,N=640": { + "file": "silu_config_M1_N640.json", + "M": 1, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=1,N=768": { + "file": "silu_config_M1_N768.json", + "M": 1, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999856 + }, + "M=1,N=800": { + "file": "silu_config_M1_N800.json", + "M": 1, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=1,N=896": { + "file": "silu_config_M1_N896.json", + "M": 1, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.03899999999998 + }, + "M=1,N=960": { + "file": "silu_config_M1_N960.json", + "M": 1, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=1,N=1024": { + "file": "silu_config_M1_N1024.json", + "M": 1, + "N": 
1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + "M=1,N=1120": { + "file": "silu_config_M1_N1120.json", + "M": 1, + "N": 1120, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=1,N=1152": { + "file": "silu_config_M1_N1152.json", + "M": 1, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=1,N=1280": { + "file": "silu_config_M1_N1280.json", + "M": 1, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 2.158999999999992 + }, + "M=1,N=1344": { + "file": "silu_config_M1_N1344.json", + "M": 1, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=1,N=1408": { + "file": "silu_config_M1_N1408.json", + "M": 1, + "N": 1408, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=1,N=1440": { + "file": "silu_config_M1_N1440.json", + "M": 1, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=1,N=1536": { + "file": "silu_config_M1_N1536.json", + "M": 1, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=1,N=1600": { + "file": "silu_config_M1_N1600.json", + "M": 1, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=1,N=1664": { + "file": "silu_config_M1_N1664.json", + "M": 1, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=1,N=1728": { + "file": "silu_config_M1_N1728.json", + "M": 1, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=1,N=1760": { + "file": "silu_config_M1_N1760.json", + "M": 1, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=1,N=1792": { + "file": "silu_config_M1_N1792.json", + "M": 1, + "N": 1792, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=1,N=1920": { + "file": 
"silu_config_M1_N1920.json", + "M": 1, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=1,N=2048": { + "file": "silu_config_M1_N2048.json", + "M": 1, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=1,N=2080": { + "file": "silu_config_M1_N2080.json", + "M": 1, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=1,N=2240": { + "file": "silu_config_M1_N2240.json", + "M": 1, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999983 + }, + "M=1,N=2400": { + "file": "silu_config_M1_N2400.json", + "M": 1, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=1,N=2560": { + "file": "silu_config_M1_N2560.json", + "M": 1, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000086 + }, + "M=2,N=128": { + "file": "silu_config_M2_N128.json", + "M": 2, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=2,N=160": { + "file": "silu_config_M2_N160.json", + "M": 2, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + "M=2,N=192": { + "file": "silu_config_M2_N192.json", + "M": 2, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=2,N=256": { + "file": "silu_config_M2_N256.json", + "M": 2, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=2,N=320": { + "file": "silu_config_M2_N320.json", + "M": 2, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=2,N=384": { + "file": "silu_config_M2_N384.json", + "M": 2, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000015 + }, + "M=2,N=480": { + "file": "silu_config_M2_N480.json", + "M": 2, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=2,N=512": 
{ + "file": "silu_config_M2_N512.json", + "M": 2, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + "M=2,N=576": { + "file": "silu_config_M2_N576.json", + "M": 2, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=2,N=640": { + "file": "silu_config_M2_N640.json", + "M": 2, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + "M=2,N=768": { + "file": "silu_config_M2_N768.json", + "M": 2, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=2,N=800": { + "file": "silu_config_M2_N800.json", + "M": 2, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999893 + }, + "M=2,N=896": { + "file": "silu_config_M2_N896.json", + "M": 2, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=2,N=960": { + "file": "silu_config_M2_N960.json", + "M": 2, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=2,N=1024": { + "file": "silu_config_M2_N1024.json", + "M": 2, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + "M=2,N=1120": { + "file": "silu_config_M2_N1120.json", + "M": 2, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=2,N=1152": { + "file": "silu_config_M2_N1152.json", + "M": 2, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=2,N=1280": { + "file": "silu_config_M2_N1280.json", + "M": 2, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=2,N=1344": { + "file": "silu_config_M2_N1344.json", + "M": 2, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=2,N=1408": { + "file": "silu_config_M2_N1408.json", + "M": 2, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 
+ }, + "M=2,N=1440": { + "file": "silu_config_M2_N1440.json", + "M": 2, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=2,N=1536": { + "file": "silu_config_M2_N1536.json", + "M": 2, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=2,N=1600": { + "file": "silu_config_M2_N1600.json", + "M": 2, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=2,N=1664": { + "file": "silu_config_M2_N1664.json", + "M": 2, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=2,N=1728": { + "file": "silu_config_M2_N1728.json", + "M": 2, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=2,N=1760": { + "file": "silu_config_M2_N1760.json", + "M": 2, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=2,N=1792": { + "file": "silu_config_M2_N1792.json", + "M": 2, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=2,N=1920": { + "file": "silu_config_M2_N1920.json", + "M": 2, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999856 + }, + "M=2,N=2048": { + "file": "silu_config_M2_N2048.json", + "M": 2, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=2,N=2080": { + "file": "silu_config_M2_N2080.json", + "M": 2, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=2,N=2240": { + "file": "silu_config_M2_N2240.json", + "M": 2, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=2,N=2400": { + "file": "silu_config_M2_N2400.json", + "M": 2, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=2,N=2560": { + "file": "silu_config_M2_N2560.json", + "M": 2, + "N": 2560, + "rows_per_block": 1, + 
"vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=4,N=128": { + "file": "silu_config_M4_N128.json", + "M": 4, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=4,N=160": { + "file": "silu_config_M4_N160.json", + "M": 4, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=4,N=192": { + "file": "silu_config_M4_N192.json", + "M": 4, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=4,N=256": { + "file": "silu_config_M4_N256.json", + "M": 4, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=4,N=320": { + "file": "silu_config_M4_N320.json", + "M": 4, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1992499999999993 + }, + "M=4,N=384": { + "file": "silu_config_M4_N384.json", + "M": 4, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=4,N=480": { + "file": "silu_config_M4_N480.json", + "M": 4, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=4,N=512": { + "file": "silu_config_M4_N512.json", + "M": 4, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=4,N=576": { + "file": "silu_config_M4_N576.json", + "M": 4, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=4,N=640": { + "file": "silu_config_M4_N640.json", + "M": 4, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=4,N=768": { + "file": "silu_config_M4_N768.json", + "M": 4, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + "M=4,N=800": { + "file": "silu_config_M4_N800.json", + "M": 4, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=4,N=896": { + "file": "silu_config_M4_N896.json", + "M": 4, + "N": 896, + "rows_per_block": 1, + 
"vec_size": 2, + "time_us": 2.0390000000000086 + }, + "M=4,N=960": { + "file": "silu_config_M4_N960.json", + "M": 4, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=4,N=1024": { + "file": "silu_config_M4_N1024.json", + "M": 4, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=4,N=1120": { + "file": "silu_config_M4_N1120.json", + "M": 4, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=4,N=1152": { + "file": "silu_config_M4_N1152.json", + "M": 4, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=4,N=1280": { + "file": "silu_config_M4_N1280.json", + "M": 4, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=4,N=1344": { + "file": "silu_config_M4_N1344.json", + "M": 4, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=4,N=1408": { + "file": "silu_config_M4_N1408.json", + "M": 4, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=4,N=1440": { + "file": "silu_config_M4_N1440.json", + "M": 4, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=4,N=1536": { + "file": "silu_config_M4_N1536.json", + "M": 4, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=4,N=1600": { + "file": "silu_config_M4_N1600.json", + "M": 4, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=4,N=1664": { + "file": "silu_config_M4_N1664.json", + "M": 4, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=4,N=1728": { + "file": "silu_config_M4_N1728.json", + "M": 4, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=4,N=1760": { + "file": "silu_config_M4_N1760.json", + "M": 4, + 
"N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=4,N=1792": { + "file": "silu_config_M4_N1792.json", + "M": 4, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.079000000000022 + }, + "M=4,N=1920": { + "file": "silu_config_M4_N1920.json", + "M": 4, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + "M=4,N=2048": { + "file": "silu_config_M4_N2048.json", + "M": 4, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=4,N=2080": { + "file": "silu_config_M4_N2080.json", + "M": 4, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=4,N=2240": { + "file": "silu_config_M4_N2240.json", + "M": 4, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=4,N=2400": { + "file": "silu_config_M4_N2400.json", + "M": 4, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=4,N=2560": { + "file": "silu_config_M4_N2560.json", + "M": 4, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=8,N=128": { + "file": "silu_config_M8_N128.json", + "M": 8, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000015 + }, + "M=8,N=160": { + "file": "silu_config_M8_N160.json", + "M": 8, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=8,N=192": { + "file": "silu_config_M8_N192.json", + "M": 8, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + "M=8,N=256": { + "file": "silu_config_M8_N256.json", + "M": 8, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=8,N=320": { + "file": "silu_config_M8_N320.json", + "M": 8, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=8,N=384": { + "file": 
"silu_config_M8_N384.json", + "M": 8, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=8,N=480": { + "file": "silu_config_M8_N480.json", + "M": 8, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=8,N=512": { + "file": "silu_config_M8_N512.json", + "M": 8, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119250000000001 + }, + "M=8,N=576": { + "file": "silu_config_M8_N576.json", + "M": 8, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000106 + }, + "M=8,N=640": { + "file": "silu_config_M8_N640.json", + "M": 8, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=8,N=768": { + "file": "silu_config_M8_N768.json", + "M": 8, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=8,N=800": { + "file": "silu_config_M8_N800.json", + "M": 8, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=8,N=896": { + "file": "silu_config_M8_N896.json", + "M": 8, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=8,N=960": { + "file": "silu_config_M8_N960.json", + "M": 8, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=8,N=1024": { + "file": "silu_config_M8_N1024.json", + "M": 8, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=8,N=1120": { + "file": "silu_config_M8_N1120.json", + "M": 8, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=8,N=1152": { + "file": "silu_config_M8_N1152.json", + "M": 8, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=8,N=1280": { + "file": "silu_config_M8_N1280.json", + "M": 8, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=8,N=1344": { 
+ "file": "silu_config_M8_N1344.json", + "M": 8, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=8,N=1408": { + "file": "silu_config_M8_N1408.json", + "M": 8, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=8,N=1440": { + "file": "silu_config_M8_N1440.json", + "M": 8, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=8,N=1536": { + "file": "silu_config_M8_N1536.json", + "M": 8, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=8,N=1600": { + "file": "silu_config_M8_N1600.json", + "M": 8, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=8,N=1664": { + "file": "silu_config_M8_N1664.json", + "M": 8, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=8,N=1728": { + "file": "silu_config_M8_N1728.json", + "M": 8, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999991 + }, + "M=8,N=1760": { + "file": "silu_config_M8_N1760.json", + "M": 8, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=8,N=1792": { + "file": "silu_config_M8_N1792.json", + "M": 8, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119249999999994 + }, + "M=8,N=1920": { + "file": "silu_config_M8_N1920.json", + "M": 8, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=8,N=2048": { + "file": "silu_config_M8_N2048.json", + "M": 8, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=8,N=2080": { + "file": "silu_config_M8_N2080.json", + "M": 8, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=8,N=2240": { + "file": "silu_config_M8_N2240.json", + "M": 8, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 
2.199000000000005 + }, + "M=8,N=2400": { + "file": "silu_config_M8_N2400.json", + "M": 8, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=8,N=2560": { + "file": "silu_config_M8_N2560.json", + "M": 8, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + "M=16,N=128": { + "file": "silu_config_M16_N128.json", + "M": 16, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + "M=16,N=160": { + "file": "silu_config_M16_N160.json", + "M": 16, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + "M=16,N=192": { + "file": "silu_config_M16_N192.json", + "M": 16, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=16,N=256": { + "file": "silu_config_M16_N256.json", + "M": 16, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=16,N=320": { + "file": "silu_config_M16_N320.json", + "M": 16, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=16,N=384": { + "file": "silu_config_M16_N384.json", + "M": 16, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + "M=16,N=480": { + "file": "silu_config_M16_N480.json", + "M": 16, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=16,N=512": { + "file": "silu_config_M16_N512.json", + "M": 16, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=16,N=576": { + "file": "silu_config_M16_N576.json", + "M": 16, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=16,N=640": { + "file": "silu_config_M16_N640.json", + "M": 16, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=16,N=768": { + "file": "silu_config_M16_N768.json", + "M": 16, + "N": 768, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 1.9990000000000026 + }, + "M=16,N=800": { + "file": "silu_config_M16_N800.json", + "M": 16, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=16,N=896": { + "file": "silu_config_M16_N896.json", + "M": 16, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=16,N=960": { + "file": "silu_config_M16_N960.json", + "M": 16, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=16,N=1024": { + "file": "silu_config_M16_N1024.json", + "M": 16, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999864 + }, + "M=16,N=1120": { + "file": "silu_config_M16_N1120.json", + "M": 16, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999893 + }, + "M=16,N=1152": { + "file": "silu_config_M16_N1152.json", + "M": 16, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + "M=16,N=1280": { + "file": "silu_config_M16_N1280.json", + "M": 16, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=16,N=1344": { + "file": "silu_config_M16_N1344.json", + "M": 16, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + "M=16,N=1408": { + "file": "silu_config_M16_N1408.json", + "M": 16, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=16,N=1440": { + "file": "silu_config_M16_N1440.json", + "M": 16, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=16,N=1536": { + "file": "silu_config_M16_N1536.json", + "M": 16, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 1.9990000000000026 + }, + "M=16,N=1600": { + "file": "silu_config_M16_N1600.json", + "M": 16, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + 
"M=16,N=1664": { + "file": "silu_config_M16_N1664.json", + "M": 16, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=16,N=1728": { + "file": "silu_config_M16_N1728.json", + "M": 16, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=16,N=1760": { + "file": "silu_config_M16_N1760.json", + "M": 16, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=16,N=1792": { + "file": "silu_config_M16_N1792.json", + "M": 16, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=16,N=1920": { + "file": "silu_config_M16_N1920.json", + "M": 16, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=16,N=2048": { + "file": "silu_config_M16_N2048.json", + "M": 16, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + "M=16,N=2080": { + "file": "silu_config_M16_N2080.json", + "M": 16, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3589999999999947 + }, + "M=16,N=2240": { + "file": "silu_config_M16_N2240.json", + "M": 16, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=16,N=2400": { + "file": "silu_config_M16_N2400.json", + "M": 16, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5989999999999966 + }, + "M=16,N=2560": { + "file": "silu_config_M16_N2560.json", + "M": 16, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5589999999999975 + }, + "M=32,N=128": { + "file": "silu_config_M32_N128.json", + "M": 32, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999992 + }, + "M=32,N=160": { + "file": "silu_config_M32_N160.json", + "M": 32, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + "M=32,N=192": { + "file": "silu_config_M32_N192.json", + "M": 32, + "N": 192, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + "M=32,N=256": { + "file": "silu_config_M32_N256.json", + "M": 32, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=32,N=320": { + "file": "silu_config_M32_N320.json", + "M": 32, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + "M=32,N=384": { + "file": "silu_config_M32_N384.json", + "M": 32, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=32,N=480": { + "file": "silu_config_M32_N480.json", + "M": 32, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=32,N=512": { + "file": "silu_config_M32_N512.json", + "M": 32, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=32,N=576": { + "file": "silu_config_M32_N576.json", + "M": 32, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=32,N=640": { + "file": "silu_config_M32_N640.json", + "M": 32, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=32,N=768": { + "file": "silu_config_M32_N768.json", + "M": 32, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=32,N=800": { + "file": "silu_config_M32_N800.json", + "M": 32, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=32,N=896": { + "file": "silu_config_M32_N896.json", + "M": 32, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=32,N=960": { + "file": "silu_config_M32_N960.json", + "M": 32, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=32,N=1024": { + "file": "silu_config_M32_N1024.json", + "M": 32, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + "M=32,N=1120": { + "file": 
"silu_config_M32_N1120.json", + "M": 32, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5189999999999912 + }, + "M=32,N=1152": { + "file": "silu_config_M32_N1152.json", + "M": 32, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=32,N=1280": { + "file": "silu_config_M32_N1280.json", + "M": 32, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1344": { + "file": "silu_config_M32_N1344.json", + "M": 32, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + "M=32,N=1408": { + "file": "silu_config_M32_N1408.json", + "M": 32, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1440": { + "file": "silu_config_M32_N1440.json", + "M": 32, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1536": { + "file": "silu_config_M32_N1536.json", + "M": 32, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.438999999999993 + }, + "M=32,N=1600": { + "file": "silu_config_M32_N1600.json", + "M": 32, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=32,N=1664": { + "file": "silu_config_M32_N1664.json", + "M": 32, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1728": { + "file": "silu_config_M32_N1728.json", + "M": 32, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=32,N=1760": { + "file": "silu_config_M32_N1760.json", + "M": 32, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1792": { + "file": "silu_config_M32_N1792.json", + "M": 32, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=32,N=1920": { + "file": "silu_config_M32_N1920.json", + "M": 32, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + 
"M=32,N=2048": { + "file": "silu_config_M32_N2048.json", + "M": 32, + "N": 2048, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + "M=32,N=2080": { + "file": "silu_config_M32_N2080.json", + "M": 32, + "N": 2080, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + "M=32,N=2240": { + "file": "silu_config_M32_N2240.json", + "M": 32, + "N": 2240, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.958999999999996 + }, + "M=32,N=2400": { + "file": "silu_config_M32_N2400.json", + "M": 32, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=32,N=2560": { + "file": "silu_config_M32_N2560.json", + "M": 32, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0789999999999935 + }, + "M=64,N=128": { + "file": "silu_config_M64_N128.json", + "M": 64, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119249999999994 + }, + "M=64,N=160": { + "file": "silu_config_M64_N160.json", + "M": 64, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199249999999992 + }, + "M=64,N=192": { + "file": "silu_config_M64_N192.json", + "M": 64, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=64,N=256": { + "file": "silu_config_M64_N256.json", + "M": 64, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000086 + }, + "M=64,N=320": { + "file": "silu_config_M64_N320.json", + "M": 64, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + "M=64,N=384": { + "file": "silu_config_M64_N384.json", + "M": 64, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=64,N=480": { + "file": "silu_config_M64_N480.json", + "M": 64, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + "M=64,N=512": { + "file": "silu_config_M64_N512.json", + "M": 64, + "N": 512, + "rows_per_block": 1, + 
"vec_size": 2, + "time_us": 2.0790000000000006 + }, + "M=64,N=576": { + "file": "silu_config_M64_N576.json", + "M": 64, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=64,N=640": { + "file": "silu_config_M64_N640.json", + "M": 64, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=64,N=768": { + "file": "silu_config_M64_N768.json", + "M": 64, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=64,N=800": { + "file": "silu_config_M64_N800.json", + "M": 64, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=64,N=896": { + "file": "silu_config_M64_N896.json", + "M": 64, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=64,N=960": { + "file": "silu_config_M64_N960.json", + "M": 64, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.398999999999994 + }, + "M=64,N=1024": { + "file": "silu_config_M64_N1024.json", + "M": 64, + "N": 1024, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=64,N=1120": { + "file": "silu_config_M64_N1120.json", + "M": 64, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + "M=64,N=1152": { + "file": "silu_config_M64_N1152.json", + "M": 64, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + "M=64,N=1280": { + "file": "silu_config_M64_N1280.json", + "M": 64, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + "M=64,N=1344": { + "file": "silu_config_M64_N1344.json", + "M": 64, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.958999999999996 + }, + "M=64,N=1408": { + "file": "silu_config_M64_N1408.json", + "M": 64, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000015 + }, + "M=64,N=1440": { + "file": "silu_config_M64_N1440.json", + "M": 64, + "N": 
1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=64,N=1536": { + "file": "silu_config_M64_N1536.json", + "M": 64, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=64,N=1600": { + "file": "silu_config_M64_N1600.json", + "M": 64, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9990000000000023 + }, + "M=64,N=1664": { + "file": "silu_config_M64_N1664.json", + "M": 64, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + "M=64,N=1728": { + "file": "silu_config_M64_N1728.json", + "M": 64, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + "M=64,N=1760": { + "file": "silu_config_M64_N1760.json", + "M": 64, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + "M=64,N=1792": { + "file": "silu_config_M64_N1792.json", + "M": 64, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + "M=64,N=1920": { + "file": "silu_config_M64_N1920.json", + "M": 64, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=64,N=2048": { + "file": "silu_config_M64_N2048.json", + "M": 64, + "N": 2048, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 2.678999999999995 + }, + "M=64,N=2080": { + "file": "silu_config_M64_N2080.json", + "M": 64, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5189999999999984 + }, + "M=64,N=2240": { + "file": "silu_config_M64_N2240.json", + "M": 64, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.438999999999993 + }, + "M=64,N=2400": { + "file": "silu_config_M64_N2400.json", + "M": 64, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5190000000000055 + }, + "M=64,N=2560": { + "file": "silu_config_M64_N2560.json", + "M": 64, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5589999999999975 + 
}, + "M=128,N=128": { + "file": "silu_config_M128_N128.json", + "M": 128, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + "M=128,N=160": { + "file": "silu_config_M128_N160.json", + "M": 128, + "N": 160, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + "M=128,N=192": { + "file": "silu_config_M128_N192.json", + "M": 128, + "N": 192, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + "M=128,N=256": { + "file": "silu_config_M128_N256.json", + "M": 128, + "N": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + "M=128,N=320": { + "file": "silu_config_M128_N320.json", + "M": 128, + "N": 320, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=128,N=384": { + "file": "silu_config_M128_N384.json", + "M": 128, + "N": 384, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + "M=128,N=480": { + "file": "silu_config_M128_N480.json", + "M": 128, + "N": 480, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + "M=128,N=512": { + "file": "silu_config_M128_N512.json", + "M": 128, + "N": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + "M=128,N=576": { + "file": "silu_config_M128_N576.json", + "M": 128, + "N": 576, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + "M=128,N=640": { + "file": "silu_config_M128_N640.json", + "M": 128, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + "M=128,N=768": { + "file": "silu_config_M128_N768.json", + "M": 128, + "N": 768, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=128,N=800": { + "file": "silu_config_M128_N800.json", + "M": 128, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + "M=128,N=896": { + "file": "silu_config_M128_N896.json", + "M": 128, + "N": 896, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000015 + }, + "M=128,N=960": { + "file": "silu_config_M128_N960.json", + "M": 128, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + "M=128,N=1024": { + "file": "silu_config_M128_N1024.json", + "M": 128, + "N": 1024, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 2.7989999999999995 + }, + "M=128,N=1120": { + "file": "silu_config_M128_N1120.json", + "M": 128, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.918999999999997 + }, + "M=128,N=1152": { + "file": "silu_config_M128_N1152.json", + "M": 128, + "N": 1152, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.959000000000003 + }, + "M=128,N=1280": { + "file": "silu_config_M128_N1280.json", + "M": 128, + "N": 1280, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9589999999999965 + }, + "M=128,N=1344": { + "file": "silu_config_M128_N1344.json", + "M": 128, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.959000000000003 + }, + "M=128,N=1408": { + "file": "silu_config_M128_N1408.json", + "M": 128, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.919000000000004 + }, + "M=128,N=1440": { + "file": "silu_config_M128_N1440.json", + "M": 128, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9990000000000023 + }, + "M=128,N=1536": { + "file": "silu_config_M128_N1536.json", + "M": 128, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.079000000000008 + }, + "M=128,N=1600": { + "file": "silu_config_M128_N1600.json", + "M": 128, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.038999999999994 + }, + "M=128,N=1664": { + "file": "silu_config_M128_N1664.json", + "M": 128, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.0789999999999935 + }, + "M=128,N=1728": { + "file": "silu_config_M128_N1728.json", + "M": 128, + "N": 1728, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 
4.0789999999999935 + }, + "M=128,N=1760": { + "file": "silu_config_M128_N1760.json", + "M": 128, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.038999999999994 + }, + "M=128,N=1792": { + "file": "silu_config_M128_N1792.json", + "M": 128, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.079000000000001 + }, + "M=128,N=1920": { + "file": "silu_config_M128_N1920.json", + "M": 128, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.119000000000007 + }, + "M=128,N=2048": { + "file": "silu_config_M128_N2048.json", + "M": 128, + "N": 2048, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 3.439 + }, + "M=128,N=2080": { + "file": "silu_config_M128_N2080.json", + "M": 128, + "N": 2080, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.5589999999999975 + }, + "M=128,N=2240": { + "file": "silu_config_M128_N2240.json", + "M": 128, + "N": 2240, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.518999999999991 + }, + "M=128,N=2400": { + "file": "silu_config_M128_N2400.json", + "M": 128, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.838999999999999 + }, + "M=128,N=2560": { + "file": "silu_config_M128_N2560.json", + "M": 128, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.919000000000004 + }, + "M=256,N=128": { + "file": "silu_config_M256_N128.json", + "M": 256, + "N": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + "M=256,N=160": { + "file": "silu_config_M256_N160.json", + "M": 256, + "N": 160, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + "M=256,N=192": { + "file": "silu_config_M256_N192.json", + "M": 256, + "N": 192, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.879000000000005 + }, + "M=256,N=256": { + "file": "silu_config_M256_N256.json", + "M": 256, + "N": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.639000000000003 + }, + "M=256,N=320": { + "file": 
"silu_config_M256_N320.json", + "M": 256, + "N": 320, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0790000000000077 + }, + "M=256,N=384": { + "file": "silu_config_M256_N384.json", + "M": 256, + "N": 384, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + "M=256,N=480": { + "file": "silu_config_M256_N480.json", + "M": 256, + "N": 480, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + "M=256,N=512": { + "file": "silu_config_M256_N512.json", + "M": 256, + "N": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + "M=256,N=576": { + "file": "silu_config_M256_N576.json", + "M": 256, + "N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9990000000000023 + }, + "M=256,N=640": { + "file": "silu_config_M256_N640.json", + "M": 256, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.879000000000005 + }, + "M=256,N=768": { + "file": "silu_config_M256_N768.json", + "M": 256, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.0789999999999935 + }, + "M=256,N=800": { + "file": "silu_config_M256_N800.json", + "M": 256, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9989999999999957 + }, + "M=256,N=896": { + "file": "silu_config_M256_N896.json", + "M": 256, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.039000000000009 + }, + "M=256,N=960": { + "file": "silu_config_M256_N960.json", + "M": 256, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.119250000000008 + }, + "M=256,N=1024": { + "file": "silu_config_M256_N1024.json", + "M": 256, + "N": 1024, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 3.9589999999999965 + }, + "M=256,N=1120": { + "file": "silu_config_M256_N1120.json", + "M": 256, + "N": 1120, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879000000000005 + }, + "M=256,N=1152": { + "file": "silu_config_M256_N1152.json", + "M": 256, + "N": 1152, + "rows_per_block": 
1, + "vec_size": 2, + "time_us": 5.7989999999999995 + }, + "M=256,N=1280": { + "file": "silu_config_M256_N1280.json", + "M": 256, + "N": 1280, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 5.878999999999998 + }, + "M=256,N=1344": { + "file": "silu_config_M256_N1344.json", + "M": 256, + "N": 1344, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.959000000000003 + }, + "M=256,N=1408": { + "file": "silu_config_M256_N1408.json", + "M": 256, + "N": 1408, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999998 + }, + "M=256,N=1440": { + "file": "silu_config_M256_N1440.json", + "M": 256, + "N": 1440, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.7989999999999995 + }, + "M=256,N=1536": { + "file": "silu_config_M256_N1536.json", + "M": 256, + "N": 1536, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.918999999999997 + }, + "M=256,N=1600": { + "file": "silu_config_M256_N1600.json", + "M": 256, + "N": 1600, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999991 + }, + "M=256,N=1664": { + "file": "silu_config_M256_N1664.json", + "M": 256, + "N": 1664, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999991 + }, + "M=256,N=1728": { + "file": "silu_config_M256_N1728.json", + "M": 256, + "N": 1728, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.079000000000001 + }, + "M=256,N=1760": { + "file": "silu_config_M256_N1760.json", + "M": 256, + "N": 1760, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.958999999999996 + }, + "M=256,N=1792": { + "file": "silu_config_M256_N1792.json", + "M": 256, + "N": 1792, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879250000000006 + }, + "M=256,N=1920": { + "file": "silu_config_M256_N1920.json", + "M": 256, + "N": 1920, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879000000000005 + }, + "M=256,N=2048": { + "file": "silu_config_M256_N2048.json", + "M": 256, + "N": 2048, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 5.038999999999994 + 
}, + "M=256,N=2080": { + "file": "silu_config_M256_N2080.json", + "M": 256, + "N": 2080, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.999000000000002 + }, + "M=256,N=2240": { + "file": "silu_config_M256_N2240.json", + "M": 256, + "N": 2240, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.878999999999998 + }, + "M=256,N=2400": { + "file": "silu_config_M256_N2400.json", + "M": 256, + "N": 2400, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 7.2792499999999976 + }, + "M=256,N=2560": { + "file": "silu_config_M256_N2560.json", + "M": 256, + "N": 2560, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 7.478999999999999 + }, + "M=512,N=128": { + "file": "silu_config_M512_N128.json", + "M": 512, + "N": 128, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.5589999999999975 + }, + "M=512,N=160": { + "file": "silu_config_M512_N160.json", + "M": 512, + "N": 160, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.878999999999998 + }, + "M=512,N=192": { + "file": "silu_config_M512_N192.json", + "M": 512, + "N": 192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 3.8389999999999986 + }, + "M=512,N=256": { + "file": "silu_config_M512_N256.json", + "M": 512, + "N": 256, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 3.5589999999999975 + }, + "M=512,N=320": { + "file": "silu_config_M512_N320.json", + "M": 512, + "N": 320, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.478999999999992 + }, + "M=512,N=384": { + "file": "silu_config_M512_N384.json", + "M": 512, + "N": 384, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.679000000000002 + }, + "M=512,N=480": { + "file": "silu_config_M512_N480.json", + "M": 512, + "N": 480, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 4.919000000000004 + }, + "M=512,N=512": { + "file": "silu_config_M512_N512.json", + "M": 512, + "N": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.559000000000012 + }, + "M=512,N=576": { + "file": "silu_config_M512_N576.json", + "M": 512, + 
"N": 576, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919250000000005 + }, + "M=512,N=640": { + "file": "silu_config_M512_N640.json", + "M": 512, + "N": 640, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + "M=512,N=768": { + "file": "silu_config_M512_N768.json", + "M": 512, + "N": 768, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.839000000000006 + }, + "M=512,N=800": { + "file": "silu_config_M512_N800.json", + "M": 512, + "N": 800, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + "M=512,N=896": { + "file": "silu_config_M512_N896.json", + "M": 512, + "N": 896, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.838999999999999 + }, + "M=512,N=960": { + "file": "silu_config_M512_N960.json", + "M": 512, + "N": 960, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + "M=512,N=1024": { + "file": "silu_config_M512_N1024.json", + "M": 512, + "N": 1024, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 5.559000000000005 + }, + "M=512,N=1120": { + "file": "silu_config_M512_N1120.json", + "M": 512, + "N": 1120, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.478999999999992 + }, + "M=512,N=1152": { + "file": "silu_config_M512_N1152.json", + "M": 512, + "N": 1152, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.359249999999989 + }, + "M=512,N=1280": { + "file": "silu_config_M512_N1280.json", + "M": 512, + "N": 1280, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.039000000000001 + }, + "M=512,N=1344": { + "file": "silu_config_M512_N1344.json", + "M": 512, + "N": 1344, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.59924999999999 + }, + "M=512,N=1408": { + "file": "silu_config_M512_N1408.json", + "M": 512, + "N": 1408, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.479 + }, + "M=512,N=1440": { + "file": "silu_config_M512_N1440.json", + "M": 512, + "N": 1440, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.199000000000005 + 
}, + "M=512,N=1536": { + "file": "silu_config_M512_N1536.json", + "M": 512, + "N": 1536, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.159000000000006 + }, + "M=512,N=1600": { + "file": "silu_config_M512_N1600.json", + "M": 512, + "N": 1600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + "M=512,N=1664": { + "file": "silu_config_M512_N1664.json", + "M": 512, + "N": 1664, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.318999999999996 + }, + "M=512,N=1728": { + "file": "silu_config_M512_N1728.json", + "M": 512, + "N": 1728, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + "M=512,N=1760": { + "file": "silu_config_M512_N1760.json", + "M": 512, + "N": 1760, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.55899999999999 + }, + "M=512,N=1792": { + "file": "silu_config_M512_N1792.json", + "M": 512, + "N": 1792, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.639000000000003 + }, + "M=512,N=1920": { + "file": "silu_config_M512_N1920.json", + "M": 512, + "N": 1920, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.678999999999995 + }, + "M=512,N=2048": { + "file": "silu_config_M512_N2048.json", + "M": 512, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 7.5989999999999895 + }, + "M=512,N=2080": { + "file": "silu_config_M512_N2080.json", + "M": 512, + "N": 2080, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 10.798999999999992 + }, + "M=512,N=2240": { + "file": "silu_config_M512_N2240.json", + "M": 512, + "N": 2240, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 10.878999999999998 + }, + "M=512,N=2400": { + "file": "silu_config_M512_N2400.json", + "M": 512, + "N": 2400, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 11.158999999999992 + }, + "M=512,N=2560": { + "file": "silu_config_M512_N2560.json", + "M": 512, + "N": 2560, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 11.319000000000003 + }, + "M=1024,N=128": { + "file": 
"silu_config_M1024_N128.json", + "M": 1024, + "N": 128, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.5990000000000038 + }, + "M=1024,N=160": { + "file": "silu_config_M1024_N160.json", + "M": 1024, + "N": 160, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.518999999999991 + }, + "M=1024,N=192": { + "file": "silu_config_M1024_N192.json", + "M": 1024, + "N": 192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.7590000000000074 + }, + "M=1024,N=256": { + "file": "silu_config_M1024_N256.json", + "M": 1024, + "N": 256, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.439 + }, + "M=1024,N=320": { + "file": "silu_config_M1024_N320.json", + "M": 1024, + "N": 320, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.959249999999997 + }, + "M=1024,N=384": { + "file": "silu_config_M1024_N384.json", + "M": 1024, + "N": 384, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 6.879000000000005 + }, + "M=1024,N=480": { + "file": "silu_config_M1024_N480.json", + "M": 1024, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 7.0390000000000015 + }, + "M=1024,N=512": { + "file": "silu_config_M1024_N512.json", + "M": 1024, + "N": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 6.319000000000003 + }, + "M=1024,N=576": { + "file": "silu_config_M1024_N576.json", + "M": 1024, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.279000000000003 + }, + "M=1024,N=640": { + "file": "silu_config_M1024_N640.json", + "M": 1024, + "N": 640, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.479 + }, + "M=1024,N=768": { + "file": "silu_config_M1024_N768.json", + "M": 1024, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 9.599000000000004 + }, + "M=1024,N=800": { + "file": "silu_config_M1024_N800.json", + "M": 1024, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.679249999999996 + }, + "M=1024,N=896": { + "file": "silu_config_M1024_N896.json", + "M": 1024, + "N": 896, + "rows_per_block": 
3, + "vec_size": 2, + "time_us": 9.238999999999997 + }, + "M=1024,N=960": { + "file": "silu_config_M1024_N960.json", + "M": 1024, + "N": 960, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + "M=1024,N=1024": { + "file": "silu_config_M1024_N1024.json", + "M": 1024, + "N": 1024, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 8.238999999999997 + }, + "M=1024,N=1120": { + "file": "silu_config_M1024_N1120.json", + "M": 1024, + "N": 1120, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.799 + }, + "M=1024,N=1152": { + "file": "silu_config_M1024_N1152.json", + "M": 1024, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.678999999999995 + }, + "M=1024,N=1280": { + "file": "silu_config_M1024_N1280.json", + "M": 1024, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.679000000000002 + }, + "M=1024,N=1344": { + "file": "silu_config_M1024_N1344.json", + "M": 1024, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.71925000000001 + }, + "M=1024,N=1408": { + "file": "silu_config_M1024_N1408.json", + "M": 1024, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.679249999999982 + }, + "M=1024,N=1440": { + "file": "silu_config_M1024_N1440.json", + "M": 1024, + "N": 1440, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.759 + }, + "M=1024,N=1536": { + "file": "silu_config_M1024_N1536.json", + "M": 1024, + "N": 1536, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.758999999999993 + }, + "M=1024,N=1600": { + "file": "silu_config_M1024_N1600.json", + "M": 1024, + "N": 1600, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.758999999999993 + }, + "M=1024,N=1664": { + "file": "silu_config_M1024_N1664.json", + "M": 1024, + "N": 1664, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.679000000000002 + }, + "M=1024,N=1728": { + "file": "silu_config_M1024_N1728.json", + "M": 1024, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 
14.879249999999992 + }, + "M=1024,N=1760": { + "file": "silu_config_M1024_N1760.json", + "M": 1024, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.838999999999992 + }, + "M=1024,N=1792": { + "file": "silu_config_M1024_N1792.json", + "M": 1024, + "N": 1792, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.639000000000003 + }, + "M=1024,N=1920": { + "file": "silu_config_M1024_N1920.json", + "M": 1024, + "N": 1920, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.759 + }, + "M=1024,N=2048": { + "file": "silu_config_M1024_N2048.json", + "M": 1024, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 12.359000000000002 + }, + "M=1024,N=2080": { + "file": "silu_config_M1024_N2080.json", + "M": 1024, + "N": 2080, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.998999999999995 + }, + "M=1024,N=2240": { + "file": "silu_config_M1024_N2240.json", + "M": 1024, + "N": 2240, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 17.479 + }, + "M=1024,N=2400": { + "file": "silu_config_M1024_N2400.json", + "M": 1024, + "N": 2400, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.038999999999994 + }, + "M=1024,N=2560": { + "file": "silu_config_M1024_N2560.json", + "M": 1024, + "N": 2560, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.359249999999996 + }, + "M=2048,N=128": { + "file": "silu_config_M2048_N128.json", + "M": 2048, + "N": 128, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.439 + }, + "M=2048,N=160": { + "file": "silu_config_M2048_N160.json", + "M": 2048, + "N": 160, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.598999999999997 + }, + "M=2048,N=192": { + "file": "silu_config_M2048_N192.json", + "M": 2048, + "N": 192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.598999999999997 + }, + "M=2048,N=256": { + "file": "silu_config_M2048_N256.json", + "M": 2048, + "N": 256, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 7.879000000000004 + }, + "M=2048,N=320": { + "file": 
"silu_config_M2048_N320.json", + "M": 2048, + "N": 320, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.998999999999995 + }, + "M=2048,N=384": { + "file": "silu_config_M2048_N384.json", + "M": 2048, + "N": 384, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.959250000000011 + }, + "M=2048,N=480": { + "file": "silu_config_M2048_N480.json", + "M": 2048, + "N": 480, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 10.158999999999992 + }, + "M=2048,N=512": { + "file": "silu_config_M2048_N512.json", + "M": 2048, + "N": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.918999999999997 + }, + "M=2048,N=576": { + "file": "silu_config_M2048_N576.json", + "M": 2048, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.558999999999997 + }, + "M=2048,N=640": { + "file": "silu_config_M2048_N640.json", + "M": 2048, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.558999999999997 + }, + "M=2048,N=768": { + "file": "silu_config_M2048_N768.json", + "M": 2048, + "N": 768, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.478999999999992 + }, + "M=2048,N=800": { + "file": "silu_config_M2048_N800.json", + "M": 2048, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.638999999999989 + }, + "M=2048,N=896": { + "file": "silu_config_M2048_N896.json", + "M": 2048, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.519000000000013 + }, + "M=2048,N=960": { + "file": "silu_config_M2048_N960.json", + "M": 2048, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.759000000000015 + }, + "M=2048,N=1024": { + "file": "silu_config_M2048_N1024.json", + "M": 2048, + "N": 1024, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 12.719000000000008 + }, + "M=2048,N=1120": { + "file": "silu_config_M2048_N1120.json", + "M": 2048, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.278999999999982 + }, + "M=2048,N=1152": { + "file": "silu_config_M2048_N1152.json", + "M": 
2048, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.918999999999983 + }, + "M=2048,N=1280": { + "file": "silu_config_M2048_N1280.json", + "M": 2048, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 24.87925 + }, + "M=2048,N=1344": { + "file": "silu_config_M2048_N1344.json", + "M": 2048, + "N": 1344, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.039250000000003 + }, + "M=2048,N=1408": { + "file": "silu_config_M2048_N1408.json", + "M": 2048, + "N": 1408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.038999999999987 + }, + "M=2048,N=1440": { + "file": "silu_config_M2048_N1440.json", + "M": 2048, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.319249999999997 + }, + "M=2048,N=1536": { + "file": "silu_config_M2048_N1536.json", + "M": 2048, + "N": 1536, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.999000000000002 + }, + "M=2048,N=1600": { + "file": "silu_config_M2048_N1600.json", + "M": 2048, + "N": 1600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.318999999999996 + }, + "M=2048,N=1664": { + "file": "silu_config_M2048_N1664.json", + "M": 2048, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.079 + }, + "M=2048,N=1728": { + "file": "silu_config_M2048_N1728.json", + "M": 2048, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.238999999999997 + }, + "M=2048,N=1760": { + "file": "silu_config_M2048_N1760.json", + "M": 2048, + "N": 1760, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.279000000000025 + }, + "M=2048,N=1792": { + "file": "silu_config_M2048_N1792.json", + "M": 2048, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 25.198999999999998 + }, + "M=2048,N=1920": { + "file": "silu_config_M2048_N1920.json", + "M": 2048, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.31899999999999 + }, + "M=2048,N=2048": { + "file": "silu_config_M2048_N2048.json", + "M": 2048, + "N": 2048, + 
"rows_per_block": 3, + "vec_size": 4, + "time_us": 20.239000000000004 + }, + "M=2048,N=2080": { + "file": "silu_config_M2048_N2080.json", + "M": 2048, + "N": 2080, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 28.519250000000007 + }, + "M=2048,N=2240": { + "file": "silu_config_M2048_N2240.json", + "M": 2048, + "N": 2240, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 29.87899999999999 + }, + "M=2048,N=2400": { + "file": "silu_config_M2048_N2400.json", + "M": 2048, + "N": 2400, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 30.91924999999999 + }, + "M=2048,N=2560": { + "file": "silu_config_M2048_N2560.json", + "M": 2048, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.83925 + }, + "M=3072,N=128": { + "file": "silu_config_M3072_N128.json", + "M": 3072, + "N": 128, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 6.919000000000004 + }, + "M=3072,N=160": { + "file": "silu_config_M3072_N160.json", + "M": 3072, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.63924999999999 + }, + "M=3072,N=192": { + "file": "silu_config_M3072_N192.json", + "M": 3072, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.718999999999994 + }, + "M=3072,N=256": { + "file": "silu_config_M3072_N256.json", + "M": 3072, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 9.759 + }, + "M=3072,N=320": { + "file": "silu_config_M3072_N320.json", + "M": 3072, + "N": 320, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.839000000000006 + }, + "M=3072,N=384": { + "file": "silu_config_M3072_N384.json", + "M": 3072, + "N": 384, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.799 + }, + "M=3072,N=480": { + "file": "silu_config_M3072_N480.json", + "M": 3072, + "N": 480, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.918999999999997 + }, + "M=3072,N=512": { + "file": "silu_config_M3072_N512.json", + "M": 3072, + "N": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 11.558999999999997 
+ }, + "M=3072,N=576": { + "file": "silu_config_M3072_N576.json", + "M": 3072, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599249999999998 + }, + "M=3072,N=640": { + "file": "silu_config_M3072_N640.json", + "M": 3072, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599250000000005 + }, + "M=3072,N=768": { + "file": "silu_config_M3072_N768.json", + "M": 3072, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 19.799 + }, + "M=3072,N=800": { + "file": "silu_config_M3072_N800.json", + "M": 3072, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 19.959000000000003 + }, + "M=3072,N=896": { + "file": "silu_config_M3072_N896.json", + "M": 3072, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599000000000004 + }, + "M=3072,N=960": { + "file": "silu_config_M3072_N960.json", + "M": 3072, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 20.159000000000006 + }, + "M=3072,N=1024": { + "file": "silu_config_M3072_N1024.json", + "M": 3072, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 17.55924999999999 + }, + "M=3072,N=1120": { + "file": "silu_config_M3072_N1120.json", + "M": 3072, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.999 + }, + "M=3072,N=1152": { + "file": "silu_config_M3072_N1152.json", + "M": 3072, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 35.038999999999994 + }, + "M=3072,N=1280": { + "file": "silu_config_M3072_N1280.json", + "M": 3072, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.639 + }, + "M=3072,N=1344": { + "file": "silu_config_M3072_N1344.json", + "M": 3072, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 35.439000000000014 + }, + "M=3072,N=1408": { + "file": "silu_config_M3072_N1408.json", + "M": 3072, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.47900000000001 + }, + "M=3072,N=1440": { + "file": 
"silu_config_M3072_N1440.json", + "M": 3072, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.87899999999999 + }, + "M=3072,N=1536": { + "file": "silu_config_M3072_N1536.json", + "M": 3072, + "N": 1536, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.27924999999999 + }, + "M=3072,N=1600": { + "file": "silu_config_M3072_N1600.json", + "M": 3072, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.27925 + }, + "M=3072,N=1664": { + "file": "silu_config_M3072_N1664.json", + "M": 3072, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.59900000000001 + }, + "M=3072,N=1728": { + "file": "silu_config_M3072_N1728.json", + "M": 3072, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.678999999999995 + }, + "M=3072,N=1760": { + "file": "silu_config_M3072_N1760.json", + "M": 3072, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.358999999999995 + }, + "M=3072,N=1792": { + "file": "silu_config_M3072_N1792.json", + "M": 3072, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.79899999999999 + }, + "M=3072,N=1920": { + "file": "silu_config_M3072_N1920.json", + "M": 3072, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.99900000000001 + }, + "M=3072,N=2048": { + "file": "silu_config_M3072_N2048.json", + "M": 3072, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 28.59899999999999 + }, + "M=3072,N=2080": { + "file": "silu_config_M3072_N2080.json", + "M": 3072, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.79925 + }, + "M=3072,N=2240": { + "file": "silu_config_M3072_N2240.json", + "M": 3072, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 41.43925 + }, + "M=3072,N=2400": { + "file": "silu_config_M3072_N2400.json", + "M": 3072, + "N": 2400, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 43.95924999999999 + }, + "M=3072,N=2560": { + "file": "silu_config_M3072_N2560.json", + "M": 3072, 
+ "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 44.879 + }, + "M=4096,N=128": { + "file": "silu_config_M4096_N128.json", + "M": 4096, + "N": 128, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 7.838999999999999 + }, + "M=4096,N=160": { + "file": "silu_config_M4096_N160.json", + "M": 4096, + "N": 160, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 12.318999999999996 + }, + "M=4096,N=192": { + "file": "silu_config_M4096_N192.json", + "M": 4096, + "N": 192, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 12.319249999999997 + }, + "M=4096,N=256": { + "file": "silu_config_M4096_N256.json", + "M": 4096, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 11.399250000000002 + }, + "M=4096,N=320": { + "file": "silu_config_M4096_N320.json", + "M": 4096, + "N": 320, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.03900000000001 + }, + "M=4096,N=384": { + "file": "silu_config_M4096_N384.json", + "M": 4096, + "N": 384, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 15.918999999999988 + }, + "M=4096,N=480": { + "file": "silu_config_M4096_N480.json", + "M": 4096, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.359250000000003 + }, + "M=4096,N=512": { + "file": "silu_config_M4096_N512.json", + "M": 4096, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 14.039000000000001 + }, + "M=4096,N=576": { + "file": "silu_config_M4096_N576.json", + "M": 4096, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.639000000000003 + }, + "M=4096,N=640": { + "file": "silu_config_M4096_N640.json", + "M": 4096, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.519000000000005 + }, + "M=4096,N=768": { + "file": "silu_config_M4096_N768.json", + "M": 4096, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.599000000000018 + }, + "M=4096,N=800": { + "file": "silu_config_M4096_N800.json", + "M": 4096, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + 
"time_us": 24.839000000000013 + }, + "M=4096,N=896": { + "file": "silu_config_M4096_N896.json", + "M": 4096, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 24.47924999999998 + }, + "M=4096,N=960": { + "file": "silu_config_M4096_N960.json", + "M": 4096, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.11925 + }, + "M=4096,N=1024": { + "file": "silu_config_M4096_N1024.json", + "M": 4096, + "N": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 22.23899999999999 + }, + "M=4096,N=1120": { + "file": "silu_config_M4096_N1120.json", + "M": 4096, + "N": 1120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.75924999999999 + }, + "M=4096,N=1152": { + "file": "silu_config_M4096_N1152.json", + "M": 4096, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.55925000000001 + }, + "M=4096,N=1280": { + "file": "silu_config_M4096_N1280.json", + "M": 4096, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.67899999999999 + }, + "M=4096,N=1344": { + "file": "silu_config_M4096_N1344.json", + "M": 4096, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.91924999999999 + }, + "M=4096,N=1408": { + "file": "silu_config_M4096_N1408.json", + "M": 4096, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.03925000000001 + }, + "M=4096,N=1440": { + "file": "silu_config_M4096_N1440.json", + "M": 4096, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.159 + }, + "M=4096,N=1536": { + "file": "silu_config_M4096_N1536.json", + "M": 4096, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.07925 + }, + "M=4096,N=1600": { + "file": "silu_config_M4096_N1600.json", + "M": 4096, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.199250000000006 + }, + "M=4096,N=1664": { + "file": "silu_config_M4096_N1664.json", + "M": 4096, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.23925000000001 + }, + "M=4096,N=1728": 
{ + "file": "silu_config_M4096_N1728.json", + "M": 4096, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 46.159250000000014 + }, + "M=4096,N=1760": { + "file": "silu_config_M4096_N1760.json", + "M": 4096, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.31924999999998 + }, + "M=4096,N=1792": { + "file": "silu_config_M4096_N1792.json", + "M": 4096, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.35925000000001 + }, + "M=4096,N=1920": { + "file": "silu_config_M4096_N1920.json", + "M": 4096, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.27925000000002 + }, + "M=4096,N=2048": { + "file": "silu_config_M4096_N2048.json", + "M": 4096, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 36.318999999999996 + }, + "M=4096,N=2080": { + "file": "silu_config_M4096_N2080.json", + "M": 4096, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.11924999999997 + }, + "M=4096,N=2240": { + "file": "silu_config_M4096_N2240.json", + "M": 4096, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.99925 + }, + "M=4096,N=2400": { + "file": "silu_config_M4096_N2400.json", + "M": 4096, + "N": 2400, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 56.27925000000002 + }, + "M=4096,N=2560": { + "file": "silu_config_M4096_N2560.json", + "M": 4096, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 57.23925 + }, + "M=5120,N=128": { + "file": "silu_config_M5120_N128.json", + "M": 5120, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 8.959249999999997 + }, + "M=5120,N=160": { + "file": "silu_config_M5120_N160.json", + "M": 5120, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 13.718999999999994 + }, + "M=5120,N=192": { + "file": "silu_config_M5120_N192.json", + "M": 5120, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 13.639000000000003 + }, + "M=5120,N=256": { + "file": "silu_config_M5120_N256.json", 
+ "M": 5120, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 12.719000000000001 + }, + "M=5120,N=320": { + "file": "silu_config_M5120_N320.json", + "M": 5120, + "N": 320, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.719000000000015 + }, + "M=5120,N=384": { + "file": "silu_config_M5120_N384.json", + "M": 5120, + "N": 384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 18.718999999999994 + }, + "M=5120,N=480": { + "file": "silu_config_M5120_N480.json", + "M": 5120, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.119000000000007 + }, + "M=5120,N=512": { + "file": "silu_config_M5120_N512.json", + "M": 5120, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 16.038999999999994 + }, + "M=5120,N=576": { + "file": "silu_config_M5120_N576.json", + "M": 5120, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.519000000000005 + }, + "M=5120,N=640": { + "file": "silu_config_M5120_N640.json", + "M": 5120, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.399000000000015 + }, + "M=5120,N=768": { + "file": "silu_config_M5120_N768.json", + "M": 5120, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.599249999999998 + }, + "M=5120,N=800": { + "file": "silu_config_M5120_N800.json", + "M": 5120, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.839250000000014 + }, + "M=5120,N=896": { + "file": "silu_config_M5120_N896.json", + "M": 5120, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.55899999999999 + }, + "M=5120,N=960": { + "file": "silu_config_M5120_N960.json", + "M": 5120, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 30.199250000000013 + }, + "M=5120,N=1024": { + "file": "silu_config_M5120_N1024.json", + "M": 5120, + "N": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 26.599000000000004 + }, + "M=5120,N=1120": { + "file": "silu_config_M5120_N1120.json", + "M": 5120, + "N": 1120, + 
"rows_per_block": 5, + "vec_size": 2, + "time_us": 55.15925 + }, + "M=5120,N=1152": { + "file": "silu_config_M5120_N1152.json", + "M": 5120, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 54.55924999999999 + }, + "M=5120,N=1280": { + "file": "silu_config_M5120_N1280.json", + "M": 5120, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.43925000000002 + }, + "M=5120,N=1344": { + "file": "silu_config_M5120_N1344.json", + "M": 5120, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.079250000000016 + }, + "M=5120,N=1408": { + "file": "silu_config_M5120_N1408.json", + "M": 5120, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.59925 + }, + "M=5120,N=1440": { + "file": "silu_config_M5120_N1440.json", + "M": 5120, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.23925000000001 + }, + "M=5120,N=1536": { + "file": "silu_config_M5120_N1536.json", + "M": 5120, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 54.83925 + }, + "M=5120,N=1600": { + "file": "silu_config_M5120_N1600.json", + "M": 5120, + "N": 1600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 56.39924999999998 + }, + "M=5120,N=1664": { + "file": "silu_config_M5120_N1664.json", + "M": 5120, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.91924999999999 + }, + "M=5120,N=1728": { + "file": "silu_config_M5120_N1728.json", + "M": 5120, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 55.03925 + }, + "M=5120,N=1760": { + "file": "silu_config_M5120_N1760.json", + "M": 5120, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 55.63924999999999 + }, + "M=5120,N=1792": { + "file": "silu_config_M5120_N1792.json", + "M": 5120, + "N": 1792, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 56.119249999999994 + }, + "M=5120,N=1920": { + "file": "silu_config_M5120_N1920.json", + "M": 5120, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
55.039249999999996 + }, + "M=5120,N=2048": { + "file": "silu_config_M5120_N2048.json", + "M": 5120, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 43.75900000000001 + }, + "M=5120,N=2080": { + "file": "silu_config_M5120_N2080.json", + "M": 5120, + "N": 2080, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 63.71925 + }, + "M=5120,N=2240": { + "file": "silu_config_M5120_N2240.json", + "M": 5120, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.07924999999999 + }, + "M=5120,N=2400": { + "file": "silu_config_M5120_N2400.json", + "M": 5120, + "N": 2400, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 68.23949999999998 + }, + "M=5120,N=2560": { + "file": "silu_config_M5120_N2560.json", + "M": 5120, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 70.31925 + }, + "M=6144,N=128": { + "file": "silu_config_M6144_N128.json", + "M": 6144, + "N": 128, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 9.87899999999999 + }, + "M=6144,N=160": { + "file": "silu_config_M6144_N160.json", + "M": 6144, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 15.199250000000006 + }, + "M=6144,N=192": { + "file": "silu_config_M6144_N192.json", + "M": 6144, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 15.078999999999994 + }, + "M=6144,N=256": { + "file": "silu_config_M6144_N256.json", + "M": 6144, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.799 + }, + "M=6144,N=320": { + "file": "silu_config_M6144_N320.json", + "M": 6144, + "N": 320, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 21.399249999999995 + }, + "M=6144,N=384": { + "file": "silu_config_M6144_N384.json", + "M": 6144, + "N": 384, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 21.439249999999994 + }, + "M=6144,N=480": { + "file": "silu_config_M6144_N480.json", + "M": 6144, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 21.799249999999994 + }, + "M=6144,N=512": { + "file": 
"silu_config_M6144_N512.json", + "M": 6144, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 18.238999999999997 + }, + "M=6144,N=576": { + "file": "silu_config_M6144_N576.json", + "M": 6144, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.71900000000002 + }, + "M=6144,N=640": { + "file": "silu_config_M6144_N640.json", + "M": 6144, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.279250000000005 + }, + "M=6144,N=768": { + "file": "silu_config_M6144_N768.json", + "M": 6144, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.358999999999995 + }, + "M=6144,N=800": { + "file": "silu_config_M6144_N800.json", + "M": 6144, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.71925000000001 + }, + "M=6144,N=896": { + "file": "silu_config_M6144_N896.json", + "M": 6144, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.399250000000016 + }, + "M=6144,N=960": { + "file": "silu_config_M6144_N960.json", + "M": 6144, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.03900000000002 + }, + "M=6144,N=1024": { + "file": "silu_config_M6144_N1024.json", + "M": 6144, + "N": 1024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 30.719250000000002 + }, + "M=6144,N=1120": { + "file": "silu_config_M6144_N1120.json", + "M": 6144, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.5595 + }, + "M=6144,N=1152": { + "file": "silu_config_M6144_N1152.json", + "M": 6144, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.79925 + }, + "M=6144,N=1280": { + "file": "silu_config_M6144_N1280.json", + "M": 6144, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.439250000000015 + }, + "M=6144,N=1344": { + "file": "silu_config_M6144_N1344.json", + "M": 6144, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.83925 + }, + "M=6144,N=1408": { + "file": "silu_config_M6144_N1408.json", + "M": 6144, + "N": 1408, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 63.999249999999996 + }, + "M=6144,N=1440": { + "file": "silu_config_M6144_N1440.json", + "M": 6144, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 64.75924999999998 + }, + "M=6144,N=1536": { + "file": "silu_config_M6144_N1536.json", + "M": 6144, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.71925 + }, + "M=6144,N=1600": { + "file": "silu_config_M6144_N1600.json", + "M": 6144, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.63924999999999 + }, + "M=6144,N=1664": { + "file": "silu_config_M6144_N1664.json", + "M": 6144, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.35924999999999 + }, + "M=6144,N=1728": { + "file": "silu_config_M6144_N1728.json", + "M": 6144, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.47925 + }, + "M=6144,N=1760": { + "file": "silu_config_M6144_N1760.json", + "M": 6144, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.35925 + }, + "M=6144,N=1792": { + "file": "silu_config_M6144_N1792.json", + "M": 6144, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.67925 + }, + "M=6144,N=1920": { + "file": "silu_config_M6144_N1920.json", + "M": 6144, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.79925000000003 + }, + "M=6144,N=2048": { + "file": "silu_config_M6144_N2048.json", + "M": 6144, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 52.11900000000001 + }, + "M=6144,N=2080": { + "file": "silu_config_M6144_N2080.json", + "M": 6144, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.39925000000001 + }, + "M=6144,N=2240": { + "file": "silu_config_M6144_N2240.json", + "M": 6144, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 76.11925 + }, + "M=6144,N=2400": { + "file": "silu_config_M6144_N2400.json", + "M": 6144, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
79.71950000000001 + }, + "M=6144,N=2560": { + "file": "silu_config_M6144_N2560.json", + "M": 6144, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 82.3195 + }, + "M=7168,N=128": { + "file": "silu_config_M7168_N128.json", + "M": 7168, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.718999999999994 + }, + "M=7168,N=160": { + "file": "silu_config_M7168_N160.json", + "M": 7168, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.599000000000004 + }, + "M=7168,N=192": { + "file": "silu_config_M7168_N192.json", + "M": 7168, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.678999999999995 + }, + "M=7168,N=256": { + "file": "silu_config_M7168_N256.json", + "M": 7168, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.039000000000001 + }, + "M=7168,N=320": { + "file": "silu_config_M7168_N320.json", + "M": 7168, + "N": 320, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.278999999999982 + }, + "M=7168,N=384": { + "file": "silu_config_M7168_N384.json", + "M": 7168, + "N": 384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.159000000000006 + }, + "M=7168,N=480": { + "file": "silu_config_M7168_N480.json", + "M": 7168, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.599249999999984 + }, + "M=7168,N=512": { + "file": "silu_config_M7168_N512.json", + "M": 7168, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 20.358999999999988 + }, + "M=7168,N=576": { + "file": "silu_config_M7168_N576.json", + "M": 7168, + "N": 576, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 44.75899999999999 + }, + "M=7168,N=640": { + "file": "silu_config_M7168_N640.json", + "M": 7168, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 39.43924999999997 + }, + "M=7168,N=768": { + "file": "silu_config_M7168_N768.json", + "M": 7168, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.47924999999999 + }, + "M=7168,N=800": { + "file": 
"silu_config_M7168_N800.json", + "M": 7168, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.079249999999995 + }, + "M=7168,N=896": { + "file": "silu_config_M7168_N896.json", + "M": 7168, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 39.83899999999999 + }, + "M=7168,N=960": { + "file": "silu_config_M7168_N960.json", + "M": 7168, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.959000000000024 + }, + "M=7168,N=1024": { + "file": "silu_config_M7168_N1024.json", + "M": 7168, + "N": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 34.919000000000004 + }, + "M=7168,N=1120": { + "file": "silu_config_M7168_N1120.json", + "M": 7168, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.87925000000003 + }, + "M=7168,N=1152": { + "file": "silu_config_M7168_N1152.json", + "M": 7168, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.07925000000003 + }, + "M=7168,N=1280": { + "file": "silu_config_M7168_N1280.json", + "M": 7168, + "N": 1280, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 76.03924999999998 + }, + "M=7168,N=1344": { + "file": "silu_config_M7168_N1344.json", + "M": 7168, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.99924999999999 + }, + "M=7168,N=1408": { + "file": "silu_config_M7168_N1408.json", + "M": 7168, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.23950000000002 + }, + "M=7168,N=1440": { + "file": "silu_config_M7168_N1440.json", + "M": 7168, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.59925 + }, + "M=7168,N=1536": { + "file": "silu_config_M7168_N1536.json", + "M": 7168, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.39950000000002 + }, + "M=7168,N=1600": { + "file": "silu_config_M7168_N1600.json", + "M": 7168, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.23925 + }, + "M=7168,N=1664": { + "file": "silu_config_M7168_N1664.json", + "M": 
7168, + "N": 1664, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 76.43950000000001 + }, + "M=7168,N=1728": { + "file": "silu_config_M7168_N1728.json", + "M": 7168, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.79924999999999 + }, + "M=7168,N=1760": { + "file": "silu_config_M7168_N1760.json", + "M": 7168, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 76.71925000000002 + }, + "M=7168,N=1792": { + "file": "silu_config_M7168_N1792.json", + "M": 7168, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 76.07925 + }, + "M=7168,N=1920": { + "file": "silu_config_M7168_N1920.json", + "M": 7168, + "N": 1920, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 76.95925000000001 + }, + "M=7168,N=2048": { + "file": "silu_config_M7168_N2048.json", + "M": 7168, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 59.559250000000006 + }, + "M=7168,N=2080": { + "file": "silu_config_M7168_N2080.json", + "M": 7168, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.79925000000001 + }, + "M=7168,N=2240": { + "file": "silu_config_M7168_N2240.json", + "M": 7168, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.03924999999998 + }, + "M=7168,N=2400": { + "file": "silu_config_M7168_N2400.json", + "M": 7168, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.39949999999997 + }, + "M=7168,N=2560": { + "file": "silu_config_M7168_N2560.json", + "M": 7168, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 95.75924999999998 + }, + "M=8192,N=128": { + "file": "silu_config_M8192_N128.json", + "M": 8192, + "N": 128, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 11.358999999999995 + }, + "M=8192,N=160": { + "file": "silu_config_M8192_N160.json", + "M": 8192, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.83899999999999 + }, + "M=8192,N=192": { + "file": "silu_config_M8192_N192.json", + "M": 8192, + "N": 192, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 17.839250000000007 + }, + "M=8192,N=256": { + "file": "silu_config_M8192_N256.json", + "M": 8192, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.279000000000003 + }, + "M=8192,N=320": { + "file": "silu_config_M8192_N320.json", + "M": 8192, + "N": 320, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 26.839000000000006 + }, + "M=8192,N=384": { + "file": "silu_config_M8192_N384.json", + "M": 8192, + "N": 384, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 26.718999999999994 + }, + "M=8192,N=480": { + "file": "silu_config_M8192_N480.json", + "M": 8192, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 27.358999999999988 + }, + "M=8192,N=512": { + "file": "silu_config_M8192_N512.json", + "M": 8192, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 22.51899999999999 + }, + "M=8192,N=576": { + "file": "silu_config_M8192_N576.json", + "M": 8192, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.319250000000004 + }, + "M=8192,N=640": { + "file": "silu_config_M8192_N640.json", + "M": 8192, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.71925 + }, + "M=8192,N=768": { + "file": "silu_config_M8192_N768.json", + "M": 8192, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.07924999999999 + }, + "M=8192,N=800": { + "file": "silu_config_M8192_N800.json", + "M": 8192, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.63925000000001 + }, + "M=8192,N=896": { + "file": "silu_config_M8192_N896.json", + "M": 8192, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.27925000000001 + }, + "M=8192,N=960": { + "file": "silu_config_M8192_N960.json", + "M": 8192, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.67924999999998 + }, + "M=8192,N=1024": { + "file": "silu_config_M8192_N1024.json", + "M": 8192, + "N": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 
38.99925000000001 + }, + "M=8192,N=1120": { + "file": "silu_config_M8192_N1120.json", + "M": 8192, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.11925000000002 + }, + "M=8192,N=1152": { + "file": "silu_config_M8192_N1152.json", + "M": 8192, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.31925 + }, + "M=8192,N=1280": { + "file": "silu_config_M8192_N1280.json", + "M": 8192, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 81.99950000000003 + }, + "M=8192,N=1344": { + "file": "silu_config_M8192_N1344.json", + "M": 8192, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 85.03950000000003 + }, + "M=8192,N=1408": { + "file": "silu_config_M8192_N1408.json", + "M": 8192, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 85.3995 + }, + "M=8192,N=1440": { + "file": "silu_config_M8192_N1440.json", + "M": 8192, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.35924999999999 + }, + "M=8192,N=1536": { + "file": "silu_config_M8192_N1536.json", + "M": 8192, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 84.31924999999997 + }, + "M=8192,N=1600": { + "file": "silu_config_M8192_N1600.json", + "M": 8192, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.51925 + }, + "M=8192,N=1664": { + "file": "silu_config_M8192_N1664.json", + "M": 8192, + "N": 1664, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 86.87950000000001 + }, + "M=8192,N=1728": { + "file": "silu_config_M8192_N1728.json", + "M": 8192, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 85.6795 + }, + "M=8192,N=1760": { + "file": "silu_config_M8192_N1760.json", + "M": 8192, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.87950000000002 + }, + "M=8192,N=1792": { + "file": "silu_config_M8192_N1792.json", + "M": 8192, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 86.03925000000001 + }, + "M=8192,N=1920": { + "file": 
"silu_config_M8192_N1920.json", + "M": 8192, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 86.31949999999999 + }, + "M=8192,N=2048": { + "file": "silu_config_M8192_N2048.json", + "M": 8192, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 69.19925000000002 + }, + "M=8192,N=2080": { + "file": "silu_config_M8192_N2080.json", + "M": 8192, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.39949999999999 + }, + "M=8192,N=2240": { + "file": "silu_config_M8192_N2240.json", + "M": 8192, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 99.83950000000002 + }, + "M=8192,N=2400": { + "file": "silu_config_M8192_N2400.json", + "M": 8192, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.95949999999999 + }, + "M=8192,N=2560": { + "file": "silu_config_M8192_N2560.json", + "M": 8192, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.23950000000002 + }, + "M=9216,N=128": { + "file": "silu_config_M9216_N128.json", + "M": 9216, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 11.919249999999991 + }, + "M=9216,N=160": { + "file": "silu_config_M9216_N160.json", + "M": 9216, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.91899999999999 + }, + "M=9216,N=192": { + "file": "silu_config_M9216_N192.json", + "M": 9216, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.839 + }, + "M=9216,N=256": { + "file": "silu_config_M9216_N256.json", + "M": 9216, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.318999999999996 + }, + "M=9216,N=320": { + "file": "silu_config_M9216_N320.json", + "M": 9216, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.478999999999992 + }, + "M=9216,N=384": { + "file": "silu_config_M9216_N384.json", + "M": 9216, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.438999999999993 + }, + "M=9216,N=480": { + "file": "silu_config_M9216_N480.json", + "M": 9216, 
+ "N": 480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.839 + }, + "M=9216,N=512": { + "file": "silu_config_M9216_N512.json", + "M": 9216, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 24.398999999999987 + }, + "M=9216,N=576": { + "file": "silu_config_M9216_N576.json", + "M": 9216, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.19925000000001 + }, + "M=9216,N=640": { + "file": "silu_config_M9216_N640.json", + "M": 9216, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.71924999999999 + }, + "M=9216,N=768": { + "file": "silu_config_M9216_N768.json", + "M": 9216, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 49.75924999999998 + }, + "M=9216,N=800": { + "file": "silu_config_M9216_N800.json", + "M": 9216, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.479249999999986 + }, + "M=9216,N=896": { + "file": "silu_config_M9216_N896.json", + "M": 9216, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 49.99925 + }, + "M=9216,N=960": { + "file": "silu_config_M9216_N960.json", + "M": 9216, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 50.599249999999984 + }, + "M=9216,N=1024": { + "file": "silu_config_M9216_N1024.json", + "M": 9216, + "N": 1024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 43.159000000000006 + }, + "M=9216,N=1120": { + "file": "silu_config_M9216_N1120.json", + "M": 9216, + "N": 1120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 97.4795 + }, + "M=9216,N=1152": { + "file": "silu_config_M9216_N1152.json", + "M": 9216, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.91950000000001 + }, + "M=9216,N=1280": { + "file": "silu_config_M9216_N1280.json", + "M": 9216, + "N": 1280, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 100.31925000000001 + }, + "M=9216,N=1344": { + "file": "silu_config_M9216_N1344.json", + "M": 9216, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 94.35950000000001 + }, + "M=9216,N=1408": { + "file": "silu_config_M9216_N1408.json", + "M": 9216, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.43949999999995 + }, + "M=9216,N=1440": { + "file": "silu_config_M9216_N1440.json", + "M": 9216, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.67924999999998 + }, + "M=9216,N=1536": { + "file": "silu_config_M9216_N1536.json", + "M": 9216, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.79924999999999 + }, + "M=9216,N=1600": { + "file": "silu_config_M9216_N1600.json", + "M": 9216, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.71950000000002 + }, + "M=9216,N=1664": { + "file": "silu_config_M9216_N1664.json", + "M": 9216, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 93.95925 + }, + "M=9216,N=1728": { + "file": "silu_config_M9216_N1728.json", + "M": 9216, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 97.07925 + }, + "M=9216,N=1760": { + "file": "silu_config_M9216_N1760.json", + "M": 9216, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 97.35950000000001 + }, + "M=9216,N=1792": { + "file": "silu_config_M9216_N1792.json", + "M": 9216, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.19924999999999 + }, + "M=9216,N=1920": { + "file": "silu_config_M9216_N1920.json", + "M": 9216, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.99950000000001 + }, + "M=9216,N=2048": { + "file": "silu_config_M9216_N2048.json", + "M": 9216, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 75.31925000000003 + }, + "M=9216,N=2080": { + "file": "silu_config_M9216_N2080.json", + "M": 9216, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.23925000000001 + }, + "M=9216,N=2240": { + "file": "silu_config_M9216_N2240.json", + "M": 9216, + "N": 2240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 113.47950000000003 + }, + 
"M=9216,N=2400": { + "file": "silu_config_M9216_N2400.json", + "M": 9216, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 116.63950000000003 + }, + "M=9216,N=2560": { + "file": "silu_config_M9216_N2560.json", + "M": 9216, + "N": 2560, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 125.27950000000001 + }, + "M=10240,N=128": { + "file": "silu_config_M10240_N128.json", + "M": 10240, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 12.718999999999994 + }, + "M=10240,N=160": { + "file": "silu_config_M10240_N160.json", + "M": 10240, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.439000000000007 + }, + "M=10240,N=192": { + "file": "silu_config_M10240_N192.json", + "M": 10240, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.31899999999999 + }, + "M=10240,N=256": { + "file": "silu_config_M10240_N256.json", + "M": 10240, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.638999999999996 + }, + "M=10240,N=320": { + "file": "silu_config_M10240_N320.json", + "M": 10240, + "N": 320, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.959 + }, + "M=10240,N=384": { + "file": "silu_config_M10240_N384.json", + "M": 10240, + "N": 384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.638999999999985 + }, + "M=10240,N=480": { + "file": "silu_config_M10240_N480.json", + "M": 10240, + "N": 480, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 32.319 + }, + "M=10240,N=512": { + "file": "silu_config_M10240_N512.json", + "M": 10240, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 26.67924999999999 + }, + "M=10240,N=576": { + "file": "silu_config_M10240_N576.json", + "M": 10240, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 53.63925000000002 + }, + "M=10240,N=640": { + "file": "silu_config_M10240_N640.json", + "M": 10240, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.27924999999999 + }, + "M=10240,N=768": { + "file": 
"silu_config_M10240_N768.json", + "M": 10240, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 53.479250000000015 + }, + "M=10240,N=800": { + "file": "silu_config_M10240_N800.json", + "M": 10240, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 53.999249999999996 + }, + "M=10240,N=896": { + "file": "silu_config_M10240_N896.json", + "M": 10240, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 54.27924999999999 + }, + "M=10240,N=960": { + "file": "silu_config_M10240_N960.json", + "M": 10240, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.39925000000001 + }, + "M=10240,N=1024": { + "file": "silu_config_M10240_N1024.json", + "M": 10240, + "N": 1024, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 47.279 + }, + "M=10240,N=1120": { + "file": "silu_config_M10240_N1120.json", + "M": 10240, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.31949999999996 + }, + "M=10240,N=1152": { + "file": "silu_config_M10240_N1152.json", + "M": 10240, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 106.6795 + }, + "M=10240,N=1280": { + "file": "silu_config_M10240_N1280.json", + "M": 10240, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.55925000000002 + }, + "M=10240,N=1344": { + "file": "silu_config_M10240_N1344.json", + "M": 10240, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 105.11950000000003 + }, + "M=10240,N=1408": { + "file": "silu_config_M10240_N1408.json", + "M": 10240, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.8395 + }, + "M=10240,N=1440": { + "file": "silu_config_M10240_N1440.json", + "M": 10240, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.91949999999999 + }, + "M=10240,N=1536": { + "file": "silu_config_M10240_N1536.json", + "M": 10240, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 104.39950000000002 + }, + "M=10240,N=1600": { + "file": 
"silu_config_M10240_N1600.json", + "M": 10240, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.79925000000001 + }, + "M=10240,N=1664": { + "file": "silu_config_M10240_N1664.json", + "M": 10240, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 105.23924999999997 + }, + "M=10240,N=1728": { + "file": "silu_config_M10240_N1728.json", + "M": 10240, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 106.27950000000003 + }, + "M=10240,N=1760": { + "file": "silu_config_M10240_N1760.json", + "M": 10240, + "N": 1760, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 108.9595 + }, + "M=10240,N=1792": { + "file": "silu_config_M10240_N1792.json", + "M": 10240, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.27924999999999 + }, + "M=10240,N=1920": { + "file": "silu_config_M10240_N1920.json", + "M": 10240, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.63925 + }, + "M=10240,N=2048": { + "file": "silu_config_M10240_N2048.json", + "M": 10240, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 82.27949999999993 + }, + "M=10240,N=2080": { + "file": "silu_config_M10240_N2080.json", + "M": 10240, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.87949999999998 + }, + "M=10240,N=2240": { + "file": "silu_config_M10240_N2240.json", + "M": 10240, + "N": 2240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 124.47949999999997 + }, + "M=10240,N=2400": { + "file": "silu_config_M10240_N2400.json", + "M": 10240, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 129.35949999999997 + }, + "M=10240,N=2560": { + "file": "silu_config_M10240_N2560.json", + "M": 10240, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 135.23949999999996 + }, + "M=11264,N=128": { + "file": "silu_config_M11264_N128.json", + "M": 11264, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.359000000000009 + }, + "M=11264,N=160": { + 
"file": "silu_config_M11264_N160.json", + "M": 11264, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.598999999999997 + }, + "M=11264,N=192": { + "file": "silu_config_M11264_N192.json", + "M": 11264, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.439000000000007 + }, + "M=11264,N=256": { + "file": "silu_config_M11264_N256.json", + "M": 11264, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.599000000000004 + }, + "M=11264,N=320": { + "file": "silu_config_M11264_N320.json", + "M": 11264, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.759249999999994 + }, + "M=11264,N=384": { + "file": "silu_config_M11264_N384.json", + "M": 11264, + "N": 384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 34.55900000000002 + }, + "M=11264,N=480": { + "file": "silu_config_M11264_N480.json", + "M": 11264, + "N": 480, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.27900000000001 + }, + "M=11264,N=512": { + "file": "silu_config_M11264_N512.json", + "M": 11264, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 28.75925 + }, + "M=11264,N=576": { + "file": "silu_config_M11264_N576.json", + "M": 11264, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 60.759250000000016 + }, + "M=11264,N=640": { + "file": "silu_config_M11264_N640.json", + "M": 11264, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 60.39925 + }, + "M=11264,N=768": { + "file": "silu_config_M11264_N768.json", + "M": 11264, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 59.83925000000002 + }, + "M=11264,N=800": { + "file": "silu_config_M11264_N800.json", + "M": 11264, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 60.31925000000001 + }, + "M=11264,N=896": { + "file": "silu_config_M11264_N896.json", + "M": 11264, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.599250000000026 + }, + "M=11264,N=960": { + "file": 
"silu_config_M11264_N960.json", + "M": 11264, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 60.679249999999996 + }, + "M=11264,N=1024": { + "file": "silu_config_M11264_N1024.json", + "M": 11264, + "N": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 51.559250000000006 + }, + "M=11264,N=1120": { + "file": "silu_config_M11264_N1120.json", + "M": 11264, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.51949999999995 + }, + "M=11264,N=1152": { + "file": "silu_config_M11264_N1152.json", + "M": 11264, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 116.9595 + }, + "M=11264,N=1280": { + "file": "silu_config_M11264_N1280.json", + "M": 11264, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 113.75949999999999 + }, + "M=11264,N=1344": { + "file": "silu_config_M11264_N1344.json", + "M": 11264, + "N": 1344, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 118.39949999999999 + }, + "M=11264,N=1408": { + "file": "silu_config_M11264_N1408.json", + "M": 11264, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.63950000000001 + }, + "M=11264,N=1440": { + "file": "silu_config_M11264_N1440.json", + "M": 11264, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 117.19949999999997 + }, + "M=11264,N=1536": { + "file": "silu_config_M11264_N1536.json", + "M": 11264, + "N": 1536, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 117.59950000000003 + }, + "M=11264,N=1600": { + "file": "silu_config_M11264_N1600.json", + "M": 11264, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 117.1995 + }, + "M=11264,N=1664": { + "file": "silu_config_M11264_N1664.json", + "M": 11264, + "N": 1664, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 117.7995 + }, + "M=11264,N=1728": { + "file": "silu_config_M11264_N1728.json", + "M": 11264, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.87950000000001 + }, + "M=11264,N=1760": { + "file": 
"silu_config_M11264_N1760.json", + "M": 11264, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 116.1195 + }, + "M=11264,N=1792": { + "file": "silu_config_M11264_N1792.json", + "M": 11264, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.1995 + }, + "M=11264,N=1920": { + "file": "silu_config_M11264_N1920.json", + "M": 11264, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.43950000000004 + }, + "M=11264,N=2048": { + "file": "silu_config_M11264_N2048.json", + "M": 11264, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 90.59925000000001 + }, + "M=11264,N=2080": { + "file": "silu_config_M11264_N2080.json", + "M": 11264, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.47949999999999 + }, + "M=11264,N=2240": { + "file": "silu_config_M11264_N2240.json", + "M": 11264, + "N": 2240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 137.3195 + }, + "M=11264,N=2400": { + "file": "silu_config_M11264_N2400.json", + "M": 11264, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 141.99975000000003 + }, + "M=11264,N=2560": { + "file": "silu_config_M11264_N2560.json", + "M": 11264, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 146.9595 + }, + "M=12288,N=128": { + "file": "silu_config_M12288_N128.json", + "M": 12288, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.919000000000011 + }, + "M=12288,N=160": { + "file": "silu_config_M12288_N160.json", + "M": 12288, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.599000000000018 + }, + "M=12288,N=192": { + "file": "silu_config_M12288_N192.json", + "M": 12288, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.839250000000007 + }, + "M=12288,N=256": { + "file": "silu_config_M12288_N256.json", + "M": 12288, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.75925 + }, + "M=12288,N=320": { + "file": "silu_config_M12288_N320.json", + 
"M": 12288, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.27925000000001 + }, + "M=12288,N=384": { + "file": "silu_config_M12288_N384.json", + "M": 12288, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.03900000000001 + }, + "M=12288,N=480": { + "file": "silu_config_M12288_N480.json", + "M": 12288, + "N": 480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.79925 + }, + "M=12288,N=512": { + "file": "silu_config_M12288_N512.json", + "M": 12288, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 30.15925 + }, + "M=12288,N=576": { + "file": "silu_config_M12288_N576.json", + "M": 12288, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.91925000000001 + }, + "M=12288,N=640": { + "file": "silu_config_M12288_N640.json", + "M": 12288, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.27925000000001 + }, + "M=12288,N=768": { + "file": "silu_config_M12288_N768.json", + "M": 12288, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.95950000000001 + }, + "M=12288,N=800": { + "file": "silu_config_M12288_N800.json", + "M": 12288, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.43924999999999 + }, + "M=12288,N=896": { + "file": "silu_config_M12288_N896.json", + "M": 12288, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.71924999999999 + }, + "M=12288,N=960": { + "file": "silu_config_M12288_N960.json", + "M": 12288, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.59925 + }, + "M=12288,N=1024": { + "file": "silu_config_M12288_N1024.json", + "M": 12288, + "N": 1024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 55.439250000000015 + }, + "M=12288,N=1120": { + "file": "silu_config_M12288_N1120.json", + "M": 12288, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.47975000000002 + }, + "M=12288,N=1152": { + "file": "silu_config_M12288_N1152.json", + "M": 12288, + "N": 1152, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 122.39950000000003 + }, + "M=12288,N=1280": { + "file": "silu_config_M12288_N1280.json", + "M": 12288, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.3595 + }, + "M=12288,N=1344": { + "file": "silu_config_M12288_N1344.json", + "M": 12288, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.1195 + }, + "M=12288,N=1408": { + "file": "silu_config_M12288_N1408.json", + "M": 12288, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.91949999999999 + }, + "M=12288,N=1440": { + "file": "silu_config_M12288_N1440.json", + "M": 12288, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.35950000000005 + }, + "M=12288,N=1536": { + "file": "silu_config_M12288_N1536.json", + "M": 12288, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 125.87950000000004 + }, + "M=12288,N=1600": { + "file": "silu_config_M12288_N1600.json", + "M": 12288, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.99950000000001 + }, + "M=12288,N=1664": { + "file": "silu_config_M12288_N1664.json", + "M": 12288, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 123.43950000000001 + }, + "M=12288,N=1728": { + "file": "silu_config_M12288_N1728.json", + "M": 12288, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.27950000000001 + }, + "M=12288,N=1760": { + "file": "silu_config_M12288_N1760.json", + "M": 12288, + "N": 1760, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 129.83975 + }, + "M=12288,N=1792": { + "file": "silu_config_M12288_N1792.json", + "M": 12288, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 123.87949999999995 + }, + "M=12288,N=1920": { + "file": "silu_config_M12288_N1920.json", + "M": 12288, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.5995 + }, + "M=12288,N=2048": { + "file": "silu_config_M12288_N2048.json", + "M": 12288, + "N": 2048, + 
"rows_per_block": 6, + "vec_size": 4, + "time_us": 97.79925 + }, + "M=12288,N=2080": { + "file": "silu_config_M12288_N2080.json", + "M": 12288, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 138.59975 + }, + "M=12288,N=2240": { + "file": "silu_config_M12288_N2240.json", + "M": 12288, + "N": 2240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 149.15974999999997 + }, + "M=12288,N=2400": { + "file": "silu_config_M12288_N2400.json", + "M": 12288, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 153.07975 + }, + "M=12288,N=2560": { + "file": "silu_config_M12288_N2560.json", + "M": 12288, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 158.5195 + }, + "M=13312,N=128": { + "file": "silu_config_M13312_N128.json", + "M": 13312, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 14.399000000000001 + }, + "M=13312,N=160": { + "file": "silu_config_M13312_N160.json", + "M": 13312, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.959000000000003 + }, + "M=13312,N=192": { + "file": "silu_config_M13312_N192.json", + "M": 13312, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.879000000000012 + }, + "M=13312,N=256": { + "file": "silu_config_M13312_N256.json", + "M": 13312, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.039 + }, + "M=13312,N=320": { + "file": "silu_config_M13312_N320.json", + "M": 13312, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.839000000000006 + }, + "M=13312,N=384": { + "file": "silu_config_M13312_N384.json", + "M": 13312, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.639250000000004 + }, + "M=13312,N=480": { + "file": "silu_config_M13312_N480.json", + "M": 13312, + "N": 480, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 40.59899999999999 + }, + "M=13312,N=512": { + "file": "silu_config_M13312_N512.json", + "M": 13312, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + 
"time_us": 32.879 + }, + "M=13312,N=576": { + "file": "silu_config_M13312_N576.json", + "M": 13312, + "N": 576, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 74.67925 + }, + "M=13312,N=640": { + "file": "silu_config_M13312_N640.json", + "M": 13312, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.67925 + }, + "M=13312,N=768": { + "file": "silu_config_M13312_N768.json", + "M": 13312, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.91925000000002 + }, + "M=13312,N=800": { + "file": "silu_config_M13312_N800.json", + "M": 13312, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 71.4795 + }, + "M=13312,N=896": { + "file": "silu_config_M13312_N896.json", + "M": 13312, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 69.95924999999997 + }, + "M=13312,N=960": { + "file": "silu_config_M13312_N960.json", + "M": 13312, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 71.83950000000002 + }, + "M=13312,N=1024": { + "file": "silu_config_M13312_N1024.json", + "M": 13312, + "N": 1024, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 59.35925 + }, + "M=13312,N=1120": { + "file": "silu_config_M13312_N1120.json", + "M": 13312, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.9595 + }, + "M=13312,N=1152": { + "file": "silu_config_M13312_N1152.json", + "M": 13312, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.71949999999995 + }, + "M=13312,N=1280": { + "file": "silu_config_M13312_N1280.json", + "M": 13312, + "N": 1280, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 137.71949999999998 + }, + "M=13312,N=1344": { + "file": "silu_config_M13312_N1344.json", + "M": 13312, + "N": 1344, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 139.07950000000002 + }, + "M=13312,N=1408": { + "file": "silu_config_M13312_N1408.json", + "M": 13312, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 139.83950000000004 + }, + "M=13312,N=1440": 
{ + "file": "silu_config_M13312_N1440.json", + "M": 13312, + "N": 1440, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 139.7995 + }, + "M=13312,N=1536": { + "file": "silu_config_M13312_N1536.json", + "M": 13312, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.95949999999996 + }, + "M=13312,N=1600": { + "file": "silu_config_M13312_N1600.json", + "M": 13312, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 135.5595 + }, + "M=13312,N=1664": { + "file": "silu_config_M13312_N1664.json", + "M": 13312, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 134.35950000000003 + }, + "M=13312,N=1728": { + "file": "silu_config_M13312_N1728.json", + "M": 13312, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 132.15949999999998 + }, + "M=13312,N=1760": { + "file": "silu_config_M13312_N1760.json", + "M": 13312, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 136.2395 + }, + "M=13312,N=1792": { + "file": "silu_config_M13312_N1792.json", + "M": 13312, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 134.87950000000004 + }, + "M=13312,N=1920": { + "file": "silu_config_M13312_N1920.json", + "M": 13312, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.67950000000002 + }, + "M=13312,N=2048": { + "file": "silu_config_M13312_N2048.json", + "M": 13312, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 107.1995 + }, + "M=13312,N=2080": { + "file": "silu_config_M13312_N2080.json", + "M": 13312, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 150.87974999999994 + }, + "M=13312,N=2240": { + "file": "silu_config_M13312_N2240.json", + "M": 13312, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 158.43975000000003 + }, + "M=13312,N=2400": { + "file": "silu_config_M13312_N2400.json", + "M": 13312, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 166.43975 + }, + "M=13312,N=2560": { + "file": 
"silu_config_M13312_N2560.json", + "M": 13312, + "N": 2560, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 178.95999999999998 + }, + "M=14336,N=128": { + "file": "silu_config_M14336_N128.json", + "M": 14336, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.079 + }, + "M=14336,N=160": { + "file": "silu_config_M14336_N160.json", + "M": 14336, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 26.359000000000023 + }, + "M=14336,N=192": { + "file": "silu_config_M14336_N192.json", + "M": 14336, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.158999999999992 + }, + "M=14336,N=256": { + "file": "silu_config_M14336_N256.json", + "M": 14336, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.039 + }, + "M=14336,N=320": { + "file": "silu_config_M14336_N320.json", + "M": 14336, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.439 + }, + "M=14336,N=384": { + "file": "silu_config_M14336_N384.json", + "M": 14336, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.159000000000006 + }, + "M=14336,N=480": { + "file": "silu_config_M14336_N480.json", + "M": 14336, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.23900000000002 + }, + "M=14336,N=512": { + "file": "silu_config_M14336_N512.json", + "M": 14336, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 34.99925000000002 + }, + "M=14336,N=576": { + "file": "silu_config_M14336_N576.json", + "M": 14336, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.1995 + }, + "M=14336,N=640": { + "file": "silu_config_M14336_N640.json", + "M": 14336, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 74.55924999999999 + }, + "M=14336,N=768": { + "file": "silu_config_M14336_N768.json", + "M": 14336, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 74.83924999999999 + }, + "M=14336,N=800": { + "file": "silu_config_M14336_N800.json", + "M": 14336, + "N": 
800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.55925 + }, + "M=14336,N=896": { + "file": "silu_config_M14336_N896.json", + "M": 14336, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.03950000000002 + }, + "M=14336,N=960": { + "file": "silu_config_M14336_N960.json", + "M": 14336, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.11924999999998 + }, + "M=14336,N=1024": { + "file": "silu_config_M14336_N1024.json", + "M": 14336, + "N": 1024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 63.75925 + }, + "M=14336,N=1120": { + "file": "silu_config_M14336_N1120.json", + "M": 14336, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.11950000000004 + }, + "M=14336,N=1152": { + "file": "silu_config_M14336_N1152.json", + "M": 14336, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 147.71974999999998 + }, + "M=14336,N=1280": { + "file": "silu_config_M14336_N1280.json", + "M": 14336, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 145.7995 + }, + "M=14336,N=1344": { + "file": "silu_config_M14336_N1344.json", + "M": 14336, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 144.9995 + }, + "M=14336,N=1408": { + "file": "silu_config_M14336_N1408.json", + "M": 14336, + "N": 1408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.11950000000002 + }, + "M=14336,N=1440": { + "file": "silu_config_M14336_N1440.json", + "M": 14336, + "N": 1440, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 158.35950000000003 + }, + "M=14336,N=1536": { + "file": "silu_config_M14336_N1536.json", + "M": 14336, + "N": 1536, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.47975000000002 + }, + "M=14336,N=1600": { + "file": "silu_config_M14336_N1600.json", + "M": 14336, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.27949999999993 + }, + "M=14336,N=1664": { + "file": "silu_config_M14336_N1664.json", + "M": 14336, + "N": 1664, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 144.27975 + }, + "M=14336,N=1728": { + "file": "silu_config_M14336_N1728.json", + "M": 14336, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 150.43975 + }, + "M=14336,N=1760": { + "file": "silu_config_M14336_N1760.json", + "M": 14336, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 148.67950000000002 + }, + "M=14336,N=1792": { + "file": "silu_config_M14336_N1792.json", + "M": 14336, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.27950000000004 + }, + "M=14336,N=1920": { + "file": "silu_config_M14336_N1920.json", + "M": 14336, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.39950000000002 + }, + "M=14336,N=2048": { + "file": "silu_config_M14336_N2048.json", + "M": 14336, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 114.75950000000003 + }, + "M=14336,N=2080": { + "file": "silu_config_M14336_N2080.json", + "M": 14336, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.47949999999997 + }, + "M=14336,N=2240": { + "file": "silu_config_M14336_N2240.json", + "M": 14336, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.07975000000005 + }, + "M=14336,N=2400": { + "file": "silu_config_M14336_N2400.json", + "M": 14336, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 174.39975000000004 + }, + "M=14336,N=2560": { + "file": "silu_config_M14336_N2560.json", + "M": 14336, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.55975000000007 + }, + "M=15360,N=128": { + "file": "silu_config_M15360_N128.json", + "M": 15360, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.558999999999997 + }, + "M=15360,N=160": { + "file": "silu_config_M15360_N160.json", + "M": 15360, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.318999999999996 + }, + "M=15360,N=192": { + "file": "silu_config_M15360_N192.json", + "M": 15360, + "N": 
192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 27.558999999999997 + }, + "M=15360,N=256": { + "file": "silu_config_M15360_N256.json", + "M": 15360, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.198999999999998 + }, + "M=15360,N=320": { + "file": "silu_config_M15360_N320.json", + "M": 15360, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.838999999999984 + }, + "M=15360,N=384": { + "file": "silu_config_M15360_N384.json", + "M": 15360, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.47900000000001 + }, + "M=15360,N=480": { + "file": "silu_config_M15360_N480.json", + "M": 15360, + "N": 480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.398999999999994 + }, + "M=15360,N=512": { + "file": "silu_config_M15360_N512.json", + "M": 15360, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 37.07925000000001 + }, + "M=15360,N=576": { + "file": "silu_config_M15360_N576.json", + "M": 15360, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 81.27925000000002 + }, + "M=15360,N=640": { + "file": "silu_config_M15360_N640.json", + "M": 15360, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 78.99950000000001 + }, + "M=15360,N=768": { + "file": "silu_config_M15360_N768.json", + "M": 15360, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.3195 + }, + "M=15360,N=800": { + "file": "silu_config_M15360_N800.json", + "M": 15360, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 79.99925 + }, + "M=15360,N=896": { + "file": "silu_config_M15360_N896.json", + "M": 15360, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 78.47924999999998 + }, + "M=15360,N=960": { + "file": "silu_config_M15360_N960.json", + "M": 15360, + "N": 960, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 85.2795 + }, + "M=15360,N=1024": { + "file": "silu_config_M15360_N1024.json", + "M": 15360, + "N": 1024, + "rows_per_block": 6, + "vec_size": 
4, + "time_us": 67.23924999999998 + }, + "M=15360,N=1120": { + "file": "silu_config_M15360_N1120.json", + "M": 15360, + "N": 1120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 159.5995 + }, + "M=15360,N=1152": { + "file": "silu_config_M15360_N1152.json", + "M": 15360, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.63950000000003 + }, + "M=15360,N=1280": { + "file": "silu_config_M15360_N1280.json", + "M": 15360, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 154.35974999999996 + }, + "M=15360,N=1344": { + "file": "silu_config_M15360_N1344.json", + "M": 15360, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 153.91949999999994 + }, + "M=15360,N=1408": { + "file": "silu_config_M15360_N1408.json", + "M": 15360, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.63950000000006 + }, + "M=15360,N=1440": { + "file": "silu_config_M15360_N1440.json", + "M": 15360, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 156.55975000000004 + }, + "M=15360,N=1536": { + "file": "silu_config_M15360_N1536.json", + "M": 15360, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.5195 + }, + "M=15360,N=1600": { + "file": "silu_config_M15360_N1600.json", + "M": 15360, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 156.59975 + }, + "M=15360,N=1664": { + "file": "silu_config_M15360_N1664.json", + "M": 15360, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 155.35949999999997 + }, + "M=15360,N=1728": { + "file": "silu_config_M15360_N1728.json", + "M": 15360, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 154.79949999999994 + }, + "M=15360,N=1760": { + "file": "silu_config_M15360_N1760.json", + "M": 15360, + "N": 1760, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 161.15975000000003 + }, + "M=15360,N=1792": { + "file": "silu_config_M15360_N1792.json", + "M": 15360, + "N": 1792, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 150.4395 + }, + "M=15360,N=1920": { + "file": "silu_config_M15360_N1920.json", + "M": 15360, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.15975000000003 + }, + "M=15360,N=2048": { + "file": "silu_config_M15360_N2048.json", + "M": 15360, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 125.43950000000001 + }, + "M=15360,N=2080": { + "file": "silu_config_M15360_N2080.json", + "M": 15360, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 171.71975000000003 + }, + "M=15360,N=2240": { + "file": "silu_config_M15360_N2240.json", + "M": 15360, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 180.39999999999998 + }, + "M=15360,N=2400": { + "file": "silu_config_M15360_N2400.json", + "M": 15360, + "N": 2400, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 192.99975000000006 + }, + "M=15360,N=2560": { + "file": "silu_config_M15360_N2560.json", + "M": 15360, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.75974999999994 + }, + "M=16384,N=128": { + "file": "silu_config_M16384_N128.json", + "M": 16384, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.358999999999995 + }, + "M=16384,N=160": { + "file": "silu_config_M16384_N160.json", + "M": 16384, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.479000000000013 + }, + "M=16384,N=192": { + "file": "silu_config_M16384_N192.json", + "M": 16384, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.559000000000005 + }, + "M=16384,N=256": { + "file": "silu_config_M16384_N256.json", + "M": 16384, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.119000000000014 + }, + "M=16384,N=320": { + "file": "silu_config_M16384_N320.json", + "M": 16384, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 47.599249999999984 + }, + "M=16384,N=384": { + "file": "silu_config_M16384_N384.json", + "M": 16384, + "N": 384, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 47.39925 + }, + "M=16384,N=480": { + "file": "silu_config_M16384_N480.json", + "M": 16384, + "N": 480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.31925 + }, + "M=16384,N=512": { + "file": "silu_config_M16384_N512.json", + "M": 16384, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 38.67925000000002 + }, + "M=16384,N=576": { + "file": "silu_config_M16384_N576.json", + "M": 16384, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.0395 + }, + "M=16384,N=640": { + "file": "silu_config_M16384_N640.json", + "M": 16384, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 83.27924999999998 + }, + "M=16384,N=768": { + "file": "silu_config_M16384_N768.json", + "M": 16384, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.79924999999997 + }, + "M=16384,N=800": { + "file": "silu_config_M16384_N800.json", + "M": 16384, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.63950000000003 + }, + "M=16384,N=896": { + "file": "silu_config_M16384_N896.json", + "M": 16384, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 86.47950000000002 + }, + "M=16384,N=960": { + "file": "silu_config_M16384_N960.json", + "M": 16384, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.31949999999998 + }, + "M=16384,N=1024": { + "file": "silu_config_M16384_N1024.json", + "M": 16384, + "N": 1024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 71.83925000000002 + }, + "M=16384,N=1120": { + "file": "silu_config_M16384_N1120.json", + "M": 16384, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 167.31975 + }, + "M=16384,N=1152": { + "file": "silu_config_M16384_N1152.json", + "M": 16384, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.27975 + }, + "M=16384,N=1280": { + "file": "silu_config_M16384_N1280.json", + "M": 16384, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.47975000000005 + }, 
+ "M=16384,N=1344": { + "file": "silu_config_M16384_N1344.json", + "M": 16384, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.95975000000004 + }, + "M=16384,N=1408": { + "file": "silu_config_M16384_N1408.json", + "M": 16384, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.07950000000002 + }, + "M=16384,N=1440": { + "file": "silu_config_M16384_N1440.json", + "M": 16384, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 168.35975000000005 + }, + "M=16384,N=1536": { + "file": "silu_config_M16384_N1536.json", + "M": 16384, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.35950000000003 + }, + "M=16384,N=1600": { + "file": "silu_config_M16384_N1600.json", + "M": 16384, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 165.75975000000005 + }, + "M=16384,N=1664": { + "file": "silu_config_M16384_N1664.json", + "M": 16384, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.23950000000002 + }, + "M=16384,N=1728": { + "file": "silu_config_M16384_N1728.json", + "M": 16384, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.1995 + }, + "M=16384,N=1760": { + "file": "silu_config_M16384_N1760.json", + "M": 16384, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.39975000000004 + }, + "M=16384,N=1792": { + "file": "silu_config_M16384_N1792.json", + "M": 16384, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.11949999999993 + }, + "M=16384,N=1920": { + "file": "silu_config_M16384_N1920.json", + "M": 16384, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.35950000000003 + }, + "M=16384,N=2048": { + "file": "silu_config_M16384_N2048.json", + "M": 16384, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 130.39974999999998 + }, + "M=16384,N=2080": { + "file": "silu_config_M16384_N2080.json", + "M": 16384, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
184.19975 + }, + "M=16384,N=2240": { + "file": "silu_config_M16384_N2240.json", + "M": 16384, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 187.35975000000002 + }, + "M=16384,N=2400": { + "file": "silu_config_M16384_N2400.json", + "M": 16384, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 202.71974999999998 + }, + "M=16384,N=2560": { + "file": "silu_config_M16384_N2560.json", + "M": 16384, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.79975000000002 + }, + "M=17408,N=128": { + "file": "silu_config_M17408_N128.json", + "M": 17408, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.919000000000004 + }, + "M=17408,N=160": { + "file": "silu_config_M17408_N160.json", + "M": 17408, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 30.239250000000006 + }, + "M=17408,N=192": { + "file": "silu_config_M17408_N192.json", + "M": 17408, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 30.359 + }, + "M=17408,N=256": { + "file": "silu_config_M17408_N256.json", + "M": 17408, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.398999999999994 + }, + "M=17408,N=320": { + "file": "silu_config_M17408_N320.json", + "M": 17408, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.199249999999985 + }, + "M=17408,N=384": { + "file": "silu_config_M17408_N384.json", + "M": 17408, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.67924999999999 + }, + "M=17408,N=480": { + "file": "silu_config_M17408_N480.json", + "M": 17408, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.71925000000002 + }, + "M=17408,N=512": { + "file": "silu_config_M17408_N512.json", + "M": 17408, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 41.15924999999999 + }, + "M=17408,N=576": { + "file": "silu_config_M17408_N576.json", + "M": 17408, + "N": 576, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 96.63925 + }, + 
"M=17408,N=640": { + "file": "silu_config_M17408_N640.json", + "M": 17408, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.35925 + }, + "M=17408,N=768": { + "file": "silu_config_M17408_N768.json", + "M": 17408, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.11924999999998 + }, + "M=17408,N=800": { + "file": "silu_config_M17408_N800.json", + "M": 17408, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 92.15949999999998 + }, + "M=17408,N=896": { + "file": "silu_config_M17408_N896.json", + "M": 17408, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 90.1995 + }, + "M=17408,N=960": { + "file": "silu_config_M17408_N960.json", + "M": 17408, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.99924999999998 + }, + "M=17408,N=1024": { + "file": "silu_config_M17408_N1024.json", + "M": 17408, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 75.71949999999998 + }, + "M=17408,N=1120": { + "file": "silu_config_M17408_N1120.json", + "M": 17408, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 174.75975 + }, + "M=17408,N=1152": { + "file": "silu_config_M17408_N1152.json", + "M": 17408, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 178.43975 + }, + "M=17408,N=1280": { + "file": "silu_config_M17408_N1280.json", + "M": 17408, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 184.39975000000004 + }, + "M=17408,N=1344": { + "file": "silu_config_M17408_N1344.json", + "M": 17408, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 174.87975 + }, + "M=17408,N=1408": { + "file": "silu_config_M17408_N1408.json", + "M": 17408, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 173.35974999999993 + }, + "M=17408,N=1440": { + "file": "silu_config_M17408_N1440.json", + "M": 17408, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 178.51975000000002 + }, + "M=17408,N=1536": { + "file": 
"silu_config_M17408_N1536.json", + "M": 17408, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 176.43975000000003 + }, + "M=17408,N=1600": { + "file": "silu_config_M17408_N1600.json", + "M": 17408, + "N": 1600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 181.11975000000004 + }, + "M=17408,N=1664": { + "file": "silu_config_M17408_N1664.json", + "M": 17408, + "N": 1664, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 179.75975 + }, + "M=17408,N=1728": { + "file": "silu_config_M17408_N1728.json", + "M": 17408, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 181.67974999999996 + }, + "M=17408,N=1760": { + "file": "silu_config_M17408_N1760.json", + "M": 17408, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 179.67975 + }, + "M=17408,N=1792": { + "file": "silu_config_M17408_N1792.json", + "M": 17408, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.83974999999998 + }, + "M=17408,N=1920": { + "file": "silu_config_M17408_N1920.json", + "M": 17408, + "N": 1920, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 180.59975000000003 + }, + "M=17408,N=2048": { + "file": "silu_config_M17408_N2048.json", + "M": 17408, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 137.63974999999994 + }, + "M=17408,N=2080": { + "file": "silu_config_M17408_N2080.json", + "M": 17408, + "N": 2080, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.52000000000004 + }, + "M=17408,N=2240": { + "file": "silu_config_M17408_N2240.json", + "M": 17408, + "N": 2240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 208.35975000000008 + }, + "M=17408,N=2400": { + "file": "silu_config_M17408_N2400.json", + "M": 17408, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.72000000000008 + }, + "M=17408,N=2560": { + "file": "silu_config_M17408_N2560.json", + "M": 17408, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 226.95999999999992 + }, + "M=18432,N=128": { 
+ "file": "silu_config_M18432_N128.json", + "M": 18432, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.279249999999998 + }, + "M=18432,N=160": { + "file": "silu_config_M18432_N160.json", + "M": 18432, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 31.55900000000002 + }, + "M=18432,N=192": { + "file": "silu_config_M18432_N192.json", + "M": 18432, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 31.519 + }, + "M=18432,N=256": { + "file": "silu_config_M18432_N256.json", + "M": 18432, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.159000000000006 + }, + "M=18432,N=320": { + "file": "silu_config_M18432_N320.json", + "M": 18432, + "N": 320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.47924999999998 + }, + "M=18432,N=384": { + "file": "silu_config_M18432_N384.json", + "M": 18432, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.15925 + }, + "M=18432,N=480": { + "file": "silu_config_M18432_N480.json", + "M": 18432, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.119249999999994 + }, + "M=18432,N=512": { + "file": "silu_config_M18432_N512.json", + "M": 18432, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 43.159 + }, + "M=18432,N=576": { + "file": "silu_config_M18432_N576.json", + "M": 18432, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.19950000000003 + }, + "M=18432,N=640": { + "file": "silu_config_M18432_N640.json", + "M": 18432, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 92.31925000000003 + }, + "M=18432,N=768": { + "file": "silu_config_M18432_N768.json", + "M": 18432, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 94.87925000000001 + }, + "M=18432,N=800": { + "file": "silu_config_M18432_N800.json", + "M": 18432, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.8795 + }, + "M=18432,N=896": { + "file": "silu_config_M18432_N896.json", + "M": 18432, + 
"N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.83950000000002 + }, + "M=18432,N=960": { + "file": "silu_config_M18432_N960.json", + "M": 18432, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.27924999999999 + }, + "M=18432,N=1024": { + "file": "silu_config_M18432_N1024.json", + "M": 18432, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 79.31925000000003 + }, + "M=18432,N=1120": { + "file": "silu_config_M18432_N1120.json", + "M": 18432, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.23975000000002 + }, + "M=18432,N=1152": { + "file": "silu_config_M18432_N1152.json", + "M": 18432, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 185.6 + }, + "M=18432,N=1280": { + "file": "silu_config_M18432_N1280.json", + "M": 18432, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 181.39974999999998 + }, + "M=18432,N=1344": { + "file": "silu_config_M18432_N1344.json", + "M": 18432, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.39975 + }, + "M=18432,N=1408": { + "file": "silu_config_M18432_N1408.json", + "M": 18432, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 186.35975000000005 + }, + "M=18432,N=1440": { + "file": "silu_config_M18432_N1440.json", + "M": 18432, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 188.80000000000004 + }, + "M=18432,N=1536": { + "file": "silu_config_M18432_N1536.json", + "M": 18432, + "N": 1536, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 189.67975 + }, + "M=18432,N=1600": { + "file": "silu_config_M18432_N1600.json", + "M": 18432, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.79975000000002 + }, + "M=18432,N=1664": { + "file": "silu_config_M18432_N1664.json", + "M": 18432, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 187.07975 + }, + "M=18432,N=1728": { + "file": "silu_config_M18432_N1728.json", + "M": 18432, + "N": 1728, + 
"rows_per_block": 5, + "vec_size": 2, + "time_us": 189.31975 + }, + "M=18432,N=1760": { + "file": "silu_config_M18432_N1760.json", + "M": 18432, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.15975000000003 + }, + "M=18432,N=1792": { + "file": "silu_config_M18432_N1792.json", + "M": 18432, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.15974999999992 + }, + "M=18432,N=1920": { + "file": "silu_config_M18432_N1920.json", + "M": 18432, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.15975000000003 + }, + "M=18432,N=2048": { + "file": "silu_config_M18432_N2048.json", + "M": 18432, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 149.07949999999988 + }, + "M=18432,N=2080": { + "file": "silu_config_M18432_N2080.json", + "M": 18432, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 204.07975 + }, + "M=18432,N=2240": { + "file": "silu_config_M18432_N2240.json", + "M": 18432, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.8400000000001 + }, + "M=18432,N=2400": { + "file": "silu_config_M18432_N2400.json", + "M": 18432, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.68 + }, + "M=18432,N=2560": { + "file": "silu_config_M18432_N2560.json", + "M": 18432, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 233.71999999999997 + }, + "M=19456,N=128": { + "file": "silu_config_M19456_N128.json", + "M": 19456, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.918999999999997 + }, + "M=19456,N=160": { + "file": "silu_config_M19456_N160.json", + "M": 19456, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.079000000000008 + }, + "M=19456,N=192": { + "file": "silu_config_M19456_N192.json", + "M": 19456, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 32.99924999999999 + }, + "M=19456,N=256": { + "file": "silu_config_M19456_N256.json", + "M": 19456, + "N": 256, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 28.358999999999988 + }, + "M=19456,N=320": { + "file": "silu_config_M19456_N320.json", + "M": 19456, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.11925 + }, + "M=19456,N=384": { + "file": "silu_config_M19456_N384.json", + "M": 19456, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.799249999999994 + }, + "M=19456,N=480": { + "file": "silu_config_M19456_N480.json", + "M": 19456, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.759250000000016 + }, + "M=19456,N=512": { + "file": "silu_config_M19456_N512.json", + "M": 19456, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 45.159 + }, + "M=19456,N=576": { + "file": "silu_config_M19456_N576.json", + "M": 19456, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 107.55950000000001 + }, + "M=19456,N=640": { + "file": "silu_config_M19456_N640.json", + "M": 19456, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 99.51924999999997 + }, + "M=19456,N=768": { + "file": "silu_config_M19456_N768.json", + "M": 19456, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 98.27925 + }, + "M=19456,N=800": { + "file": "silu_config_M19456_N800.json", + "M": 19456, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 102.5595 + }, + "M=19456,N=896": { + "file": "silu_config_M19456_N896.json", + "M": 19456, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 100.39925000000001 + }, + "M=19456,N=960": { + "file": "silu_config_M19456_N960.json", + "M": 19456, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 101.43925000000002 + }, + "M=19456,N=1024": { + "file": "silu_config_M19456_N1024.json", + "M": 19456, + "N": 1024, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 83.91925 + }, + "M=19456,N=1120": { + "file": "silu_config_M19456_N1120.json", + "M": 19456, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
194.71974999999995 + }, + "M=19456,N=1152": { + "file": "silu_config_M19456_N1152.json", + "M": 19456, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 198.76000000000002 + }, + "M=19456,N=1280": { + "file": "silu_config_M19456_N1280.json", + "M": 19456, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 192.79974999999996 + }, + "M=19456,N=1344": { + "file": "silu_config_M19456_N1344.json", + "M": 19456, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.07975 + }, + "M=19456,N=1408": { + "file": "silu_config_M19456_N1408.json", + "M": 19456, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.59975 + }, + "M=19456,N=1440": { + "file": "silu_config_M19456_N1440.json", + "M": 19456, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.83975000000004 + }, + "M=19456,N=1536": { + "file": "silu_config_M19456_N1536.json", + "M": 19456, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.87975 + }, + "M=19456,N=1600": { + "file": "silu_config_M19456_N1600.json", + "M": 19456, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.59975000000003 + }, + "M=19456,N=1664": { + "file": "silu_config_M19456_N1664.json", + "M": 19456, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 194.03974999999997 + }, + "M=19456,N=1728": { + "file": "silu_config_M19456_N1728.json", + "M": 19456, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.07999999999998 + }, + "M=19456,N=1760": { + "file": "silu_config_M19456_N1760.json", + "M": 19456, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.47974999999997 + }, + "M=19456,N=1792": { + "file": "silu_config_M19456_N1792.json", + "M": 19456, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 194.55975000000007 + }, + "M=19456,N=1920": { + "file": "silu_config_M19456_N1920.json", + "M": 19456, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 197.92000000000002 + }, + "M=19456,N=2048": { + "file": "silu_config_M19456_N2048.json", + "M": 19456, + "N": 2048, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 166.55975 + }, + "M=19456,N=2080": { + "file": "silu_config_M19456_N2080.json", + "M": 19456, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 209.28000000000003 + }, + "M=19456,N=2240": { + "file": "silu_config_M19456_N2240.json", + "M": 19456, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.03999999999996 + }, + "M=19456,N=2400": { + "file": "silu_config_M19456_N2400.json", + "M": 19456, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.51999999999987 + }, + "M=19456,N=2560": { + "file": "silu_config_M19456_N2560.json", + "M": 19456, + "N": 2560, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 258.1599999999999 + }, + "M=20480,N=128": { + "file": "silu_config_M20480_N128.json", + "M": 20480, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.558999999999997 + }, + "M=20480,N=160": { + "file": "silu_config_M20480_N160.json", + "M": 20480, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 32.719 + }, + "M=20480,N=192": { + "file": "silu_config_M20480_N192.json", + "M": 20480, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.359 + }, + "M=20480,N=256": { + "file": "silu_config_M20480_N256.json", + "M": 20480, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.559000000000005 + }, + "M=20480,N=320": { + "file": "silu_config_M20480_N320.json", + "M": 20480, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.47925 + }, + "M=20480,N=384": { + "file": "silu_config_M20480_N384.json", + "M": 20480, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.23925000000002 + }, + "M=20480,N=480": { + "file": "silu_config_M20480_N480.json", + "M": 20480, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 58.679249999999996 + }, 
+ "M=20480,N=512": { + "file": "silu_config_M20480_N512.json", + "M": 20480, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 47.159249999999986 + }, + "M=20480,N=576": { + "file": "silu_config_M20480_N576.json", + "M": 20480, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.39925000000001 + }, + "M=20480,N=640": { + "file": "silu_config_M20480_N640.json", + "M": 20480, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 102.9195 + }, + "M=20480,N=768": { + "file": "silu_config_M20480_N768.json", + "M": 20480, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 100.03949999999999 + }, + "M=20480,N=800": { + "file": "silu_config_M20480_N800.json", + "M": 20480, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 104.47950000000003 + }, + "M=20480,N=896": { + "file": "silu_config_M20480_N896.json", + "M": 20480, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 100.55950000000001 + }, + "M=20480,N=960": { + "file": "silu_config_M20480_N960.json", + "M": 20480, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.9595 + }, + "M=20480,N=1024": { + "file": "silu_config_M20480_N1024.json", + "M": 20480, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 87.79949999999998 + }, + "M=20480,N=1120": { + "file": "silu_config_M20480_N1120.json", + "M": 20480, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.56000000000003 + }, + "M=20480,N=1152": { + "file": "silu_config_M20480_N1152.json", + "M": 20480, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.44000000000008 + }, + "M=20480,N=1280": { + "file": "silu_config_M20480_N1280.json", + "M": 20480, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.07974999999993 + }, + "M=20480,N=1344": { + "file": "silu_config_M20480_N1344.json", + "M": 20480, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 205.99975 + }, + 
"M=20480,N=1408": { + "file": "silu_config_M20480_N1408.json", + "M": 20480, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.03975000000003 + }, + "M=20480,N=1440": { + "file": "silu_config_M20480_N1440.json", + "M": 20480, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.27975000000004 + }, + "M=20480,N=1536": { + "file": "silu_config_M20480_N1536.json", + "M": 20480, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.51975 + }, + "M=20480,N=1600": { + "file": "silu_config_M20480_N1600.json", + "M": 20480, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.39999999999986 + }, + "M=20480,N=1664": { + "file": "silu_config_M20480_N1664.json", + "M": 20480, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.99999999999994 + }, + "M=20480,N=1728": { + "file": "silu_config_M20480_N1728.json", + "M": 20480, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.27975000000004 + }, + "M=20480,N=1760": { + "file": "silu_config_M20480_N1760.json", + "M": 20480, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.99975 + }, + "M=20480,N=1792": { + "file": "silu_config_M20480_N1792.json", + "M": 20480, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 206.31974999999994 + }, + "M=20480,N=1920": { + "file": "silu_config_M20480_N1920.json", + "M": 20480, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.87975 + }, + "M=20480,N=2048": { + "file": "silu_config_M20480_N2048.json", + "M": 20480, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 164.07975000000005 + }, + "M=20480,N=2080": { + "file": "silu_config_M20480_N2080.json", + "M": 20480, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.71974999999992 + }, + "M=20480,N=2240": { + "file": "silu_config_M20480_N2240.json", + "M": 20480, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
228.28000000000014 + }, + "M=20480,N=2400": { + "file": "silu_config_M20480_N2400.json", + "M": 20480, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 240.3999999999998 + }, + "M=20480,N=2560": { + "file": "silu_config_M20480_N2560.json", + "M": 20480, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.80000000000007 + }, + "M=21504,N=128": { + "file": "silu_config_M21504_N128.json", + "M": 21504, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.119 + }, + "M=21504,N=160": { + "file": "silu_config_M21504_N160.json", + "M": 21504, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.518999999999984 + }, + "M=21504,N=192": { + "file": "silu_config_M21504_N192.json", + "M": 21504, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.519 + }, + "M=21504,N=256": { + "file": "silu_config_M21504_N256.json", + "M": 21504, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 30.799000000000014 + }, + "M=21504,N=320": { + "file": "silu_config_M21504_N320.json", + "M": 21504, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.07925 + }, + "M=21504,N=384": { + "file": "silu_config_M21504_N384.json", + "M": 21504, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 59.87924999999998 + }, + "M=21504,N=480": { + "file": "silu_config_M21504_N480.json", + "M": 21504, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.75925 + }, + "M=21504,N=512": { + "file": "silu_config_M21504_N512.json", + "M": 21504, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 49.23924999999999 + }, + "M=21504,N=576": { + "file": "silu_config_M21504_N576.json", + "M": 21504, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 110.35950000000001 + }, + "M=21504,N=640": { + "file": "silu_config_M21504_N640.json", + "M": 21504, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 109.35950000000001 + }, + "M=21504,N=768": { + 
"file": "silu_config_M21504_N768.json", + "M": 21504, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 107.1995 + }, + "M=21504,N=800": { + "file": "silu_config_M21504_N800.json", + "M": 21504, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 112.9595 + }, + "M=21504,N=896": { + "file": "silu_config_M21504_N896.json", + "M": 21504, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 112.1995 + }, + "M=21504,N=960": { + "file": "silu_config_M21504_N960.json", + "M": 21504, + "N": 960, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 118.11950000000002 + }, + "M=21504,N=1024": { + "file": "silu_config_M21504_N1024.json", + "M": 21504, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 91.43950000000001 + }, + "M=21504,N=1120": { + "file": "silu_config_M21504_N1120.json", + "M": 21504, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.63975000000005 + }, + "M=21504,N=1152": { + "file": "silu_config_M21504_N1152.json", + "M": 21504, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 210.4 + }, + "M=21504,N=1280": { + "file": "silu_config_M21504_N1280.json", + "M": 21504, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 210.75975 + }, + "M=21504,N=1344": { + "file": "silu_config_M21504_N1344.json", + "M": 21504, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 218.68 + }, + "M=21504,N=1408": { + "file": "silu_config_M21504_N1408.json", + "M": 21504, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.48000000000002 + }, + "M=21504,N=1440": { + "file": "silu_config_M21504_N1440.json", + "M": 21504, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.72000000000003 + }, + "M=21504,N=1536": { + "file": "silu_config_M21504_N1536.json", + "M": 21504, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.67974999999996 + }, + "M=21504,N=1600": { + "file": "silu_config_M21504_N1600.json", + 
"M": 21504, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 214.15974999999997 + }, + "M=21504,N=1664": { + "file": "silu_config_M21504_N1664.json", + "M": 21504, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.08000000000004 + }, + "M=21504,N=1728": { + "file": "silu_config_M21504_N1728.json", + "M": 21504, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 209.3197500000001 + }, + "M=21504,N=1760": { + "file": "silu_config_M21504_N1760.json", + "M": 21504, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 214.51975 + }, + "M=21504,N=1792": { + "file": "silu_config_M21504_N1792.json", + "M": 21504, + "N": 1792, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 221.44 + }, + "M=21504,N=1920": { + "file": "silu_config_M21504_N1920.json", + "M": 21504, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 218.08000000000004 + }, + "M=21504,N=2048": { + "file": "silu_config_M21504_N2048.json", + "M": 21504, + "N": 2048, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 183.67974999999996 + }, + "M=21504,N=2080": { + "file": "silu_config_M21504_N2080.json", + "M": 21504, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 236.28000000000003 + }, + "M=21504,N=2240": { + "file": "silu_config_M21504_N2240.json", + "M": 21504, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 248.4000000000002 + }, + "M=21504,N=2400": { + "file": "silu_config_M21504_N2400.json", + "M": 21504, + "N": 2400, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 274.36024999999984 + }, + "M=21504,N=2560": { + "file": "silu_config_M21504_N2560.json", + "M": 21504, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.52025000000003 + }, + "M=22528,N=128": { + "file": "silu_config_M22528_N128.json", + "M": 22528, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.59899999999999 + }, + "M=22528,N=160": { + "file": "silu_config_M22528_N160.json", + 
"M": 22528, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.71925 + }, + "M=22528,N=192": { + "file": "silu_config_M22528_N192.json", + "M": 22528, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.07900000000001 + }, + "M=22528,N=256": { + "file": "silu_config_M22528_N256.json", + "M": 22528, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.879250000000006 + }, + "M=22528,N=320": { + "file": "silu_config_M22528_N320.json", + "M": 22528, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.759250000000016 + }, + "M=22528,N=384": { + "file": "silu_config_M22528_N384.json", + "M": 22528, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.279250000000005 + }, + "M=22528,N=480": { + "file": "silu_config_M22528_N480.json", + "M": 22528, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.35925000000002 + }, + "M=22528,N=512": { + "file": "silu_config_M22528_N512.json", + "M": 22528, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 51.15900000000001 + }, + "M=22528,N=576": { + "file": "silu_config_M22528_N576.json", + "M": 22528, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.3195 + }, + "M=22528,N=640": { + "file": "silu_config_M22528_N640.json", + "M": 22528, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 112.55949999999996 + }, + "M=22528,N=768": { + "file": "silu_config_M22528_N768.json", + "M": 22528, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 114.83950000000002 + }, + "M=22528,N=800": { + "file": "silu_config_M22528_N800.json", + "M": 22528, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 116.07949999999998 + }, + "M=22528,N=896": { + "file": "silu_config_M22528_N896.json", + "M": 22528, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.35950000000003 + }, + "M=22528,N=960": { + "file": "silu_config_M22528_N960.json", + "M": 22528, + "N": 960, + 
"rows_per_block": 5, + "vec_size": 2, + "time_us": 116.67950000000002 + }, + "M=22528,N=1024": { + "file": "silu_config_M22528_N1024.json", + "M": 22528, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 95.7995 + }, + "M=22528,N=1120": { + "file": "silu_config_M22528_N1120.json", + "M": 22528, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 228.47974999999997 + }, + "M=22528,N=1152": { + "file": "silu_config_M22528_N1152.json", + "M": 22528, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 221.88000000000005 + }, + "M=22528,N=1280": { + "file": "silu_config_M22528_N1280.json", + "M": 22528, + "N": 1280, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 230.2 + }, + "M=22528,N=1344": { + "file": "silu_config_M22528_N1344.json", + "M": 22528, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.84000000000003 + }, + "M=22528,N=1408": { + "file": "silu_config_M22528_N1408.json", + "M": 22528, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 222.83999999999992 + }, + "M=22528,N=1440": { + "file": "silu_config_M22528_N1440.json", + "M": 22528, + "N": 1440, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 233.3599999999999 + }, + "M=22528,N=1536": { + "file": "silu_config_M22528_N1536.json", + "M": 22528, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 225.0000000000001 + }, + "M=22528,N=1600": { + "file": "silu_config_M22528_N1600.json", + "M": 22528, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.31975 + }, + "M=22528,N=1664": { + "file": "silu_config_M22528_N1664.json", + "M": 22528, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 227.24 + }, + "M=22528,N=1728": { + "file": "silu_config_M22528_N1728.json", + "M": 22528, + "N": 1728, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 233.60000000000002 + }, + "M=22528,N=1760": { + "file": "silu_config_M22528_N1760.json", + "M": 22528, + "N": 1760, + "rows_per_block": 
5, + "vec_size": 2, + "time_us": 229.60000000000002 + }, + "M=22528,N=1792": { + "file": "silu_config_M22528_N1792.json", + "M": 22528, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.63999999999987 + }, + "M=22528,N=1920": { + "file": "silu_config_M22528_N1920.json", + "M": 22528, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 228.27999999999997 + }, + "M=22528,N=2048": { + "file": "silu_config_M22528_N2048.json", + "M": 22528, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 175.6797499999999 + }, + "M=22528,N=2080": { + "file": "silu_config_M22528_N2080.json", + "M": 22528, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.95999999999992 + }, + "M=22528,N=2240": { + "file": "silu_config_M22528_N2240.json", + "M": 22528, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.36025000000006 + }, + "M=22528,N=2400": { + "file": "silu_config_M22528_N2400.json", + "M": 22528, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 266.8800000000001 + }, + "M=22528,N=2560": { + "file": "silu_config_M22528_N2560.json", + "M": 22528, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.20000000000016 + }, + "M=23552,N=128": { + "file": "silu_config_M23552_N128.json", + "M": 23552, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.159 + }, + "M=23552,N=160": { + "file": "silu_config_M23552_N160.json", + "M": 23552, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 38.27924999999999 + }, + "M=23552,N=192": { + "file": "silu_config_M23552_N192.json", + "M": 23552, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 38.279250000000005 + }, + "M=23552,N=256": { + "file": "silu_config_M23552_N256.json", + "M": 23552, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 36.79900000000001 + }, + "M=23552,N=320": { + "file": "silu_config_M23552_N320.json", + "M": 23552, + "N": 320, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 64.79925 + }, + "M=23552,N=384": { + "file": "silu_config_M23552_N384.json", + "M": 23552, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.67925 + }, + "M=23552,N=480": { + "file": "silu_config_M23552_N480.json", + "M": 23552, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.83925000000002 + }, + "M=23552,N=512": { + "file": "silu_config_M23552_N512.json", + "M": 23552, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 53.199250000000006 + }, + "M=23552,N=576": { + "file": "silu_config_M23552_N576.json", + "M": 23552, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 120.35950000000001 + }, + "M=23552,N=640": { + "file": "silu_config_M23552_N640.json", + "M": 23552, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 119.39949999999996 + }, + "M=23552,N=768": { + "file": "silu_config_M23552_N768.json", + "M": 23552, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 119.95950000000002 + }, + "M=23552,N=800": { + "file": "silu_config_M23552_N800.json", + "M": 23552, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 121.27950000000001 + }, + "M=23552,N=896": { + "file": "silu_config_M23552_N896.json", + "M": 23552, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.39949999999996 + }, + "M=23552,N=960": { + "file": "silu_config_M23552_N960.json", + "M": 23552, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 119.91949999999994 + }, + "M=23552,N=1024": { + "file": "silu_config_M23552_N1024.json", + "M": 23552, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 99.83924999999999 + }, + "M=23552,N=1120": { + "file": "silu_config_M23552_N1120.json", + "M": 23552, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 234.5999999999999 + }, + "M=23552,N=1152": { + "file": "silu_config_M23552_N1152.json", + "M": 23552, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 224.56 + }, + "M=23552,N=1280": { + "file": "silu_config_M23552_N1280.json", + "M": 23552, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 225.15999999999997 + }, + "M=23552,N=1344": { + "file": "silu_config_M23552_N1344.json", + "M": 23552, + "N": 1344, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 242.8002499999999 + }, + "M=23552,N=1408": { + "file": "silu_config_M23552_N1408.json", + "M": 23552, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.92000000000002 + }, + "M=23552,N=1440": { + "file": "silu_config_M23552_N1440.json", + "M": 23552, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.92000000000002 + }, + "M=23552,N=1536": { + "file": "silu_config_M23552_N1536.json", + "M": 23552, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 233.11999999999995 + }, + "M=23552,N=1600": { + "file": "silu_config_M23552_N1600.json", + "M": 23552, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 235.2399999999999 + }, + "M=23552,N=1664": { + "file": "silu_config_M23552_N1664.json", + "M": 23552, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.00000000000006 + }, + "M=23552,N=1728": { + "file": "silu_config_M23552_N1728.json", + "M": 23552, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 235.23999999999995 + }, + "M=23552,N=1760": { + "file": "silu_config_M23552_N1760.json", + "M": 23552, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 235.67999999999995 + }, + "M=23552,N=1792": { + "file": "silu_config_M23552_N1792.json", + "M": 23552, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 234.72000000000008 + }, + "M=23552,N=1920": { + "file": "silu_config_M23552_N1920.json", + "M": 23552, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 238.36000000000007 + }, + "M=23552,N=2048": { + "file": "silu_config_M23552_N2048.json", + "M": 23552, + "N": 2048, + "rows_per_block": 3, 
+ "vec_size": 4, + "time_us": 188.11975000000012 + }, + "M=23552,N=2080": { + "file": "silu_config_M23552_N2080.json", + "M": 23552, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.67999999999995 + }, + "M=23552,N=2240": { + "file": "silu_config_M23552_N2240.json", + "M": 23552, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 273.31999999999994 + }, + "M=23552,N=2400": { + "file": "silu_config_M23552_N2400.json", + "M": 23552, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.96025 + }, + "M=23552,N=2560": { + "file": "silu_config_M23552_N2560.json", + "M": 23552, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 299.3202499999999 + }, + "M=24576,N=128": { + "file": "silu_config_M24576_N128.json", + "M": 24576, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.679000000000002 + }, + "M=24576,N=160": { + "file": "silu_config_M24576_N160.json", + "M": 24576, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.11899999999999 + }, + "M=24576,N=192": { + "file": "silu_config_M24576_N192.json", + "M": 24576, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.159 + }, + "M=24576,N=256": { + "file": "silu_config_M24576_N256.json", + "M": 24576, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.91899999999999 + }, + "M=24576,N=320": { + "file": "silu_config_M24576_N320.json", + "M": 24576, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.95925000000003 + }, + "M=24576,N=384": { + "file": "silu_config_M24576_N384.json", + "M": 24576, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.95924999999997 + }, + "M=24576,N=480": { + "file": "silu_config_M24576_N480.json", + "M": 24576, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.27924999999999 + }, + "M=24576,N=512": { + "file": "silu_config_M24576_N512.json", + "M": 24576, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + 
"time_us": 55.199250000000006 + }, + "M=24576,N=576": { + "file": "silu_config_M24576_N576.json", + "M": 24576, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.15949999999994 + }, + "M=24576,N=640": { + "file": "silu_config_M24576_N640.json", + "M": 24576, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 119.07950000000002 + }, + "M=24576,N=768": { + "file": "silu_config_M24576_N768.json", + "M": 24576, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + "M=24576,N=800": { + "file": "silu_config_M24576_N800.json", + "M": 24576, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.1995 + }, + "M=24576,N=896": { + "file": "silu_config_M24576_N896.json", + "M": 24576, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.43950000000001 + }, + "M=24576,N=960": { + "file": "silu_config_M24576_N960.json", + "M": 24576, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.11949999999993 + }, + "M=24576,N=1024": { + "file": "silu_config_M24576_N1024.json", + "M": 24576, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 103.47950000000003 + }, + "M=24576,N=1120": { + "file": "silu_config_M24576_N1120.json", + "M": 24576, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.87999999999994 + }, + "M=24576,N=1152": { + "file": "silu_config_M24576_N1152.json", + "M": 24576, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.32000000000005 + }, + "M=24576,N=1280": { + "file": "silu_config_M24576_N1280.json", + "M": 24576, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 245.63999999999993 + }, + "M=24576,N=1344": { + "file": "silu_config_M24576_N1344.json", + "M": 24576, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.04000000000002 + }, + "M=24576,N=1408": { + "file": "silu_config_M24576_N1408.json", + "M": 24576, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, 
+ "time_us": 240.52024999999998 + }, + "M=24576,N=1440": { + "file": "silu_config_M24576_N1440.json", + "M": 24576, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.96000000000004 + }, + "M=24576,N=1536": { + "file": "silu_config_M24576_N1536.json", + "M": 24576, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.2800000000001 + }, + "M=24576,N=1600": { + "file": "silu_config_M24576_N1600.json", + "M": 24576, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.04000000000013 + }, + "M=24576,N=1664": { + "file": "silu_config_M24576_N1664.json", + "M": 24576, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 241.04000000000002 + }, + "M=24576,N=1728": { + "file": "silu_config_M24576_N1728.json", + "M": 24576, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.44 + }, + "M=24576,N=1760": { + "file": "silu_config_M24576_N1760.json", + "M": 24576, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.4402500000001 + }, + "M=24576,N=1792": { + "file": "silu_config_M24576_N1792.json", + "M": 24576, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.16000000000008 + }, + "M=24576,N=1920": { + "file": "silu_config_M24576_N1920.json", + "M": 24576, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.56 + }, + "M=24576,N=2048": { + "file": "silu_config_M24576_N2048.json", + "M": 24576, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 187.51974999999987 + }, + "M=24576,N=2080": { + "file": "silu_config_M24576_N2080.json", + "M": 24576, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 268.96 + }, + "M=24576,N=2240": { + "file": "silu_config_M24576_N2240.json", + "M": 24576, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 282.6402499999999 + }, + "M=24576,N=2400": { + "file": "silu_config_M24576_N2400.json", + "M": 24576, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 300.92025 + }, + "M=24576,N=2560": { + "file": "silu_config_M24576_N2560.json", + "M": 24576, + "N": 2560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 309.44025 + }, + "M=25600,N=128": { + "file": "silu_config_M25600_N128.json", + "M": 25600, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.43925 + }, + "M=25600,N=160": { + "file": "silu_config_M25600_N160.json", + "M": 25600, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 38.319250000000004 + }, + "M=25600,N=192": { + "file": "silu_config_M25600_N192.json", + "M": 25600, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.998999999999995 + }, + "M=25600,N=256": { + "file": "silu_config_M25600_N256.json", + "M": 25600, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.479 + }, + "M=25600,N=320": { + "file": "silu_config_M25600_N320.json", + "M": 25600, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.03925 + }, + "M=25600,N=384": { + "file": "silu_config_M25600_N384.json", + "M": 25600, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.19925 + }, + "M=25600,N=480": { + "file": "silu_config_M25600_N480.json", + "M": 25600, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.83950000000002 + }, + "M=25600,N=512": { + "file": "silu_config_M25600_N512.json", + "M": 25600, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 55.759249999999994 + }, + "M=25600,N=576": { + "file": "silu_config_M25600_N576.json", + "M": 25600, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.03950000000003 + }, + "M=25600,N=640": { + "file": "silu_config_M25600_N640.json", + "M": 25600, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.1995 + }, + "M=25600,N=768": { + "file": "silu_config_M25600_N768.json", + "M": 25600, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.71949999999998 + }, + "M=25600,N=800": { + "file": 
"silu_config_M25600_N800.json", + "M": 25600, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 129.1995 + }, + "M=25600,N=896": { + "file": "silu_config_M25600_N896.json", + "M": 25600, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.47975000000005 + }, + "M=25600,N=960": { + "file": "silu_config_M25600_N960.json", + "M": 25600, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 131.03975 + }, + "M=25600,N=1024": { + "file": "silu_config_M25600_N1024.json", + "M": 25600, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 107.51949999999997 + }, + "M=25600,N=1120": { + "file": "silu_config_M25600_N1120.json", + "M": 25600, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.72024999999994 + }, + "M=25600,N=1152": { + "file": "silu_config_M25600_N1152.json", + "M": 25600, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 251.64 + }, + "M=25600,N=1280": { + "file": "silu_config_M25600_N1280.json", + "M": 25600, + "N": 1280, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 260.64025000000004 + }, + "M=25600,N=1344": { + "file": "silu_config_M25600_N1344.json", + "M": 25600, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.88024999999988 + }, + "M=25600,N=1408": { + "file": "silu_config_M25600_N1408.json", + "M": 25600, + "N": 1408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 261.2799999999999 + }, + "M=25600,N=1440": { + "file": "silu_config_M25600_N1440.json", + "M": 25600, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.9999999999999 + }, + "M=25600,N=1536": { + "file": "silu_config_M25600_N1536.json", + "M": 25600, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.24024999999995 + }, + "M=25600,N=1600": { + "file": "silu_config_M25600_N1600.json", + "M": 25600, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 256.96 + }, + "M=25600,N=1664": { + "file": 
"silu_config_M25600_N1664.json", + "M": 25600, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.40025000000006 + }, + "M=25600,N=1728": { + "file": "silu_config_M25600_N1728.json", + "M": 25600, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.47999999999996 + }, + "M=25600,N=1760": { + "file": "silu_config_M25600_N1760.json", + "M": 25600, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 258.0799999999998 + }, + "M=25600,N=1792": { + "file": "silu_config_M25600_N1792.json", + "M": 25600, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.72000000000008 + }, + "M=25600,N=1920": { + "file": "silu_config_M25600_N1920.json", + "M": 25600, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.80024999999983 + }, + "M=25600,N=2048": { + "file": "silu_config_M25600_N2048.json", + "M": 25600, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 198.79975000000002 + }, + "M=25600,N=2080": { + "file": "silu_config_M25600_N2080.json", + "M": 25600, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 282.12025000000006 + }, + "M=25600,N=2240": { + "file": "silu_config_M25600_N2240.json", + "M": 25600, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 286.9202500000001 + }, + "M=25600,N=2400": { + "file": "silu_config_M25600_N2400.json", + "M": 25600, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.68025 + }, + "M=25600,N=2560": { + "file": "silu_config_M25600_N2560.json", + "M": 25600, + "N": 2560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 328.1205 + }, + "M=26624,N=128": { + "file": "silu_config_M26624_N128.json", + "M": 26624, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.83925 + }, + "M=26624,N=160": { + "file": "silu_config_M26624_N160.json", + "M": 26624, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.35925000000002 + }, + "M=26624,N=192": { + "file": 
"silu_config_M26624_N192.json", + "M": 26624, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.39925000000001 + }, + "M=26624,N=256": { + "file": "silu_config_M26624_N256.json", + "M": 26624, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.83899999999999 + }, + "M=26624,N=320": { + "file": "silu_config_M26624_N320.json", + "M": 26624, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.55925000000002 + }, + "M=26624,N=384": { + "file": "silu_config_M26624_N384.json", + "M": 26624, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 71.79925 + }, + "M=26624,N=480": { + "file": "silu_config_M26624_N480.json", + "M": 26624, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.55925000000002 + }, + "M=26624,N=512": { + "file": "silu_config_M26624_N512.json", + "M": 26624, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 59.15924999999999 + }, + "M=26624,N=576": { + "file": "silu_config_M26624_N576.json", + "M": 26624, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.07975000000005 + }, + "M=26624,N=640": { + "file": "silu_config_M26624_N640.json", + "M": 26624, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 134.35975 + }, + "M=26624,N=768": { + "file": "silu_config_M26624_N768.json", + "M": 26624, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 134.83975 + }, + "M=26624,N=800": { + "file": "silu_config_M26624_N800.json", + "M": 26624, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 136.51950000000002 + }, + "M=26624,N=896": { + "file": "silu_config_M26624_N896.json", + "M": 26624, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.6395 + }, + "M=26624,N=960": { + "file": "silu_config_M26624_N960.json", + "M": 26624, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.59949999999998 + }, + "M=26624,N=1024": { + "file": "silu_config_M26624_N1024.json", + "M": 26624, + 
"N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 111.83949999999999 + }, + "M=26624,N=1120": { + "file": "silu_config_M26624_N1120.json", + "M": 26624, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 269.04024999999996 + }, + "M=26624,N=1152": { + "file": "silu_config_M26624_N1152.json", + "M": 26624, + "N": 1152, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 270.52025000000003 + }, + "M=26624,N=1280": { + "file": "silu_config_M26624_N1280.json", + "M": 26624, + "N": 1280, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 271.20025000000004 + }, + "M=26624,N=1344": { + "file": "silu_config_M26624_N1344.json", + "M": 26624, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.15999999999997 + }, + "M=26624,N=1408": { + "file": "silu_config_M26624_N1408.json", + "M": 26624, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.84000000000003 + }, + "M=26624,N=1440": { + "file": "silu_config_M26624_N1440.json", + "M": 26624, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.92025 + }, + "M=26624,N=1536": { + "file": "silu_config_M26624_N1536.json", + "M": 26624, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 262.44000000000005 + }, + "M=26624,N=1600": { + "file": "silu_config_M26624_N1600.json", + "M": 26624, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.72000000000006 + }, + "M=26624,N=1664": { + "file": "silu_config_M26624_N1664.json", + "M": 26624, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.4802499999999 + }, + "M=26624,N=1728": { + "file": "silu_config_M26624_N1728.json", + "M": 26624, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.2402500000001 + }, + "M=26624,N=1760": { + "file": "silu_config_M26624_N1760.json", + "M": 26624, + "N": 1760, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 270.55999999999983 + }, + "M=26624,N=1792": { + "file": 
"silu_config_M26624_N1792.json", + "M": 26624, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 266.2 + }, + "M=26624,N=1920": { + "file": "silu_config_M26624_N1920.json", + "M": 26624, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.88 + }, + "M=26624,N=2048": { + "file": "silu_config_M26624_N2048.json", + "M": 26624, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 203.83999999999997 + }, + "M=26624,N=2080": { + "file": "silu_config_M26624_N2080.json", + "M": 26624, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.4002499999999 + }, + "M=26624,N=2240": { + "file": "silu_config_M26624_N2240.json", + "M": 26624, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 308.0005000000002 + }, + "M=26624,N=2400": { + "file": "silu_config_M26624_N2400.json", + "M": 26624, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 321.68049999999994 + }, + "M=26624,N=2560": { + "file": "silu_config_M26624_N2560.json", + "M": 26624, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.40049999999985 + }, + "M=27648,N=128": { + "file": "silu_config_M27648_N128.json", + "M": 27648, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.39925000000001 + }, + "M=27648,N=160": { + "file": "silu_config_M27648_N160.json", + "M": 27648, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.439 + }, + "M=27648,N=192": { + "file": "silu_config_M27648_N192.json", + "M": 27648, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.43925 + }, + "M=27648,N=256": { + "file": "silu_config_M27648_N256.json", + "M": 27648, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.11925000000001 + }, + "M=27648,N=320": { + "file": "silu_config_M27648_N320.json", + "M": 27648, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.55925000000002 + }, + "M=27648,N=384": { + "file": "silu_config_M27648_N384.json", + 
"M": 27648, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.59924999999998 + }, + "M=27648,N=480": { + "file": "silu_config_M27648_N480.json", + "M": 27648, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.95925 + }, + "M=27648,N=512": { + "file": "silu_config_M27648_N512.json", + "M": 27648, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 59.35925 + }, + "M=27648,N=576": { + "file": "silu_config_M27648_N576.json", + "M": 27648, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 140.43975 + }, + "M=27648,N=640": { + "file": "silu_config_M27648_N640.json", + "M": 27648, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 139.27950000000004 + }, + "M=27648,N=768": { + "file": "silu_config_M27648_N768.json", + "M": 27648, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 139.83975000000004 + }, + "M=27648,N=800": { + "file": "silu_config_M27648_N800.json", + "M": 27648, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 141.5195 + }, + "M=27648,N=896": { + "file": "silu_config_M27648_N896.json", + "M": 27648, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 140.5595 + }, + "M=27648,N=960": { + "file": "silu_config_M27648_N960.json", + "M": 27648, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 144.39949999999996 + }, + "M=27648,N=1024": { + "file": "silu_config_M27648_N1024.json", + "M": 27648, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 115.47950000000003 + }, + "M=27648,N=1120": { + "file": "silu_config_M27648_N1120.json", + "M": 27648, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 279.08025 + }, + "M=27648,N=1152": { + "file": "silu_config_M27648_N1152.json", + "M": 27648, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 270.60024999999996 + }, + "M=27648,N=1280": { + "file": "silu_config_M27648_N1280.json", + "M": 27648, + "N": 1280, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 271.0400000000001 + }, + "M=27648,N=1344": { + "file": "silu_config_M27648_N1344.json", + "M": 27648, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.72 + }, + "M=27648,N=1408": { + "file": "silu_config_M27648_N1408.json", + "M": 27648, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 269.71999999999997 + }, + "M=27648,N=1440": { + "file": "silu_config_M27648_N1440.json", + "M": 27648, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.36 + }, + "M=27648,N=1536": { + "file": "silu_config_M27648_N1536.json", + "M": 27648, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 271.2802499999998 + }, + "M=27648,N=1600": { + "file": "silu_config_M27648_N1600.json", + "M": 27648, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 276.6 + }, + "M=27648,N=1664": { + "file": "silu_config_M27648_N1664.json", + "M": 27648, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.1999999999998 + }, + "M=27648,N=1728": { + "file": "silu_config_M27648_N1728.json", + "M": 27648, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.9600000000001 + }, + "M=27648,N=1760": { + "file": "silu_config_M27648_N1760.json", + "M": 27648, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 273.60024999999996 + }, + "M=27648,N=1792": { + "file": "silu_config_M27648_N1792.json", + "M": 27648, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.72024999999996 + }, + "M=27648,N=1920": { + "file": "silu_config_M27648_N1920.json", + "M": 27648, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.28025 + }, + "M=27648,N=2048": { + "file": "silu_config_M27648_N2048.json", + "M": 27648, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 220.55999999999995 + }, + "M=27648,N=2080": { + "file": "silu_config_M27648_N2080.json", + "M": 27648, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 301.7602499999999 + }, + "M=27648,N=2240": { + "file": "silu_config_M27648_N2240.json", + "M": 27648, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 316.5205000000001 + }, + "M=27648,N=2400": { + "file": "silu_config_M27648_N2400.json", + "M": 27648, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 333.40025 + }, + "M=27648,N=2560": { + "file": "silu_config_M27648_N2560.json", + "M": 27648, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.8405 + }, + "M=28672,N=128": { + "file": "silu_config_M28672_N128.json", + "M": 28672, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.159000000000006 + }, + "M=28672,N=160": { + "file": "silu_config_M28672_N160.json", + "M": 28672, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.759000000000015 + }, + "M=28672,N=192": { + "file": "silu_config_M28672_N192.json", + "M": 28672, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.99924999999999 + }, + "M=28672,N=256": { + "file": "silu_config_M28672_N256.json", + "M": 28672, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 38.19924999999999 + }, + "M=28672,N=320": { + "file": "silu_config_M28672_N320.json", + "M": 28672, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.39950000000002 + }, + "M=28672,N=384": { + "file": "silu_config_M28672_N384.json", + "M": 28672, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.0795 + }, + "M=28672,N=480": { + "file": "silu_config_M28672_N480.json", + "M": 28672, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 78.47925 + }, + "M=28672,N=512": { + "file": "silu_config_M28672_N512.json", + "M": 28672, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 63.31925000000001 + }, + "M=28672,N=576": { + "file": "silu_config_M28672_N576.json", + "M": 28672, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.19975 + }, + 
"M=28672,N=640": { + "file": "silu_config_M28672_N640.json", + "M": 28672, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.31950000000003 + }, + "M=28672,N=768": { + "file": "silu_config_M28672_N768.json", + "M": 28672, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.15949999999998 + }, + "M=28672,N=800": { + "file": "silu_config_M28672_N800.json", + "M": 28672, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 155.79950000000002 + }, + "M=28672,N=896": { + "file": "silu_config_M28672_N896.json", + "M": 28672, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 145.55975000000004 + }, + "M=28672,N=960": { + "file": "silu_config_M28672_N960.json", + "M": 28672, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 144.87974999999997 + }, + "M=28672,N=1024": { + "file": "silu_config_M28672_N1024.json", + "M": 28672, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 119.71950000000004 + }, + "M=28672,N=1120": { + "file": "silu_config_M28672_N1120.json", + "M": 28672, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.08000000000004 + }, + "M=28672,N=1152": { + "file": "silu_config_M28672_N1152.json", + "M": 28672, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.24025 + }, + "M=28672,N=1280": { + "file": "silu_config_M28672_N1280.json", + "M": 28672, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 286.52025000000003 + }, + "M=28672,N=1344": { + "file": "silu_config_M28672_N1344.json", + "M": 28672, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 285.9202500000001 + }, + "M=28672,N=1408": { + "file": "silu_config_M28672_N1408.json", + "M": 28672, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.76025 + }, + "M=28672,N=1440": { + "file": "silu_config_M28672_N1440.json", + "M": 28672, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.80024999999995 + }, + 
"M=28672,N=1536": { + "file": "silu_config_M28672_N1536.json", + "M": 28672, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 282.08025000000015 + }, + "M=28672,N=1600": { + "file": "silu_config_M28672_N1600.json", + "M": 28672, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.64025 + }, + "M=28672,N=1664": { + "file": "silu_config_M28672_N1664.json", + "M": 28672, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 273.68025000000006 + }, + "M=28672,N=1728": { + "file": "silu_config_M28672_N1728.json", + "M": 28672, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.6400000000001 + }, + "M=28672,N=1760": { + "file": "silu_config_M28672_N1760.json", + "M": 28672, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 284.00025000000005 + }, + "M=28672,N=1792": { + "file": "silu_config_M28672_N1792.json", + "M": 28672, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 271.8399999999999 + }, + "M=28672,N=1920": { + "file": "silu_config_M28672_N1920.json", + "M": 28672, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 285.9202499999999 + }, + "M=28672,N=2048": { + "file": "silu_config_M28672_N2048.json", + "M": 28672, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 228.1199999999999 + }, + "M=28672,N=2080": { + "file": "silu_config_M28672_N2080.json", + "M": 28672, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 315.2802499999999 + }, + "M=28672,N=2240": { + "file": "silu_config_M28672_N2240.json", + "M": 28672, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.00024999999994 + }, + "M=28672,N=2400": { + "file": "silu_config_M28672_N2400.json", + "M": 28672, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 348.52049999999997 + }, + "M=28672,N=2560": { + "file": "silu_config_M28672_N2560.json", + "M": 28672, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
351.16049999999996 + }, + "M=29696,N=128": { + "file": "silu_config_M29696_N128.json", + "M": 29696, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.47925 + }, + "M=29696,N=160": { + "file": "silu_config_M29696_N160.json", + "M": 29696, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 46.07899999999999 + }, + "M=29696,N=192": { + "file": "silu_config_M29696_N192.json", + "M": 29696, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 43.55924999999999 + }, + "M=29696,N=256": { + "file": "silu_config_M29696_N256.json", + "M": 29696, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.75925 + }, + "M=29696,N=320": { + "file": "silu_config_M29696_N320.json", + "M": 29696, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.83924999999999 + }, + "M=29696,N=384": { + "file": "silu_config_M29696_N384.json", + "M": 29696, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.47925000000002 + }, + "M=29696,N=480": { + "file": "silu_config_M29696_N480.json", + "M": 29696, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.11925 + }, + "M=29696,N=512": { + "file": "silu_config_M29696_N512.json", + "M": 29696, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 65.15924999999999 + }, + "M=29696,N=576": { + "file": "silu_config_M29696_N576.json", + "M": 29696, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 150.3995 + }, + "M=29696,N=640": { + "file": "silu_config_M29696_N640.json", + "M": 29696, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 152.4795 + }, + "M=29696,N=768": { + "file": "silu_config_M29696_N768.json", + "M": 29696, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 149.95950000000002 + }, + "M=29696,N=800": { + "file": "silu_config_M29696_N800.json", + "M": 29696, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 148.99949999999993 + }, + "M=29696,N=896": { + "file": 
"silu_config_M29696_N896.json", + "M": 29696, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 153.47949999999994 + }, + "M=29696,N=960": { + "file": "silu_config_M29696_N960.json", + "M": 29696, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.99949999999998 + }, + "M=29696,N=1024": { + "file": "silu_config_M29696_N1024.json", + "M": 29696, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 123.83975000000004 + }, + "M=29696,N=1120": { + "file": "silu_config_M29696_N1120.json", + "M": 29696, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.92025000000007 + }, + "M=29696,N=1152": { + "file": "silu_config_M29696_N1152.json", + "M": 29696, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 292.68024999999983 + }, + "M=29696,N=1280": { + "file": "silu_config_M29696_N1280.json", + "M": 29696, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.5602500000001 + }, + "M=29696,N=1344": { + "file": "silu_config_M29696_N1344.json", + "M": 29696, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 305.20025000000004 + }, + "M=29696,N=1408": { + "file": "silu_config_M29696_N1408.json", + "M": 29696, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.7602500000001 + }, + "M=29696,N=1440": { + "file": "silu_config_M29696_N1440.json", + "M": 29696, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.72025 + }, + "M=29696,N=1536": { + "file": "silu_config_M29696_N1536.json", + "M": 29696, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.64025 + }, + "M=29696,N=1600": { + "file": "silu_config_M29696_N1600.json", + "M": 29696, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 300.08025 + }, + "M=29696,N=1664": { + "file": "silu_config_M29696_N1664.json", + "M": 29696, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 292.56025 + }, + "M=29696,N=1728": { + "file": 
"silu_config_M29696_N1728.json", + "M": 29696, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 295.2802499999999 + }, + "M=29696,N=1760": { + "file": "silu_config_M29696_N1760.json", + "M": 29696, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 295.88024999999993 + }, + "M=29696,N=1792": { + "file": "silu_config_M29696_N1792.json", + "M": 29696, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 291.2402500000002 + }, + "M=29696,N=1920": { + "file": "silu_config_M29696_N1920.json", + "M": 29696, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 294.1205000000001 + }, + "M=29696,N=2048": { + "file": "silu_config_M29696_N2048.json", + "M": 29696, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 236.0799999999998 + }, + "M=29696,N=2080": { + "file": "silu_config_M29696_N2080.json", + "M": 29696, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 315.0002499999998 + }, + "M=29696,N=2240": { + "file": "silu_config_M29696_N2240.json", + "M": 29696, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.2405 + }, + "M=29696,N=2400": { + "file": "silu_config_M29696_N2400.json", + "M": 29696, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 359.96074999999996 + }, + "M=29696,N=2560": { + "file": "silu_config_M29696_N2560.json", + "M": 29696, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.84074999999996 + }, + "M=30720,N=128": { + "file": "silu_config_M30720_N128.json", + "M": 30720, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.15925 + }, + "M=30720,N=160": { + "file": "silu_config_M30720_N160.json", + "M": 30720, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.399 + }, + "M=30720,N=192": { + "file": "silu_config_M30720_N192.json", + "M": 30720, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 44.559000000000005 + }, + "M=30720,N=256": { + "file": 
"silu_config_M30720_N256.json", + "M": 30720, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.679249999999996 + }, + "M=30720,N=320": { + "file": "silu_config_M30720_N320.json", + "M": 30720, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 82.43924999999996 + }, + "M=30720,N=384": { + "file": "silu_config_M30720_N384.json", + "M": 30720, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 81.79925000000003 + }, + "M=30720,N=480": { + "file": "silu_config_M30720_N480.json", + "M": 30720, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 83.67925 + }, + "M=30720,N=512": { + "file": "silu_config_M30720_N512.json", + "M": 30720, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 67.31925000000001 + }, + "M=30720,N=576": { + "file": "silu_config_M30720_N576.json", + "M": 30720, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.47975 + }, + "M=30720,N=640": { + "file": "silu_config_M30720_N640.json", + "M": 30720, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.07975000000002 + }, + "M=30720,N=768": { + "file": "silu_config_M30720_N768.json", + "M": 30720, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.27974999999995 + }, + "M=30720,N=800": { + "file": "silu_config_M30720_N800.json", + "M": 30720, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 148.35950000000003 + }, + "M=30720,N=896": { + "file": "silu_config_M30720_N896.json", + "M": 30720, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.59975000000006 + }, + "M=30720,N=960": { + "file": "silu_config_M30720_N960.json", + "M": 30720, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.11975 + }, + "M=30720,N=1024": { + "file": "silu_config_M30720_N1024.json", + "M": 30720, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 127.07950000000001 + }, + "M=30720,N=1120": { + "file": 
"silu_config_M30720_N1120.json", + "M": 30720, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.20025 + }, + "M=30720,N=1152": { + "file": "silu_config_M30720_N1152.json", + "M": 30720, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.28024999999985 + }, + "M=30720,N=1280": { + "file": "silu_config_M30720_N1280.json", + "M": 30720, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 307.60024999999996 + }, + "M=30720,N=1344": { + "file": "silu_config_M30720_N1344.json", + "M": 30720, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 288.92000000000013 + }, + "M=30720,N=1408": { + "file": "silu_config_M30720_N1408.json", + "M": 30720, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.20025000000015 + }, + "M=30720,N=1440": { + "file": "silu_config_M30720_N1440.json", + "M": 30720, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.4802499999999 + }, + "M=30720,N=1536": { + "file": "silu_config_M30720_N1536.json", + "M": 30720, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.4002499999999 + }, + "M=30720,N=1600": { + "file": "silu_config_M30720_N1600.json", + "M": 30720, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 313.8805 + }, + "M=30720,N=1664": { + "file": "silu_config_M30720_N1664.json", + "M": 30720, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.8002499999999 + }, + "M=30720,N=1728": { + "file": "silu_config_M30720_N1728.json", + "M": 30720, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.16025 + }, + "M=30720,N=1760": { + "file": "silu_config_M30720_N1760.json", + "M": 30720, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.9605 + }, + "M=30720,N=1792": { + "file": "silu_config_M30720_N1792.json", + "M": 30720, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.84024999999986 + }, + "M=30720,N=1920": { + "file": 
"silu_config_M30720_N1920.json", + "M": 30720, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 306.88025000000005 + }, + "M=30720,N=2048": { + "file": "silu_config_M30720_N2048.json", + "M": 30720, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 245.72000000000003 + }, + "M=30720,N=2080": { + "file": "silu_config_M30720_N2080.json", + "M": 30720, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.6002500000001 + }, + "M=30720,N=2240": { + "file": "silu_config_M30720_N2240.json", + "M": 30720, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.52049999999986 + }, + "M=30720,N=2400": { + "file": "silu_config_M30720_N2400.json", + "M": 30720, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.4805000000001 + }, + "M=30720,N=2560": { + "file": "silu_config_M30720_N2560.json", + "M": 30720, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.40049999999997 + }, + "M=31744,N=128": { + "file": "silu_config_M31744_N128.json", + "M": 31744, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.59900000000001 + }, + "M=31744,N=160": { + "file": "silu_config_M31744_N160.json", + "M": 31744, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.83925000000002 + }, + "M=31744,N=192": { + "file": "silu_config_M31744_N192.json", + "M": 31744, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.518999999999984 + }, + "M=31744,N=256": { + "file": "silu_config_M31744_N256.json", + "M": 31744, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.51925 + }, + "M=31744,N=320": { + "file": "silu_config_M31744_N320.json", + "M": 31744, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.75925 + }, + "M=31744,N=384": { + "file": "silu_config_M31744_N384.json", + "M": 31744, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.59924999999998 + }, + "M=31744,N=480": { + "file": 
"silu_config_M31744_N480.json", + "M": 31744, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.07925000000003 + }, + "M=31744,N=512": { + "file": "silu_config_M31744_N512.json", + "M": 31744, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 69.23925000000001 + }, + "M=31744,N=576": { + "file": "silu_config_M31744_N576.json", + "M": 31744, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.91975000000002 + }, + "M=31744,N=640": { + "file": "silu_config_M31744_N640.json", + "M": 31744, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 159.31950000000003 + }, + "M=31744,N=768": { + "file": "silu_config_M31744_N768.json", + "M": 31744, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.03975000000003 + }, + "M=31744,N=800": { + "file": "silu_config_M31744_N800.json", + "M": 31744, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 164.7595 + }, + "M=31744,N=896": { + "file": "silu_config_M31744_N896.json", + "M": 31744, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.95950000000002 + }, + "M=31744,N=960": { + "file": "silu_config_M31744_N960.json", + "M": 31744, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 159.87975 + }, + "M=31744,N=1024": { + "file": "silu_config_M31744_N1024.json", + "M": 31744, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 131.55949999999999 + }, + "M=31744,N=1120": { + "file": "silu_config_M31744_N1120.json", + "M": 31744, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.36024999999995 + }, + "M=31744,N=1152": { + "file": "silu_config_M31744_N1152.json", + "M": 31744, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 310.5202499999998 + }, + "M=31744,N=1280": { + "file": "silu_config_M31744_N1280.json", + "M": 31744, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.0802500000001 + }, + "M=31744,N=1344": { + "file": 
"silu_config_M31744_N1344.json", + "M": 31744, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 313.80025 + }, + "M=31744,N=1408": { + "file": "silu_config_M31744_N1408.json", + "M": 31744, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 316.8402500000001 + }, + "M=31744,N=1440": { + "file": "silu_config_M31744_N1440.json", + "M": 31744, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.7605000000001 + }, + "M=31744,N=1536": { + "file": "silu_config_M31744_N1536.json", + "M": 31744, + "N": 1536, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 317.44049999999993 + }, + "M=31744,N=1600": { + "file": "silu_config_M31744_N1600.json", + "M": 31744, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.96024999999986 + }, + "M=31744,N=1664": { + "file": "silu_config_M31744_N1664.json", + "M": 31744, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 312.1204999999999 + }, + "M=31744,N=1728": { + "file": "silu_config_M31744_N1728.json", + "M": 31744, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 315.20025 + }, + "M=31744,N=1760": { + "file": "silu_config_M31744_N1760.json", + "M": 31744, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 315.72050000000013 + }, + "M=31744,N=1792": { + "file": "silu_config_M31744_N1792.json", + "M": 31744, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.24025000000006 + }, + "M=31744,N=1920": { + "file": "silu_config_M31744_N1920.json", + "M": 31744, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 319.6005 + }, + "M=31744,N=2048": { + "file": "silu_config_M31744_N2048.json", + "M": 31744, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 242.2800000000002 + }, + "M=31744,N=2080": { + "file": "silu_config_M31744_N2080.json", + "M": 31744, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 336.24025000000006 + }, + "M=31744,N=2240": { + "file": 
"silu_config_M31744_N2240.json", + "M": 31744, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 360.68050000000017 + }, + "M=31744,N=2400": { + "file": "silu_config_M31744_N2400.json", + "M": 31744, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 372.20050000000003 + }, + "M=31744,N=2560": { + "file": "silu_config_M31744_N2560.json", + "M": 31744, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.52049999999997 + }, + "M=32768,N=128": { + "file": "silu_config_M32768_N128.json", + "M": 32768, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.119250000000008 + }, + "M=32768,N=160": { + "file": "silu_config_M32768_N160.json", + "M": 32768, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.599 + }, + "M=32768,N=192": { + "file": "silu_config_M32768_N192.json", + "M": 32768, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.83899999999999 + }, + "M=32768,N=256": { + "file": "silu_config_M32768_N256.json", + "M": 32768, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.63925 + }, + "M=32768,N=320": { + "file": "silu_config_M32768_N320.json", + "M": 32768, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.47925000000001 + }, + "M=32768,N=384": { + "file": "silu_config_M32768_N384.json", + "M": 32768, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 86.71925000000002 + }, + "M=32768,N=480": { + "file": "silu_config_M32768_N480.json", + "M": 32768, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.4795 + }, + "M=32768,N=512": { + "file": "silu_config_M32768_N512.json", + "M": 32768, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 68.91925 + }, + "M=32768,N=576": { + "file": "silu_config_M32768_N576.json", + "M": 32768, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.27975 + }, + "M=32768,N=640": { + "file": "silu_config_M32768_N640.json", + "M": 32768, + "N": 
640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.23975 + }, + "M=32768,N=768": { + "file": "silu_config_M32768_N768.json", + "M": 32768, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 164.95974999999993 + }, + "M=32768,N=800": { + "file": "silu_config_M32768_N800.json", + "M": 32768, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 163.87949999999998 + }, + "M=32768,N=896": { + "file": "silu_config_M32768_N896.json", + "M": 32768, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.75949999999997 + }, + "M=32768,N=960": { + "file": "silu_config_M32768_N960.json", + "M": 32768, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 170.31975 + }, + "M=32768,N=1024": { + "file": "silu_config_M32768_N1024.json", + "M": 32768, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 135.51975 + }, + "M=32768,N=1120": { + "file": "silu_config_M32768_N1120.json", + "M": 32768, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.6005 + }, + "M=32768,N=1152": { + "file": "silu_config_M32768_N1152.json", + "M": 32768, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.2402499999999 + }, + "M=32768,N=1280": { + "file": "silu_config_M32768_N1280.json", + "M": 32768, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 307.28024999999997 + }, + "M=32768,N=1344": { + "file": "silu_config_M32768_N1344.json", + "M": 32768, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 323.88024999999993 + }, + "M=32768,N=1408": { + "file": "silu_config_M32768_N1408.json", + "M": 32768, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.88025000000005 + }, + "M=32768,N=1440": { + "file": "silu_config_M32768_N1440.json", + "M": 32768, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.52049999999997 + }, + "M=32768,N=1536": { + "file": "silu_config_M32768_N1536.json", + "M": 32768, + "N": 1536, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 318.44025 + }, + "M=32768,N=1600": { + "file": "silu_config_M32768_N1600.json", + "M": 32768, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.52025000000003 + }, + "M=32768,N=1664": { + "file": "silu_config_M32768_N1664.json", + "M": 32768, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 311.9602500000001 + }, + "M=32768,N=1728": { + "file": "silu_config_M32768_N1728.json", + "M": 32768, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 321.28025 + }, + "M=32768,N=1760": { + "file": "silu_config_M32768_N1760.json", + "M": 32768, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.6002500000001 + }, + "M=32768,N=1792": { + "file": "silu_config_M32768_N1792.json", + "M": 32768, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 322.96024999999986 + }, + "M=32768,N=1920": { + "file": "silu_config_M32768_N1920.json", + "M": 32768, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.36024999999995 + }, + "M=32768,N=2048": { + "file": "silu_config_M32768_N2048.json", + "M": 32768, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 253.27999999999994 + }, + "M=32768,N=2080": { + "file": "silu_config_M32768_N2080.json", + "M": 32768, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6005000000001 + }, + "M=32768,N=2240": { + "file": "silu_config_M32768_N2240.json", + "M": 32768, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 365.2405000000001 + }, + "M=32768,N=2400": { + "file": "silu_config_M32768_N2400.json", + "M": 32768, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 383.88049999999987 + }, + "M=32768,N=2560": { + "file": "silu_config_M32768_N2560.json", + "M": 32768, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.8007500000002 + }, + "M=33792,N=128": { + "file": "silu_config_M33792_N128.json", + "M": 33792, + "N": 
128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.67924999999999 + }, + "M=33792,N=160": { + "file": "silu_config_M33792_N160.json", + "M": 33792, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.359249999999996 + }, + "M=33792,N=192": { + "file": "silu_config_M33792_N192.json", + "M": 33792, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 48.15924999999999 + }, + "M=33792,N=256": { + "file": "silu_config_M33792_N256.json", + "M": 33792, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.67925 + }, + "M=33792,N=320": { + "file": "silu_config_M33792_N320.json", + "M": 33792, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.71949999999998 + }, + "M=33792,N=384": { + "file": "silu_config_M33792_N384.json", + "M": 33792, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.11925 + }, + "M=33792,N=480": { + "file": "silu_config_M33792_N480.json", + "M": 33792, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.39949999999999 + }, + "M=33792,N=512": { + "file": "silu_config_M33792_N512.json", + "M": 33792, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 73.15925000000001 + }, + "M=33792,N=576": { + "file": "silu_config_M33792_N576.json", + "M": 33792, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 166.39975 + }, + "M=33792,N=640": { + "file": "silu_config_M33792_N640.json", + "M": 33792, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.99975000000003 + }, + "M=33792,N=768": { + "file": "silu_config_M33792_N768.json", + "M": 33792, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 165.83950000000002 + }, + "M=33792,N=800": { + "file": "silu_config_M33792_N800.json", + "M": 33792, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 175.03975000000003 + }, + "M=33792,N=896": { + "file": "silu_config_M33792_N896.json", + "M": 33792, + "N": 896, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 173.19975000000005 + }, + "M=33792,N=960": { + "file": "silu_config_M33792_N960.json", + "M": 33792, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 168.4395 + }, + "M=33792,N=1024": { + "file": "silu_config_M33792_N1024.json", + "M": 33792, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 138.67950000000005 + }, + "M=33792,N=1120": { + "file": "silu_config_M33792_N1120.json", + "M": 33792, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 333.7202500000001 + }, + "M=33792,N=1152": { + "file": "silu_config_M33792_N1152.json", + "M": 33792, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48049999999984 + }, + "M=33792,N=1280": { + "file": "silu_config_M33792_N1280.json", + "M": 33792, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 336.56050000000016 + }, + "M=33792,N=1344": { + "file": "silu_config_M33792_N1344.json", + "M": 33792, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 329.4805000000001 + }, + "M=33792,N=1408": { + "file": "silu_config_M33792_N1408.json", + "M": 33792, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.4404999999998 + }, + "M=33792,N=1440": { + "file": "silu_config_M33792_N1440.json", + "M": 33792, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 334.08024999999986 + }, + "M=33792,N=1536": { + "file": "silu_config_M33792_N1536.json", + "M": 33792, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.64025000000015 + }, + "M=33792,N=1600": { + "file": "silu_config_M33792_N1600.json", + "M": 33792, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.16025000000013 + }, + "M=33792,N=1664": { + "file": "silu_config_M33792_N1664.json", + "M": 33792, + "N": 1664, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 338.16050000000007 + }, + "M=33792,N=1728": { + "file": "silu_config_M33792_N1728.json", + "M": 33792, + "N": 1728, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 330.92049999999995 + }, + "M=33792,N=1760": { + "file": "silu_config_M33792_N1760.json", + "M": 33792, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 333.3202500000001 + }, + "M=33792,N=1792": { + "file": "silu_config_M33792_N1792.json", + "M": 33792, + "N": 1792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 339.0005000000001 + }, + "M=33792,N=1920": { + "file": "silu_config_M33792_N1920.json", + "M": 33792, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 331.20025 + }, + "M=33792,N=2048": { + "file": "silu_config_M33792_N2048.json", + "M": 33792, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 255.8000000000001 + }, + "M=33792,N=2080": { + "file": "silu_config_M33792_N2080.json", + "M": 33792, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 364.0807500000001 + }, + "M=33792,N=2240": { + "file": "silu_config_M33792_N2240.json", + "M": 33792, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 385.88049999999987 + }, + "M=33792,N=2400": { + "file": "silu_config_M33792_N2400.json", + "M": 33792, + "N": 2400, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 417.3207500000001 + }, + "M=33792,N=2560": { + "file": "silu_config_M33792_N2560.json", + "M": 33792, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.2007500000004 + }, + "M=34816,N=128": { + "file": "silu_config_M34816_N128.json", + "M": 34816, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.27900000000001 + }, + "M=34816,N=160": { + "file": "silu_config_M34816_N160.json", + "M": 34816, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.75925000000001 + }, + "M=34816,N=192": { + "file": "silu_config_M34816_N192.json", + "M": 34816, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.87925000000001 + }, + "M=34816,N=256": { + "file": "silu_config_M34816_N256.json", + "M": 34816, + "N": 256, 
+ "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.71925000000001 + }, + "M=34816,N=320": { + "file": "silu_config_M34816_N320.json", + "M": 34816, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.1995 + }, + "M=34816,N=384": { + "file": "silu_config_M34816_N384.json", + "M": 34816, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.43949999999995 + }, + "M=34816,N=480": { + "file": "silu_config_M34816_N480.json", + "M": 34816, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.59949999999999 + }, + "M=34816,N=512": { + "file": "silu_config_M34816_N512.json", + "M": 34816, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 75.23924999999998 + }, + "M=34816,N=576": { + "file": "silu_config_M34816_N576.json", + "M": 34816, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 172.51975 + }, + "M=34816,N=640": { + "file": "silu_config_M34816_N640.json", + "M": 34816, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.04000000000002 + }, + "M=34816,N=768": { + "file": "silu_config_M34816_N768.json", + "M": 34816, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.23975000000004 + }, + "M=34816,N=800": { + "file": "silu_config_M34816_N800.json", + "M": 34816, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 176.87975 + }, + "M=34816,N=896": { + "file": "silu_config_M34816_N896.json", + "M": 34816, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 172.63975 + }, + "M=34816,N=960": { + "file": "silu_config_M34816_N960.json", + "M": 34816, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.31975 + }, + "M=34816,N=1024": { + "file": "silu_config_M34816_N1024.json", + "M": 34816, + "N": 1024, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 143.31975 + }, + "M=34816,N=1120": { + "file": "silu_config_M34816_N1120.json", + "M": 34816, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
343.84049999999996 + }, + "M=34816,N=1152": { + "file": "silu_config_M34816_N1152.json", + "M": 34816, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 339.92050000000006 + }, + "M=34816,N=1280": { + "file": "silu_config_M34816_N1280.json", + "M": 34816, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 329.68050000000017 + }, + "M=34816,N=1344": { + "file": "silu_config_M34816_N1344.json", + "M": 34816, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 350.0804999999999 + }, + "M=34816,N=1408": { + "file": "silu_config_M34816_N1408.json", + "M": 34816, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 347.2405000000001 + }, + "M=34816,N=1440": { + "file": "silu_config_M34816_N1440.json", + "M": 34816, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.0005000000002 + }, + "M=34816,N=1536": { + "file": "silu_config_M34816_N1536.json", + "M": 34816, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 337.24024999999995 + }, + "M=34816,N=1600": { + "file": "silu_config_M34816_N1600.json", + "M": 34816, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 344.44025 + }, + "M=34816,N=1664": { + "file": "silu_config_M34816_N1664.json", + "M": 34816, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.80049999999994 + }, + "M=34816,N=1728": { + "file": "silu_config_M34816_N1728.json", + "M": 34816, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.60050000000024 + }, + "M=34816,N=1760": { + "file": "silu_config_M34816_N1760.json", + "M": 34816, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.6005 + }, + "M=34816,N=1792": { + "file": "silu_config_M34816_N1792.json", + "M": 34816, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.56049999999993 + }, + "M=34816,N=1920": { + "file": "silu_config_M34816_N1920.json", + "M": 34816, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 339.64049999999986 + }, + "M=34816,N=2048": { + "file": "silu_config_M34816_N2048.json", + "M": 34816, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 265.2002500000001 + }, + "M=34816,N=2080": { + "file": "silu_config_M34816_N2080.json", + "M": 34816, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.92075000000034 + }, + "M=34816,N=2240": { + "file": "silu_config_M34816_N2240.json", + "M": 34816, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.0005000000002 + }, + "M=34816,N=2400": { + "file": "silu_config_M34816_N2400.json", + "M": 34816, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 417.56074999999976 + }, + "M=34816,N=2560": { + "file": "silu_config_M34816_N2560.json", + "M": 34816, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 434.9207500000001 + }, + "M=35840,N=128": { + "file": "silu_config_M35840_N128.json", + "M": 35840, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.799000000000014 + }, + "M=35840,N=160": { + "file": "silu_config_M35840_N160.json", + "M": 35840, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.95925 + }, + "M=35840,N=192": { + "file": "silu_config_M35840_N192.json", + "M": 35840, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.35925 + }, + "M=35840,N=256": { + "file": "silu_config_M35840_N256.json", + "M": 35840, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.639 + }, + "M=35840,N=320": { + "file": "silu_config_M35840_N320.json", + "M": 35840, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 94.23925 + }, + "M=35840,N=384": { + "file": "silu_config_M35840_N384.json", + "M": 35840, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 93.71949999999997 + }, + "M=35840,N=480": { + "file": "silu_config_M35840_N480.json", + "M": 35840, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 96.03949999999999 + }, + 
"M=35840,N=512": { + "file": "silu_config_M35840_N512.json", + "M": 35840, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 77.23924999999998 + }, + "M=35840,N=576": { + "file": "silu_config_M35840_N576.json", + "M": 35840, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 184.27974999999998 + }, + "M=35840,N=640": { + "file": "silu_config_M35840_N640.json", + "M": 35840, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 176.23975000000004 + }, + "M=35840,N=768": { + "file": "silu_config_M35840_N768.json", + "M": 35840, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 178.63975000000005 + }, + "M=35840,N=800": { + "file": "silu_config_M35840_N800.json", + "M": 35840, + "N": 800, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 185.67974999999996 + }, + "M=35840,N=896": { + "file": "silu_config_M35840_N896.json", + "M": 35840, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.43999999999994 + }, + "M=35840,N=960": { + "file": "silu_config_M35840_N960.json", + "M": 35840, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 186.16000000000003 + }, + "M=35840,N=1024": { + "file": "silu_config_M35840_N1024.json", + "M": 35840, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 147.67949999999996 + }, + "M=35840,N=1120": { + "file": "silu_config_M35840_N1120.json", + "M": 35840, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 356.92050000000006 + }, + "M=35840,N=1152": { + "file": "silu_config_M35840_N1152.json", + "M": 35840, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.7205 + }, + "M=35840,N=1280": { + "file": "silu_config_M35840_N1280.json", + "M": 35840, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.4805 + }, + "M=35840,N=1344": { + "file": "silu_config_M35840_N1344.json", + "M": 35840, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.08025 + }, + "M=35840,N=1408": { 
+ "file": "silu_config_M35840_N1408.json", + "M": 35840, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.16049999999984 + }, + "M=35840,N=1440": { + "file": "silu_config_M35840_N1440.json", + "M": 35840, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.3604999999999 + }, + "M=35840,N=1536": { + "file": "silu_config_M35840_N1536.json", + "M": 35840, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.8405 + }, + "M=35840,N=1600": { + "file": "silu_config_M35840_N1600.json", + "M": 35840, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.0804999999999 + }, + "M=35840,N=1664": { + "file": "silu_config_M35840_N1664.json", + "M": 35840, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.80049999999994 + }, + "M=35840,N=1728": { + "file": "silu_config_M35840_N1728.json", + "M": 35840, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.6005 + }, + "M=35840,N=1760": { + "file": "silu_config_M35840_N1760.json", + "M": 35840, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 355.9604999999999 + }, + "M=35840,N=1792": { + "file": "silu_config_M35840_N1792.json", + "M": 35840, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.04050000000007 + }, + "M=35840,N=1920": { + "file": "silu_config_M35840_N1920.json", + "M": 35840, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.96050000000014 + }, + "M=35840,N=2048": { + "file": "silu_config_M35840_N2048.json", + "M": 35840, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 280.4000000000001 + }, + "M=35840,N=2080": { + "file": "silu_config_M35840_N2080.json", + "M": 35840, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.2805000000002 + }, + "M=35840,N=2240": { + "file": "silu_config_M35840_N2240.json", + "M": 35840, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 397.48075000000017 + }, + 
"M=35840,N=2400": { + "file": "silu_config_M35840_N2400.json", + "M": 35840, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 428.8007500000002 + }, + "M=35840,N=2560": { + "file": "silu_config_M35840_N2560.json", + "M": 35840, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 430.0407500000001 + }, + "M=36864,N=128": { + "file": "silu_config_M36864_N128.json", + "M": 36864, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.319249999999997 + }, + "M=36864,N=160": { + "file": "silu_config_M36864_N160.json", + "M": 36864, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 51.799 + }, + "M=36864,N=192": { + "file": "silu_config_M36864_N192.json", + "M": 36864, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.51925 + }, + "M=36864,N=256": { + "file": "silu_config_M36864_N256.json", + "M": 36864, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.79925000000001 + }, + "M=36864,N=320": { + "file": "silu_config_M36864_N320.json", + "M": 36864, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.07924999999999 + }, + "M=36864,N=384": { + "file": "silu_config_M36864_N384.json", + "M": 36864, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 96.43950000000002 + }, + "M=36864,N=480": { + "file": "silu_config_M36864_N480.json", + "M": 36864, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.67925 + }, + "M=36864,N=512": { + "file": "silu_config_M36864_N512.json", + "M": 36864, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 79.15924999999996 + }, + "M=36864,N=576": { + "file": "silu_config_M36864_N576.json", + "M": 36864, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 181.07975 + }, + "M=36864,N=640": { + "file": "silu_config_M36864_N640.json", + "M": 36864, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 179.79975000000005 + }, + "M=36864,N=768": { + "file": 
"silu_config_M36864_N768.json", + "M": 36864, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 180.43975 + }, + "M=36864,N=800": { + "file": "silu_config_M36864_N800.json", + "M": 36864, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 182.31975000000006 + }, + "M=36864,N=896": { + "file": "silu_config_M36864_N896.json", + "M": 36864, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 181.15975000000003 + }, + "M=36864,N=960": { + "file": "silu_config_M36864_N960.json", + "M": 36864, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.35975000000002 + }, + "M=36864,N=1024": { + "file": "silu_config_M36864_N1024.json", + "M": 36864, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 149.8395 + }, + "M=36864,N=1120": { + "file": "silu_config_M36864_N1120.json", + "M": 36864, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.4805 + }, + "M=36864,N=1152": { + "file": "silu_config_M36864_N1152.json", + "M": 36864, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.4805 + }, + "M=36864,N=1280": { + "file": "silu_config_M36864_N1280.json", + "M": 36864, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 355.04049999999995 + }, + "M=36864,N=1344": { + "file": "silu_config_M36864_N1344.json", + "M": 36864, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.72050000000013 + }, + "M=36864,N=1408": { + "file": "silu_config_M36864_N1408.json", + "M": 36864, + "N": 1408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 367.20050000000003 + }, + "M=36864,N=1440": { + "file": "silu_config_M36864_N1440.json", + "M": 36864, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 361.56049999999993 + }, + "M=36864,N=1536": { + "file": "silu_config_M36864_N1536.json", + "M": 36864, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.08050000000003 + }, + "M=36864,N=1600": { + "file": 
"silu_config_M36864_N1600.json", + "M": 36864, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 361.4005000000002 + }, + "M=36864,N=1664": { + "file": "silu_config_M36864_N1664.json", + "M": 36864, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 360.9604999999999 + }, + "M=36864,N=1728": { + "file": "silu_config_M36864_N1728.json", + "M": 36864, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 371.9607500000001 + }, + "M=36864,N=1760": { + "file": "silu_config_M36864_N1760.json", + "M": 36864, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 363.3204999999998 + }, + "M=36864,N=1792": { + "file": "silu_config_M36864_N1792.json", + "M": 36864, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 359.8404999999998 + }, + "M=36864,N=1920": { + "file": "silu_config_M36864_N1920.json", + "M": 36864, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.4802500000002 + }, + "M=36864,N=2048": { + "file": "silu_config_M36864_N2048.json", + "M": 36864, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 278.2000000000003 + }, + "M=36864,N=2080": { + "file": "silu_config_M36864_N2080.json", + "M": 36864, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 396.0007499999999 + }, + "M=36864,N=2240": { + "file": "silu_config_M36864_N2240.json", + "M": 36864, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 404.5207499999999 + }, + "M=36864,N=2400": { + "file": "silu_config_M36864_N2400.json", + "M": 36864, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 436.92100000000016 + }, + "M=36864,N=2560": { + "file": "silu_config_M36864_N2560.json", + "M": 36864, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.08100000000013 + }, + "M=37888,N=128": { + "file": "silu_config_M37888_N128.json", + "M": 37888, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 28.119250000000008 + }, + 
"M=37888,N=160": { + "file": "silu_config_M37888_N160.json", + "M": 37888, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 56.75925 + }, + "M=37888,N=192": { + "file": "silu_config_M37888_N192.json", + "M": 37888, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 56.75925000000001 + }, + "M=37888,N=256": { + "file": "silu_config_M37888_N256.json", + "M": 37888, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 47.99925 + }, + "M=37888,N=320": { + "file": "silu_config_M37888_N320.json", + "M": 37888, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.83950000000002 + }, + "M=37888,N=384": { + "file": "silu_config_M37888_N384.json", + "M": 37888, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.07950000000002 + }, + "M=37888,N=480": { + "file": "silu_config_M37888_N480.json", + "M": 37888, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 101.11950000000003 + }, + "M=37888,N=512": { + "file": "silu_config_M37888_N512.json", + "M": 37888, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 81.15925 + }, + "M=37888,N=576": { + "file": "silu_config_M37888_N576.json", + "M": 37888, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 190.71975000000003 + }, + "M=37888,N=640": { + "file": "silu_config_M37888_N640.json", + "M": 37888, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.87975000000006 + }, + "M=37888,N=768": { + "file": "silu_config_M37888_N768.json", + "M": 37888, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 186.63975 + }, + "M=37888,N=800": { + "file": "silu_config_M37888_N800.json", + "M": 37888, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.71975000000003 + }, + "M=37888,N=896": { + "file": "silu_config_M37888_N896.json", + "M": 37888, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 190.87974999999994 + }, + "M=37888,N=960": { + "file": 
"silu_config_M37888_N960.json", + "M": 37888, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 193.11975 + }, + "M=37888,N=1024": { + "file": "silu_config_M37888_N1024.json", + "M": 37888, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 153.83974999999998 + }, + "M=37888,N=1120": { + "file": "silu_config_M37888_N1120.json", + "M": 37888, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 373.40049999999997 + }, + "M=37888,N=1152": { + "file": "silu_config_M37888_N1152.json", + "M": 37888, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 369.4404999999998 + }, + "M=37888,N=1280": { + "file": "silu_config_M37888_N1280.json", + "M": 37888, + "N": 1280, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 376.5205000000001 + }, + "M=37888,N=1344": { + "file": "silu_config_M37888_N1344.json", + "M": 37888, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.80075 + }, + "M=37888,N=1408": { + "file": "silu_config_M37888_N1408.json", + "M": 37888, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 370.2805000000002 + }, + "M=37888,N=1440": { + "file": "silu_config_M37888_N1440.json", + "M": 37888, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 372.44074999999987 + }, + "M=37888,N=1536": { + "file": "silu_config_M37888_N1536.json", + "M": 37888, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 381.08050000000014 + }, + "M=37888,N=1600": { + "file": "silu_config_M37888_N1600.json", + "M": 37888, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 383.52075 + }, + "M=37888,N=1664": { + "file": "silu_config_M37888_N1664.json", + "M": 37888, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.6405000000001 + }, + "M=37888,N=1728": { + "file": "silu_config_M37888_N1728.json", + "M": 37888, + "N": 1728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 382.16050000000007 + }, + "M=37888,N=1760": { + "file": 
"silu_config_M37888_N1760.json", + "M": 37888, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.2004999999998 + }, + "M=37888,N=1792": { + "file": "silu_config_M37888_N1792.json", + "M": 37888, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.7204999999999 + }, + "M=37888,N=1920": { + "file": "silu_config_M37888_N1920.json", + "M": 37888, + "N": 1920, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 380.56049999999993 + }, + "M=37888,N=2048": { + "file": "silu_config_M37888_N2048.json", + "M": 37888, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 302.08024999999986 + }, + "M=37888,N=2080": { + "file": "silu_config_M37888_N2080.json", + "M": 37888, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 409.6407499999999 + }, + "M=37888,N=2240": { + "file": "silu_config_M37888_N2240.json", + "M": 37888, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 425.48074999999983 + }, + "M=37888,N=2400": { + "file": "silu_config_M37888_N2400.json", + "M": 37888, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.76075000000037 + }, + "M=37888,N=2560": { + "file": "silu_config_M37888_N2560.json", + "M": 37888, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 468.1210000000001 + }, + "M=38912,N=128": { + "file": "silu_config_M38912_N128.json", + "M": 38912, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 28.559000000000005 + }, + "M=38912,N=160": { + "file": "silu_config_M38912_N160.json", + "M": 38912, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.91924999999999 + }, + "M=38912,N=192": { + "file": "silu_config_M38912_N192.json", + "M": 38912, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.959250000000004 + }, + "M=38912,N=256": { + "file": "silu_config_M38912_N256.json", + "M": 38912, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.03900000000002 + }, + 
"M=38912,N=320": { + "file": "silu_config_M38912_N320.json", + "M": 38912, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 102.03925 + }, + "M=38912,N=384": { + "file": "silu_config_M38912_N384.json", + "M": 38912, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.3995 + }, + "M=38912,N=480": { + "file": "silu_config_M38912_N480.json", + "M": 38912, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.23949999999999 + }, + "M=38912,N=512": { + "file": "silu_config_M38912_N512.json", + "M": 38912, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 83.19949999999996 + }, + "M=38912,N=576": { + "file": "silu_config_M38912_N576.json", + "M": 38912, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.99975000000003 + }, + "M=38912,N=640": { + "file": "silu_config_M38912_N640.json", + "M": 38912, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 196.56 + }, + "M=38912,N=768": { + "file": "silu_config_M38912_N768.json", + "M": 38912, + "N": 768, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 198.92000000000002 + }, + "M=38912,N=800": { + "file": "silu_config_M38912_N800.json", + "M": 38912, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.55975 + }, + "M=38912,N=896": { + "file": "silu_config_M38912_N896.json", + "M": 38912, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 195.87974999999994 + }, + "M=38912,N=960": { + "file": "silu_config_M38912_N960.json", + "M": 38912, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 197.79974999999996 + }, + "M=38912,N=1024": { + "file": "silu_config_M38912_N1024.json", + "M": 38912, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 156.6395 + }, + "M=38912,N=1120": { + "file": "silu_config_M38912_N1120.json", + "M": 38912, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.20074999999997 + }, + "M=38912,N=1152": { + "file": 
"silu_config_M38912_N1152.json", + "M": 38912, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 367.2405 + }, + "M=38912,N=1280": { + "file": "silu_config_M38912_N1280.json", + "M": 38912, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.7204999999998 + }, + "M=38912,N=1344": { + "file": "silu_config_M38912_N1344.json", + "M": 38912, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 381.4805000000001 + }, + "M=38912,N=1408": { + "file": "silu_config_M38912_N1408.json", + "M": 38912, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.9207500000001 + }, + "M=38912,N=1440": { + "file": "silu_config_M38912_N1440.json", + "M": 38912, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.5205000000002 + }, + "M=38912,N=1536": { + "file": "silu_config_M38912_N1536.json", + "M": 38912, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.6405 + }, + "M=38912,N=1600": { + "file": "silu_config_M38912_N1600.json", + "M": 38912, + "N": 1600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 391.5207499999999 + }, + "M=38912,N=1664": { + "file": "silu_config_M38912_N1664.json", + "M": 38912, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 391.08050000000014 + }, + "M=38912,N=1728": { + "file": "silu_config_M38912_N1728.json", + "M": 38912, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 385.20074999999997 + }, + "M=38912,N=1760": { + "file": "silu_config_M38912_N1760.json", + "M": 38912, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.4805 + }, + "M=38912,N=1792": { + "file": "silu_config_M38912_N1792.json", + "M": 38912, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 382.56074999999987 + }, + "M=38912,N=1920": { + "file": "silu_config_M38912_N1920.json", + "M": 38912, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 383.84075000000007 + }, + "M=38912,N=2048": { + "file": 
"silu_config_M38912_N2048.json", + "M": 38912, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 315.32050000000015 + }, + "M=38912,N=2080": { + "file": "silu_config_M38912_N2080.json", + "M": 38912, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 425.40075 + }, + "M=38912,N=2240": { + "file": "silu_config_M38912_N2240.json", + "M": 38912, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.44075 + }, + "M=38912,N=2400": { + "file": "silu_config_M38912_N2400.json", + "M": 38912, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.8409999999999 + }, + "M=38912,N=2560": { + "file": "silu_config_M38912_N2560.json", + "M": 38912, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.8009999999997 + }, + "M=39936,N=128": { + "file": "silu_config_M39936_N128.json", + "M": 39936, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.079250000000002 + }, + "M=39936,N=160": { + "file": "silu_config_M39936_N160.json", + "M": 39936, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 59.23925 + }, + "M=39936,N=192": { + "file": "silu_config_M39936_N192.json", + "M": 39936, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.15925000000001 + }, + "M=39936,N=256": { + "file": "silu_config_M39936_N256.json", + "M": 39936, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 57.59924999999999 + }, + "M=39936,N=320": { + "file": "silu_config_M39936_N320.json", + "M": 39936, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.75949999999997 + }, + "M=39936,N=384": { + "file": "silu_config_M39936_N384.json", + "M": 39936, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.11949999999997 + }, + "M=39936,N=480": { + "file": "silu_config_M39936_N480.json", + "M": 39936, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.15924999999996 + }, + "M=39936,N=512": { + "file": 
"silu_config_M39936_N512.json", + "M": 39936, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 85.23924999999996 + }, + "M=39936,N=576": { + "file": "silu_config_M39936_N576.json", + "M": 39936, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 200.72 + }, + "M=39936,N=640": { + "file": "silu_config_M39936_N640.json", + "M": 39936, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.27999999999997 + }, + "M=39936,N=768": { + "file": "silu_config_M39936_N768.json", + "M": 39936, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.99975 + }, + "M=39936,N=800": { + "file": "silu_config_M39936_N800.json", + "M": 39936, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 202.47975000000002 + }, + "M=39936,N=896": { + "file": "silu_config_M39936_N896.json", + "M": 39936, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.75975 + }, + "M=39936,N=960": { + "file": "silu_config_M39936_N960.json", + "M": 39936, + "N": 960, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 206.35974999999996 + }, + "M=39936,N=1024": { + "file": "silu_config_M39936_N1024.json", + "M": 39936, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 160.35975000000002 + }, + "M=39936,N=1120": { + "file": "silu_config_M39936_N1120.json", + "M": 39936, + "N": 1120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 400.60074999999995 + }, + "M=39936,N=1152": { + "file": "silu_config_M39936_N1152.json", + "M": 39936, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.4805000000001 + }, + "M=39936,N=1280": { + "file": "silu_config_M39936_N1280.json", + "M": 39936, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.44075 + }, + "M=39936,N=1344": { + "file": "silu_config_M39936_N1344.json", + "M": 39936, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 390.28075 + }, + "M=39936,N=1408": { + "file": "silu_config_M39936_N1408.json", + "M": 
39936, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 394.0407499999998 + }, + "M=39936,N=1440": { + "file": "silu_config_M39936_N1440.json", + "M": 39936, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 401.7607499999999 + }, + "M=39936,N=1536": { + "file": "silu_config_M39936_N1536.json", + "M": 39936, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.08050000000003 + }, + "M=39936,N=1600": { + "file": "silu_config_M39936_N1600.json", + "M": 39936, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.52049999999986 + }, + "M=39936,N=1664": { + "file": "silu_config_M39936_N1664.json", + "M": 39936, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 388.36075000000017 + }, + "M=39936,N=1728": { + "file": "silu_config_M39936_N1728.json", + "M": 39936, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.0007499999998 + }, + "M=39936,N=1760": { + "file": "silu_config_M39936_N1760.json", + "M": 39936, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.80050000000006 + }, + "M=39936,N=1792": { + "file": "silu_config_M39936_N1792.json", + "M": 39936, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 389.44050000000004 + }, + "M=39936,N=1920": { + "file": "silu_config_M39936_N1920.json", + "M": 39936, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 397.00075000000027 + }, + "M=39936,N=2048": { + "file": "silu_config_M39936_N2048.json", + "M": 39936, + "N": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 316.68025 + }, + "M=39936,N=2080": { + "file": "silu_config_M39936_N2080.json", + "M": 39936, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 419.7610000000002 + }, + "M=39936,N=2240": { + "file": "silu_config_M39936_N2240.json", + "M": 39936, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 442.0007499999997 + }, + "M=39936,N=2400": { + "file": 
"silu_config_M39936_N2400.json", + "M": 39936, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.4012499999999 + }, + "M=39936,N=2560": { + "file": "silu_config_M39936_N2560.json", + "M": 39936, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 491.5609999999999 + }, + "M=40960,N=128": { + "file": "silu_config_M40960_N128.json", + "M": 40960, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.519000000000005 + }, + "M=40960,N=160": { + "file": "silu_config_M40960_N160.json", + "M": 40960, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 55.91924999999999 + }, + "M=40960,N=192": { + "file": "silu_config_M40960_N192.json", + "M": 40960, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 60.83925000000001 + }, + "M=40960,N=256": { + "file": "silu_config_M40960_N256.json", + "M": 40960, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.27925000000001 + }, + "M=40960,N=320": { + "file": "silu_config_M40960_N320.json", + "M": 40960, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 106.55949999999997 + }, + "M=40960,N=384": { + "file": "silu_config_M40960_N384.json", + "M": 40960, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 105.99925000000002 + }, + "M=40960,N=480": { + "file": "silu_config_M40960_N480.json", + "M": 40960, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.83924999999998 + }, + "M=40960,N=512": { + "file": "silu_config_M40960_N512.json", + "M": 40960, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 85.31924999999998 + }, + "M=40960,N=576": { + "file": "silu_config_M40960_N576.json", + "M": 40960, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.07974999999985 + }, + "M=40960,N=640": { + "file": "silu_config_M40960_N640.json", + "M": 40960, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 192.43975000000006 + }, + "M=40960,N=768": { + "file": 
"silu_config_M40960_N768.json", + "M": 40960, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.35974999999996 + }, + "M=40960,N=800": { + "file": "silu_config_M40960_N800.json", + "M": 40960, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.59974999999997 + }, + "M=40960,N=896": { + "file": "silu_config_M40960_N896.json", + "M": 40960, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 204.51975000000004 + }, + "M=40960,N=960": { + "file": "silu_config_M40960_N960.json", + "M": 40960, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.27975000000004 + }, + "M=40960,N=1024": { + "file": "silu_config_M40960_N1024.json", + "M": 40960, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 163.99975 + }, + "M=40960,N=1120": { + "file": "silu_config_M40960_N1120.json", + "M": 40960, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 392.6807500000001 + }, + "M=40960,N=1152": { + "file": "silu_config_M40960_N1152.json", + "M": 40960, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.76049999999987 + }, + "M=40960,N=1280": { + "file": "silu_config_M40960_N1280.json", + "M": 40960, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.9604999999999 + }, + "M=40960,N=1344": { + "file": "silu_config_M40960_N1344.json", + "M": 40960, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.48075000000017 + }, + "M=40960,N=1408": { + "file": "silu_config_M40960_N1408.json", + "M": 40960, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.2407499999998 + }, + "M=40960,N=1440": { + "file": "silu_config_M40960_N1440.json", + "M": 40960, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.68075 + }, + "M=40960,N=1536": { + "file": "silu_config_M40960_N1536.json", + "M": 40960, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.8805000000003 + }, + "M=40960,N=1600": { + 
"file": "silu_config_M40960_N1600.json", + "M": 40960, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.4007499999998 + }, + "M=40960,N=1664": { + "file": "silu_config_M40960_N1664.json", + "M": 40960, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.6405000000002 + }, + "M=40960,N=1728": { + "file": "silu_config_M40960_N1728.json", + "M": 40960, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 394.56075 + }, + "M=40960,N=1760": { + "file": "silu_config_M40960_N1760.json", + "M": 40960, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 391.5607500000001 + }, + "M=40960,N=1792": { + "file": "silu_config_M40960_N1792.json", + "M": 40960, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.4407500000001 + }, + "M=40960,N=1920": { + "file": "silu_config_M40960_N1920.json", + "M": 40960, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.08050000000003 + }, + "M=40960,N=2048": { + "file": "silu_config_M40960_N2048.json", + "M": 40960, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 303.3605000000001 + }, + "M=40960,N=2080": { + "file": "silu_config_M40960_N2080.json", + "M": 40960, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 430.2407499999998 + }, + "M=40960,N=2240": { + "file": "silu_config_M40960_N2240.json", + "M": 40960, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 455.84074999999984 + }, + "M=40960,N=2400": { + "file": "silu_config_M40960_N2400.json", + "M": 40960, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.2810000000002 + }, + "M=40960,N=2560": { + "file": "silu_config_M40960_N2560.json", + "M": 40960, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.12099999999987 + }, + "M=41984,N=128": { + "file": "silu_config_M41984_N128.json", + "M": 41984, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 33.59925000000001 + }, + 
"M=41984,N=160": { + "file": "silu_config_M41984_N160.json", + "M": 41984, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 61.99925 + }, + "M=41984,N=192": { + "file": "silu_config_M41984_N192.json", + "M": 41984, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 62.11924999999999 + }, + "M=41984,N=256": { + "file": "silu_config_M41984_N256.json", + "M": 41984, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.519249999999985 + }, + "M=41984,N=320": { + "file": "silu_config_M41984_N320.json", + "M": 41984, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 109.87950000000002 + }, + "M=41984,N=384": { + "file": "silu_config_M41984_N384.json", + "M": 41984, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.99949999999997 + }, + "M=41984,N=480": { + "file": "silu_config_M41984_N480.json", + "M": 41984, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 111.47949999999999 + }, + "M=41984,N=512": { + "file": "silu_config_M41984_N512.json", + "M": 41984, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 89.15950000000002 + }, + "M=41984,N=576": { + "file": "silu_config_M41984_N576.json", + "M": 41984, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 210.51999999999992 + }, + "M=41984,N=640": { + "file": "silu_config_M41984_N640.json", + "M": 41984, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.68 + }, + "M=41984,N=768": { + "file": "silu_config_M41984_N768.json", + "M": 41984, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.3597499999999 + }, + "M=41984,N=800": { + "file": "silu_config_M41984_N800.json", + "M": 41984, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 212.19975000000005 + }, + "M=41984,N=896": { + "file": "silu_config_M41984_N896.json", + "M": 41984, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.64 + }, + "M=41984,N=960": { + "file": 
"silu_config_M41984_N960.json", + "M": 41984, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 208.43975 + }, + "M=41984,N=1024": { + "file": "silu_config_M41984_N1024.json", + "M": 41984, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 168.03974999999997 + }, + "M=41984,N=1120": { + "file": "silu_config_M41984_N1120.json", + "M": 41984, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 413.20074999999986 + }, + "M=41984,N=1152": { + "file": "silu_config_M41984_N1152.json", + "M": 41984, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 401.3207500000002 + }, + "M=41984,N=1280": { + "file": "silu_config_M41984_N1280.json", + "M": 41984, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 407.36075000000017 + }, + "M=41984,N=1344": { + "file": "silu_config_M41984_N1344.json", + "M": 41984, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 413.5210000000001 + }, + "M=41984,N=1408": { + "file": "silu_config_M41984_N1408.json", + "M": 41984, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 407.56074999999976 + }, + "M=41984,N=1440": { + "file": "silu_config_M41984_N1440.json", + "M": 41984, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.56074999999976 + }, + "M=41984,N=1536": { + "file": "silu_config_M41984_N1536.json", + "M": 41984, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 403.0804999999998 + }, + "M=41984,N=1600": { + "file": "silu_config_M41984_N1600.json", + "M": 41984, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.3207500000001 + }, + "M=41984,N=1664": { + "file": "silu_config_M41984_N1664.json", + "M": 41984, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 408.8407500000003 + }, + "M=41984,N=1728": { + "file": "silu_config_M41984_N1728.json", + "M": 41984, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 400.80074999999977 + }, + 
"M=41984,N=1760": { + "file": "silu_config_M41984_N1760.json", + "M": 41984, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 423.08074999999997 + }, + "M=41984,N=1792": { + "file": "silu_config_M41984_N1792.json", + "M": 41984, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 409.6407499999999 + }, + "M=41984,N=1920": { + "file": "silu_config_M41984_N1920.json", + "M": 41984, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.12074999999993 + }, + "M=41984,N=2048": { + "file": "silu_config_M41984_N2048.json", + "M": 41984, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 334.52025000000003 + }, + "M=41984,N=2080": { + "file": "silu_config_M41984_N2080.json", + "M": 41984, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.80099999999993 + }, + "M=41984,N=2240": { + "file": "silu_config_M41984_N2240.json", + "M": 41984, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 475.24099999999976 + }, + "M=41984,N=2400": { + "file": "silu_config_M41984_N2400.json", + "M": 41984, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.0812499999997 + }, + "M=41984,N=2560": { + "file": "silu_config_M41984_N2560.json", + "M": 41984, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.9212500000001 + }, + "M=43008,N=128": { + "file": "silu_config_M43008_N128.json", + "M": 43008, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 33.91900000000001 + }, + "M=43008,N=160": { + "file": "silu_config_M43008_N160.json", + "M": 43008, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.75924999999998 + }, + "M=43008,N=192": { + "file": "silu_config_M43008_N192.json", + "M": 43008, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.35924999999999 + }, + "M=43008,N=256": { + "file": "silu_config_M43008_N256.json", + "M": 43008, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
61.55925000000002 + }, + "M=43008,N=320": { + "file": "silu_config_M43008_N320.json", + "M": 43008, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 112.15950000000001 + }, + "M=43008,N=384": { + "file": "silu_config_M43008_N384.json", + "M": 43008, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.5995 + }, + "M=43008,N=480": { + "file": "silu_config_M43008_N480.json", + "M": 43008, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.03950000000003 + }, + "M=43008,N=512": { + "file": "silu_config_M43008_N512.json", + "M": 43008, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 91.19924999999998 + }, + "M=43008,N=576": { + "file": "silu_config_M43008_N576.json", + "M": 43008, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 215.88 + }, + "M=43008,N=640": { + "file": "silu_config_M43008_N640.json", + "M": 43008, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 214.07975000000005 + }, + "M=43008,N=768": { + "file": "silu_config_M43008_N768.json", + "M": 43008, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 209.83975000000004 + }, + "M=43008,N=800": { + "file": "silu_config_M43008_N800.json", + "M": 43008, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.67974999999996 + }, + "M=43008,N=896": { + "file": "silu_config_M43008_N896.json", + "M": 43008, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 215.63975000000005 + }, + "M=43008,N=960": { + "file": "silu_config_M43008_N960.json", + "M": 43008, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.39999999999992 + }, + "M=43008,N=1024": { + "file": "silu_config_M43008_N1024.json", + "M": 43008, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 171.99975000000006 + }, + "M=43008,N=1120": { + "file": "silu_config_M43008_N1120.json", + "M": 43008, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 424.76099999999997 + 
}, + "M=43008,N=1152": { + "file": "silu_config_M43008_N1152.json", + "M": 43008, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.48075000000017 + }, + "M=43008,N=1280": { + "file": "silu_config_M43008_N1280.json", + "M": 43008, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 415.84075000000007 + }, + "M=43008,N=1344": { + "file": "silu_config_M43008_N1344.json", + "M": 43008, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 419.84074999999984 + }, + "M=43008,N=1408": { + "file": "silu_config_M43008_N1408.json", + "M": 43008, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 416.32075 + }, + "M=43008,N=1440": { + "file": "silu_config_M43008_N1440.json", + "M": 43008, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 420.4407500000002 + }, + "M=43008,N=1536": { + "file": "silu_config_M43008_N1536.json", + "M": 43008, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 417.24074999999993 + }, + "M=43008,N=1600": { + "file": "silu_config_M43008_N1600.json", + "M": 43008, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.5210000000002 + }, + "M=43008,N=1664": { + "file": "silu_config_M43008_N1664.json", + "M": 43008, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 429.2007500000002 + }, + "M=43008,N=1728": { + "file": "silu_config_M43008_N1728.json", + "M": 43008, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 416.2807499999998 + }, + "M=43008,N=1760": { + "file": "silu_config_M43008_N1760.json", + "M": 43008, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 421.60074999999995 + }, + "M=43008,N=1792": { + "file": "silu_config_M43008_N1792.json", + "M": 43008, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 418.7607499999998 + }, + "M=43008,N=1920": { + "file": "silu_config_M43008_N1920.json", + "M": 43008, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
430.5210000000001 + }, + "M=43008,N=2048": { + "file": "silu_config_M43008_N2048.json", + "M": 43008, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 333.8802499999998 + }, + "M=43008,N=2080": { + "file": "silu_config_M43008_N2080.json", + "M": 43008, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8009999999997 + }, + "M=43008,N=2240": { + "file": "silu_config_M43008_N2240.json", + "M": 43008, + "N": 2240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 488.5610000000004 + }, + "M=43008,N=2400": { + "file": "silu_config_M43008_N2400.json", + "M": 43008, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 513.9612500000001 + }, + "M=43008,N=2560": { + "file": "silu_config_M43008_N2560.json", + "M": 43008, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 522.3612500000002 + }, + "M=44032,N=128": { + "file": "silu_config_M44032_N128.json", + "M": 44032, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.87925 + }, + "M=44032,N=160": { + "file": "silu_config_M44032_N160.json", + "M": 44032, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 60.15925000000003 + }, + "M=44032,N=192": { + "file": "silu_config_M44032_N192.json", + "M": 44032, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.71925000000002 + }, + "M=44032,N=256": { + "file": "silu_config_M44032_N256.json", + "M": 44032, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 62.99924999999998 + }, + "M=44032,N=320": { + "file": "silu_config_M44032_N320.json", + "M": 44032, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 114.1995 + }, + "M=44032,N=384": { + "file": "silu_config_M44032_N384.json", + "M": 44032, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 113.9995 + }, + "M=44032,N=480": { + "file": "silu_config_M44032_N480.json", + "M": 44032, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.4795 + }, + 
"M=44032,N=512": { + "file": "silu_config_M44032_N512.json", + "M": 44032, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 93.03950000000003 + }, + "M=44032,N=576": { + "file": "silu_config_M44032_N576.json", + "M": 44032, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 220.84000000000003 + }, + "M=44032,N=640": { + "file": "silu_config_M44032_N640.json", + "M": 44032, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 219.15975000000003 + }, + "M=44032,N=768": { + "file": "silu_config_M44032_N768.json", + "M": 44032, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 215.95974999999999 + }, + "M=44032,N=800": { + "file": "silu_config_M44032_N800.json", + "M": 44032, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.35999999999996 + }, + "M=44032,N=896": { + "file": "silu_config_M44032_N896.json", + "M": 44032, + "N": 896, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 224.75999999999993 + }, + "M=44032,N=960": { + "file": "silu_config_M44032_N960.json", + "M": 44032, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 218.47975000000002 + }, + "M=44032,N=1024": { + "file": "silu_config_M44032_N1024.json", + "M": 44032, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 175.91975000000002 + }, + "M=44032,N=1120": { + "file": "silu_config_M44032_N1120.json", + "M": 44032, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 423.76075000000014 + }, + "M=44032,N=1152": { + "file": "silu_config_M44032_N1152.json", + "M": 44032, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.7207500000002 + }, + "M=44032,N=1280": { + "file": "silu_config_M44032_N1280.json", + "M": 44032, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.241 + }, + "M=44032,N=1344": { + "file": "silu_config_M44032_N1344.json", + "M": 44032, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 418.2007500000002 + }, + 
"M=44032,N=1408": { + "file": "silu_config_M44032_N1408.json", + "M": 44032, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.6007500000003 + }, + "M=44032,N=1440": { + "file": "silu_config_M44032_N1440.json", + "M": 44032, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.2807499999999 + }, + "M=44032,N=1536": { + "file": "silu_config_M44032_N1536.json", + "M": 44032, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 430.4007499999998 + }, + "M=44032,N=1600": { + "file": "silu_config_M44032_N1600.json", + "M": 44032, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.44074999999975 + }, + "M=44032,N=1664": { + "file": "silu_config_M44032_N1664.json", + "M": 44032, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 433.2407499999998 + }, + "M=44032,N=1728": { + "file": "silu_config_M44032_N1728.json", + "M": 44032, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.8007499999999 + }, + "M=44032,N=1760": { + "file": "silu_config_M44032_N1760.json", + "M": 44032, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 431.28075000000024 + }, + "M=44032,N=1792": { + "file": "silu_config_M44032_N1792.json", + "M": 44032, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 433.44075 + }, + "M=44032,N=1920": { + "file": "silu_config_M44032_N1920.json", + "M": 44032, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 423.84074999999996 + }, + "M=44032,N=2048": { + "file": "silu_config_M44032_N2048.json", + "M": 44032, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 350.08050000000014 + }, + "M=44032,N=2080": { + "file": "silu_config_M44032_N2080.json", + "M": 44032, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 472.40099999999984 + }, + "M=44032,N=2240": { + "file": "silu_config_M44032_N2240.json", + "M": 44032, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
481.44100000000003 + }, + "M=44032,N=2400": { + "file": "silu_config_M44032_N2400.json", + "M": 44032, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 522.9612500000001 + }, + "M=44032,N=2560": { + "file": "silu_config_M44032_N2560.json", + "M": 44032, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.6412499999997 + }, + "M=45056,N=128": { + "file": "silu_config_M45056_N128.json", + "M": 45056, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.959000000000007 + }, + "M=45056,N=160": { + "file": "silu_config_M45056_N160.json", + "M": 45056, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.91950000000001 + }, + "M=45056,N=192": { + "file": "silu_config_M45056_N192.json", + "M": 45056, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.95925000000002 + }, + "M=45056,N=256": { + "file": "silu_config_M45056_N256.json", + "M": 45056, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.39925 + }, + "M=45056,N=320": { + "file": "silu_config_M45056_N320.json", + "M": 45056, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 117.3195 + }, + "M=45056,N=384": { + "file": "silu_config_M45056_N384.json", + "M": 45056, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.3595 + }, + "M=45056,N=480": { + "file": "silu_config_M45056_N480.json", + "M": 45056, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.03950000000003 + }, + "M=45056,N=512": { + "file": "silu_config_M45056_N512.json", + "M": 45056, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 95.07925 + }, + "M=45056,N=576": { + "file": "silu_config_M45056_N576.json", + "M": 45056, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 221.83974999999992 + }, + "M=45056,N=640": { + "file": "silu_config_M45056_N640.json", + "M": 45056, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 220.24 + }, + "M=45056,N=768": { + "file": 
"silu_config_M45056_N768.json", + "M": 45056, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.31974999999994 + }, + "M=45056,N=800": { + "file": "silu_config_M45056_N800.json", + "M": 45056, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 227.72000000000003 + }, + "M=45056,N=896": { + "file": "silu_config_M45056_N896.json", + "M": 45056, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 225.44000000000005 + }, + "M=45056,N=960": { + "file": "silu_config_M45056_N960.json", + "M": 45056, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.35999999999984 + }, + "M=45056,N=1024": { + "file": "silu_config_M45056_N1024.json", + "M": 45056, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 179.59975000000003 + }, + "M=45056,N=1120": { + "file": "silu_config_M45056_N1120.json", + "M": 45056, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.9610000000001 + }, + "M=45056,N=1152": { + "file": "silu_config_M45056_N1152.json", + "M": 45056, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.72100000000023 + }, + "M=45056,N=1280": { + "file": "silu_config_M45056_N1280.json", + "M": 45056, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 435.6407499999999 + }, + "M=45056,N=1344": { + "file": "silu_config_M45056_N1344.json", + "M": 45056, + "N": 1344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 451.6009999999999 + }, + "M=45056,N=1408": { + "file": "silu_config_M45056_N1408.json", + "M": 45056, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.08074999999985 + }, + "M=45056,N=1440": { + "file": "silu_config_M45056_N1440.json", + "M": 45056, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.36075000000017 + }, + "M=45056,N=1536": { + "file": "silu_config_M45056_N1536.json", + "M": 45056, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.32074999999986 + }, + 
"M=45056,N=1600": { + "file": "silu_config_M45056_N1600.json", + "M": 45056, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 439.6009999999999 + }, + "M=45056,N=1664": { + "file": "silu_config_M45056_N1664.json", + "M": 45056, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 432.0007499999998 + }, + "M=45056,N=1728": { + "file": "silu_config_M45056_N1728.json", + "M": 45056, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.7607499999999 + }, + "M=45056,N=1760": { + "file": "silu_config_M45056_N1760.json", + "M": 45056, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 445.0409999999998 + }, + "M=45056,N=1792": { + "file": "silu_config_M45056_N1792.json", + "M": 45056, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 448.92100000000005 + }, + "M=45056,N=1920": { + "file": "silu_config_M45056_N1920.json", + "M": 45056, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 427.80100000000016 + }, + "M=45056,N=2048": { + "file": "silu_config_M45056_N2048.json", + "M": 45056, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 346.1205 + }, + "M=45056,N=2080": { + "file": "silu_config_M45056_N2080.json", + "M": 45056, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 477.6809999999998 + }, + "M=45056,N=2240": { + "file": "silu_config_M45056_N2240.json", + "M": 45056, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.6012499999999 + }, + "M=45056,N=2400": { + "file": "silu_config_M45056_N2400.json", + "M": 45056, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 545.32125 + }, + "M=45056,N=2560": { + "file": "silu_config_M45056_N2560.json", + "M": 45056, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 552.2412500000003 + }, + "M=46080,N=128": { + "file": "silu_config_M46080_N128.json", + "M": 46080, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.11924999999999 + }, 
+ "M=46080,N=160": { + "file": "silu_config_M46080_N160.json", + "M": 46080, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 67.03925 + }, + "M=46080,N=192": { + "file": "silu_config_M46080_N192.json", + "M": 46080, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 67.11925000000002 + }, + "M=46080,N=256": { + "file": "silu_config_M46080_N256.json", + "M": 46080, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 56.71925 + }, + "M=46080,N=320": { + "file": "silu_config_M46080_N320.json", + "M": 46080, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 118.91949999999999 + }, + "M=46080,N=384": { + "file": "silu_config_M46080_N384.json", + "M": 46080, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 117.95950000000002 + }, + "M=46080,N=480": { + "file": "silu_config_M46080_N480.json", + "M": 46080, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.15950000000001 + }, + "M=46080,N=512": { + "file": "silu_config_M46080_N512.json", + "M": 46080, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 97.27950000000001 + }, + "M=46080,N=576": { + "file": "silu_config_M46080_N576.json", + "M": 46080, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 229.2 + }, + "M=46080,N=640": { + "file": "silu_config_M46080_N640.json", + "M": 46080, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.75999999999993 + }, + "M=46080,N=768": { + "file": "silu_config_M46080_N768.json", + "M": 46080, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.43999999999994 + }, + "M=46080,N=800": { + "file": "silu_config_M46080_N800.json", + "M": 46080, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.80000000000007 + }, + "M=46080,N=896": { + "file": "silu_config_M46080_N896.json", + "M": 46080, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.4799999999999 + }, + "M=46080,N=960": { + "file": 
"silu_config_M46080_N960.json", + "M": 46080, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.27999999999986 + }, + "M=46080,N=1024": { + "file": "silu_config_M46080_N1024.json", + "M": 46080, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 183.11975 + }, + "M=46080,N=1120": { + "file": "silu_config_M46080_N1120.json", + "M": 46080, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.28075 + }, + "M=46080,N=1152": { + "file": "silu_config_M46080_N1152.json", + "M": 46080, + "N": 1152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 444.68100000000015 + }, + "M=46080,N=1280": { + "file": "silu_config_M46080_N1280.json", + "M": 46080, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 445.2007500000001 + }, + "M=46080,N=1344": { + "file": "silu_config_M46080_N1344.json", + "M": 46080, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.84075000000007 + }, + "M=46080,N=1408": { + "file": "silu_config_M46080_N1408.json", + "M": 46080, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 434.6007500000002 + }, + "M=46080,N=1440": { + "file": "silu_config_M46080_N1440.json", + "M": 46080, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 450.92075 + }, + "M=46080,N=1536": { + "file": "silu_config_M46080_N1536.json", + "M": 46080, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.6407499999999 + }, + "M=46080,N=1600": { + "file": "silu_config_M46080_N1600.json", + "M": 46080, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.20074999999986 + }, + "M=46080,N=1664": { + "file": "silu_config_M46080_N1664.json", + "M": 46080, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 448.9207500000001 + }, + "M=46080,N=1728": { + "file": "silu_config_M46080_N1728.json", + "M": 46080, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.48074999999994 + }, + "M=46080,N=1760": { + 
"file": "silu_config_M46080_N1760.json", + "M": 46080, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.92075 + }, + "M=46080,N=1792": { + "file": "silu_config_M46080_N1792.json", + "M": 46080, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 436.48074999999994 + }, + "M=46080,N=1920": { + "file": "silu_config_M46080_N1920.json", + "M": 46080, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 449.2407499999997 + }, + "M=46080,N=2048": { + "file": "silu_config_M46080_N2048.json", + "M": 46080, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 359.92074999999977 + }, + "M=46080,N=2080": { + "file": "silu_config_M46080_N2080.json", + "M": 46080, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 482.9609999999998 + }, + "M=46080,N=2240": { + "file": "silu_config_M46080_N2240.json", + "M": 46080, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.68100000000004 + }, + "M=46080,N=2400": { + "file": "silu_config_M46080_N2400.json", + "M": 46080, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.5212499999998 + }, + "M=46080,N=2560": { + "file": "silu_config_M46080_N2560.json", + "M": 46080, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.2412499999998 + }, + "M=47104,N=128": { + "file": "silu_config_M47104_N128.json", + "M": 47104, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.839000000000006 + }, + "M=47104,N=160": { + "file": "silu_config_M47104_N160.json", + "M": 47104, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.47925 + }, + "M=47104,N=192": { + "file": "silu_config_M47104_N192.json", + "M": 47104, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.67925000000002 + }, + "M=47104,N=256": { + "file": "silu_config_M47104_N256.json", + "M": 47104, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.83925 + }, + "M=47104,N=320": { + "file": 
"silu_config_M47104_N320.json", + "M": 47104, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.9995 + }, + "M=47104,N=384": { + "file": "silu_config_M47104_N384.json", + "M": 47104, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.35950000000003 + }, + "M=47104,N=480": { + "file": "silu_config_M47104_N480.json", + "M": 47104, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.11975000000001 + }, + "M=47104,N=512": { + "file": "silu_config_M47104_N512.json", + "M": 47104, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 99.15924999999999 + }, + "M=47104,N=576": { + "file": "silu_config_M47104_N576.json", + "M": 47104, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.27999999999997 + }, + "M=47104,N=640": { + "file": "silu_config_M47104_N640.json", + "M": 47104, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 229.92000000000002 + }, + "M=47104,N=768": { + "file": "silu_config_M47104_N768.json", + "M": 47104, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 230.76000000000005 + }, + "M=47104,N=800": { + "file": "silu_config_M47104_N800.json", + "M": 47104, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.48000000000008 + }, + "M=47104,N=896": { + "file": "silu_config_M47104_N896.json", + "M": 47104, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.59999999999985 + }, + "M=47104,N=960": { + "file": "silu_config_M47104_N960.json", + "M": 47104, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.71999999999997 + }, + "M=47104,N=1024": { + "file": "silu_config_M47104_N1024.json", + "M": 47104, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 187.0797500000001 + }, + "M=47104,N=1120": { + "file": "silu_config_M47104_N1120.json", + "M": 47104, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 446.68075000000033 + }, + "M=47104,N=1152": { + "file": 
"silu_config_M47104_N1152.json", + "M": 47104, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 448.48074999999983 + }, + "M=47104,N=1280": { + "file": "silu_config_M47104_N1280.json", + "M": 47104, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 454.60074999999983 + }, + "M=47104,N=1344": { + "file": "silu_config_M47104_N1344.json", + "M": 47104, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 452.3209999999998 + }, + "M=47104,N=1408": { + "file": "silu_config_M47104_N1408.json", + "M": 47104, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.4409999999998 + }, + "M=47104,N=1440": { + "file": "silu_config_M47104_N1440.json", + "M": 47104, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 464.68075 + }, + "M=47104,N=1536": { + "file": "silu_config_M47104_N1536.json", + "M": 47104, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 460.12099999999987 + }, + "M=47104,N=1600": { + "file": "silu_config_M47104_N1600.json", + "M": 47104, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 447.8409999999998 + }, + "M=47104,N=1664": { + "file": "silu_config_M47104_N1664.json", + "M": 47104, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 450.7207499999997 + }, + "M=47104,N=1728": { + "file": "silu_config_M47104_N1728.json", + "M": 47104, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 453.24074999999993 + }, + "M=47104,N=1760": { + "file": "silu_config_M47104_N1760.json", + "M": 47104, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 448.52099999999984 + }, + "M=47104,N=1792": { + "file": "silu_config_M47104_N1792.json", + "M": 47104, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.081 + }, + "M=47104,N=1920": { + "file": "silu_config_M47104_N1920.json", + "M": 47104, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 462.7610000000002 + }, + "M=47104,N=2048": { 
+ "file": "silu_config_M47104_N2048.json", + "M": 47104, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 356.2004999999998 + }, + "M=47104,N=2080": { + "file": "silu_config_M47104_N2080.json", + "M": 47104, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 494.0409999999997 + }, + "M=47104,N=2240": { + "file": "silu_config_M47104_N2240.json", + "M": 47104, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.8012500000002 + }, + "M=47104,N=2400": { + "file": "silu_config_M47104_N2400.json", + "M": 47104, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 558.5215000000001 + }, + "M=47104,N=2560": { + "file": "silu_config_M47104_N2560.json", + "M": 47104, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 571.0014999999999 + }, + "M=48128,N=128": { + "file": "silu_config_M48128_N128.json", + "M": 48128, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.479000000000006 + }, + "M=48128,N=160": { + "file": "silu_config_M48128_N160.json", + "M": 48128, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 69.95925000000001 + }, + "M=48128,N=192": { + "file": "silu_config_M48128_N192.json", + "M": 48128, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.67925000000001 + }, + "M=48128,N=256": { + "file": "silu_config_M48128_N256.json", + "M": 48128, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.03924999999999 + }, + "M=48128,N=320": { + "file": "silu_config_M48128_N320.json", + "M": 48128, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 124.6795 + }, + "M=48128,N=384": { + "file": "silu_config_M48128_N384.json", + "M": 48128, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 123.67949999999996 + }, + "M=48128,N=480": { + "file": "silu_config_M48128_N480.json", + "M": 48128, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 126.71949999999995 + }, + "M=48128,N=512": { + "file": 
"silu_config_M48128_N512.json", + "M": 48128, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 101.19950000000003 + }, + "M=48128,N=576": { + "file": "silu_config_M48128_N576.json", + "M": 48128, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 236.68 + }, + "M=48128,N=640": { + "file": "silu_config_M48128_N640.json", + "M": 48128, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 234.88000000000005 + }, + "M=48128,N=768": { + "file": "silu_config_M48128_N768.json", + "M": 48128, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.95999999999998 + }, + "M=48128,N=800": { + "file": "silu_config_M48128_N800.json", + "M": 48128, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 237.75999999999993 + }, + "M=48128,N=896": { + "file": "silu_config_M48128_N896.json", + "M": 48128, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.24 + }, + "M=48128,N=960": { + "file": "silu_config_M48128_N960.json", + "M": 48128, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 242.9200000000002 + }, + "M=48128,N=1024": { + "file": "silu_config_M48128_N1024.json", + "M": 48128, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 190.67975000000013 + }, + "M=48128,N=1120": { + "file": "silu_config_M48128_N1120.json", + "M": 48128, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.7610000000001 + }, + "M=48128,N=1152": { + "file": "silu_config_M48128_N1152.json", + "M": 48128, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.24074999999993 + }, + "M=48128,N=1280": { + "file": "silu_config_M48128_N1280.json", + "M": 48128, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 458.2009999999998 + }, + "M=48128,N=1344": { + "file": "silu_config_M48128_N1344.json", + "M": 48128, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.8009999999998 + }, + "M=48128,N=1408": { + "file": 
"silu_config_M48128_N1408.json", + "M": 48128, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 464.7209999999998 + }, + "M=48128,N=1440": { + "file": "silu_config_M48128_N1440.json", + "M": 48128, + "N": 1440, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 483.24099999999976 + }, + "M=48128,N=1536": { + "file": "silu_config_M48128_N1536.json", + "M": 48128, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 465.3610000000001 + }, + "M=48128,N=1600": { + "file": "silu_config_M48128_N1600.json", + "M": 48128, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 468.5210000000001 + }, + "M=48128,N=1664": { + "file": "silu_config_M48128_N1664.json", + "M": 48128, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.7209999999999 + }, + "M=48128,N=1728": { + "file": "silu_config_M48128_N1728.json", + "M": 48128, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 479.8409999999999 + }, + "M=48128,N=1760": { + "file": "silu_config_M48128_N1760.json", + "M": 48128, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 473.56100000000015 + }, + "M=48128,N=1792": { + "file": "silu_config_M48128_N1792.json", + "M": 48128, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 455.8007499999999 + }, + "M=48128,N=1920": { + "file": "silu_config_M48128_N1920.json", + "M": 48128, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.6809999999998 + }, + "M=48128,N=2048": { + "file": "silu_config_M48128_N2048.json", + "M": 48128, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 369.72075000000007 + }, + "M=48128,N=2080": { + "file": "silu_config_M48128_N2080.json", + "M": 48128, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.1209999999998 + }, + "M=48128,N=2240": { + "file": "silu_config_M48128_N2240.json", + "M": 48128, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 531.0012499999998 + }, + 
"M=48128,N=2400": { + "file": "silu_config_M48128_N2400.json", + "M": 48128, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 583.7614999999998 + }, + "M=48128,N=2560": { + "file": "silu_config_M48128_N2560.json", + "M": 48128, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 583.8014999999998 + }, + "M=49152,N=128": { + "file": "silu_config_M49152_N128.json", + "M": 49152, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.07925 + }, + "M=49152,N=160": { + "file": "silu_config_M49152_N160.json", + "M": 49152, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.71925000000002 + }, + "M=49152,N=192": { + "file": "silu_config_M49152_N192.json", + "M": 49152, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.91925000000003 + }, + "M=49152,N=256": { + "file": "silu_config_M49152_N256.json", + "M": 49152, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.959249999999976 + }, + "M=49152,N=320": { + "file": "silu_config_M49152_N320.json", + "M": 49152, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.99950000000001 + }, + "M=49152,N=384": { + "file": "silu_config_M49152_N384.json", + "M": 49152, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.19949999999999 + }, + "M=49152,N=480": { + "file": "silu_config_M49152_N480.json", + "M": 49152, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.4795 + }, + "M=49152,N=512": { + "file": "silu_config_M49152_N512.json", + "M": 49152, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 99.35950000000005 + }, + "M=49152,N=576": { + "file": "silu_config_M49152_N576.json", + "M": 49152, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.76000000000005 + }, + "M=49152,N=640": { + "file": "silu_config_M49152_N640.json", + "M": 49152, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.16000000000008 + }, + "M=49152,N=768": { + 
"file": "silu_config_M49152_N768.json", + "M": 49152, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.76 + }, + "M=49152,N=800": { + "file": "silu_config_M49152_N800.json", + "M": 49152, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.68000000000006 + }, + "M=49152,N=896": { + "file": "silu_config_M49152_N896.json", + "M": 49152, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.2800000000001 + }, + "M=49152,N=960": { + "file": "silu_config_M49152_N960.json", + "M": 49152, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 241.31999999999994 + }, + "M=49152,N=1024": { + "file": "silu_config_M49152_N1024.json", + "M": 49152, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 195.3197500000001 + }, + "M=49152,N=1120": { + "file": "silu_config_M49152_N1120.json", + "M": 49152, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 465.9610000000001 + }, + "M=49152,N=1152": { + "file": "silu_config_M49152_N1152.json", + "M": 49152, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 462.3207500000002 + }, + "M=49152,N=1280": { + "file": "silu_config_M49152_N1280.json", + "M": 49152, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 474.64099999999996 + }, + "M=49152,N=1344": { + "file": "silu_config_M49152_N1344.json", + "M": 49152, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.3610000000002 + }, + "M=49152,N=1408": { + "file": "silu_config_M49152_N1408.json", + "M": 49152, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.3609999999999 + }, + "M=49152,N=1440": { + "file": "silu_config_M49152_N1440.json", + "M": 49152, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.0409999999998 + }, + "M=49152,N=1536": { + "file": "silu_config_M49152_N1536.json", + "M": 49152, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.76099999999997 + }, + "M=49152,N=1600": { + 
"file": "silu_config_M49152_N1600.json", + "M": 49152, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.0809999999998 + }, + "M=49152,N=1664": { + "file": "silu_config_M49152_N1664.json", + "M": 49152, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 464.3610000000002 + }, + "M=49152,N=1728": { + "file": "silu_config_M49152_N1728.json", + "M": 49152, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.361 + }, + "M=49152,N=1760": { + "file": "silu_config_M49152_N1760.json", + "M": 49152, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.8009999999998 + }, + "M=49152,N=1792": { + "file": "silu_config_M49152_N1792.json", + "M": 49152, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 476.28099999999984 + }, + "M=49152,N=1920": { + "file": "silu_config_M49152_N1920.json", + "M": 49152, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.241 + }, + "M=49152,N=2048": { + "file": "silu_config_M49152_N2048.json", + "M": 49152, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 376.0007499999999 + }, + "M=49152,N=2080": { + "file": "silu_config_M49152_N2080.json", + "M": 49152, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.721 + }, + "M=49152,N=2240": { + "file": "silu_config_M49152_N2240.json", + "M": 49152, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.4812500000005 + }, + "M=49152,N=2400": { + "file": "silu_config_M49152_N2400.json", + "M": 49152, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 570.9212500000001 + }, + "M=49152,N=2560": { + "file": "silu_config_M49152_N2560.json", + "M": 49152, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 596.1215000000004 + }, + "M=50176,N=128": { + "file": "silu_config_M50176_N128.json", + "M": 50176, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.43924999999997 + }, + "M=50176,N=160": { + "file": 
"silu_config_M50176_N160.json", + "M": 50176, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.07925000000003 + }, + "M=50176,N=192": { + "file": "silu_config_M50176_N192.json", + "M": 50176, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 72.51950000000002 + }, + "M=50176,N=256": { + "file": "silu_config_M50176_N256.json", + "M": 50176, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 61.11925000000002 + }, + "M=50176,N=320": { + "file": "silu_config_M50176_N320.json", + "M": 50176, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.5195 + }, + "M=50176,N=384": { + "file": "silu_config_M50176_N384.json", + "M": 50176, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 128.55949999999999 + }, + "M=50176,N=480": { + "file": "silu_config_M50176_N480.json", + "M": 50176, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.83950000000004 + }, + "M=50176,N=512": { + "file": "silu_config_M50176_N512.json", + "M": 50176, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 105.15950000000004 + }, + "M=50176,N=576": { + "file": "silu_config_M50176_N576.json", + "M": 50176, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 239.4000000000001 + }, + "M=50176,N=640": { + "file": "silu_config_M50176_N640.json", + "M": 50176, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 254.56 + }, + "M=50176,N=768": { + "file": "silu_config_M50176_N768.json", + "M": 50176, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 249.9199999999999 + }, + "M=50176,N=800": { + "file": "silu_config_M50176_N800.json", + "M": 50176, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.04000000000005 + }, + "M=50176,N=896": { + "file": "silu_config_M50176_N896.json", + "M": 50176, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 249.95999999999992 + }, + "M=50176,N=960": { + "file": "silu_config_M50176_N960.json", 
+ "M": 50176, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 248.1599999999999 + }, + "M=50176,N=1024": { + "file": "silu_config_M50176_N1024.json", + "M": 50176, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 198.9597500000001 + }, + "M=50176,N=1120": { + "file": "silu_config_M50176_N1120.json", + "M": 50176, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 486.5609999999999 + }, + "M=50176,N=1152": { + "file": "silu_config_M50176_N1152.json", + "M": 50176, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 493.5609999999998 + }, + "M=50176,N=1280": { + "file": "silu_config_M50176_N1280.json", + "M": 50176, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.40100000000007 + }, + "M=50176,N=1344": { + "file": "silu_config_M50176_N1344.json", + "M": 50176, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 491.0010000000001 + }, + "M=50176,N=1408": { + "file": "silu_config_M50176_N1408.json", + "M": 50176, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 487.9212500000002 + }, + "M=50176,N=1440": { + "file": "silu_config_M50176_N1440.json", + "M": 50176, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 487.6410000000001 + }, + "M=50176,N=1536": { + "file": "silu_config_M50176_N1536.json", + "M": 50176, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.7210000000001 + }, + "M=50176,N=1600": { + "file": "silu_config_M50176_N1600.json", + "M": 50176, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 498.08124999999984 + }, + "M=50176,N=1664": { + "file": "silu_config_M50176_N1664.json", + "M": 50176, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.48099999999977 + }, + "M=50176,N=1728": { + "file": "silu_config_M50176_N1728.json", + "M": 50176, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 499.1210000000001 + }, + "M=50176,N=1760": { + "file": 
"silu_config_M50176_N1760.json", + "M": 50176, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 499.08124999999995 + }, + "M=50176,N=1792": { + "file": "silu_config_M50176_N1792.json", + "M": 50176, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 485.7209999999998 + }, + "M=50176,N=1920": { + "file": "silu_config_M50176_N1920.json", + "M": 50176, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 486.8409999999999 + }, + "M=50176,N=2048": { + "file": "silu_config_M50176_N2048.json", + "M": 50176, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 389.80050000000006 + }, + "M=50176,N=2080": { + "file": "silu_config_M50176_N2080.json", + "M": 50176, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.00125 + }, + "M=50176,N=2240": { + "file": "silu_config_M50176_N2240.json", + "M": 50176, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 547.6412499999999 + }, + "M=50176,N=2400": { + "file": "silu_config_M50176_N2400.json", + "M": 50176, + "N": 2400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 603.4417500000004 + }, + "M=50176,N=2560": { + "file": "silu_config_M50176_N2560.json", + "M": 50176, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 613.4415000000001 + }, + "M=51200,N=128": { + "file": "silu_config_M51200_N128.json", + "M": 51200, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 35.11924999999998 + }, + "M=51200,N=160": { + "file": "silu_config_M51200_N160.json", + "M": 51200, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.79925 + }, + "M=51200,N=192": { + "file": "silu_config_M51200_N192.json", + "M": 51200, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 68.03925000000002 + }, + "M=51200,N=256": { + "file": "silu_config_M51200_N256.json", + "M": 51200, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.279250000000026 + }, + "M=51200,N=320": { + "file": 
"silu_config_M51200_N320.json", + "M": 51200, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 130.7595 + }, + "M=51200,N=384": { + "file": "silu_config_M51200_N384.json", + "M": 51200, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.9195 + }, + "M=51200,N=480": { + "file": "silu_config_M51200_N480.json", + "M": 51200, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.3995 + }, + "M=51200,N=512": { + "file": "silu_config_M51200_N512.json", + "M": 51200, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 107.11950000000002 + }, + "M=51200,N=576": { + "file": "silu_config_M51200_N576.json", + "M": 51200, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 254.24024999999978 + }, + "M=51200,N=640": { + "file": "silu_config_M51200_N640.json", + "M": 51200, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.28000000000014 + }, + "M=51200,N=768": { + "file": "silu_config_M51200_N768.json", + "M": 51200, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.35999999999999 + }, + "M=51200,N=800": { + "file": "silu_config_M51200_N800.json", + "M": 51200, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.72024999999996 + }, + "M=51200,N=896": { + "file": "silu_config_M51200_N896.json", + "M": 51200, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.79999999999995 + }, + "M=51200,N=960": { + "file": "silu_config_M51200_N960.json", + "M": 51200, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.71999999999977 + }, + "M=51200,N=1024": { + "file": "silu_config_M51200_N1024.json", + "M": 51200, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 202.35974999999996 + }, + "M=51200,N=1120": { + "file": "silu_config_M51200_N1120.json", + "M": 51200, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.7209999999999 + }, + "M=51200,N=1152": { + "file": 
"silu_config_M51200_N1152.json", + "M": 51200, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.9210000000004 + }, + "M=51200,N=1280": { + "file": "silu_config_M51200_N1280.json", + "M": 51200, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 483.56100000000026 + }, + "M=51200,N=1344": { + "file": "silu_config_M51200_N1344.json", + "M": 51200, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.7209999999999 + }, + "M=51200,N=1408": { + "file": "silu_config_M51200_N1408.json", + "M": 51200, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.44100000000014 + }, + "M=51200,N=1440": { + "file": "silu_config_M51200_N1440.json", + "M": 51200, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 498.2410000000001 + }, + "M=51200,N=1536": { + "file": "silu_config_M51200_N1536.json", + "M": 51200, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.121 + }, + "M=51200,N=1600": { + "file": "silu_config_M51200_N1600.json", + "M": 51200, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.9609999999999 + }, + "M=51200,N=1664": { + "file": "silu_config_M51200_N1664.json", + "M": 51200, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.44100000000003 + }, + "M=51200,N=1728": { + "file": "silu_config_M51200_N1728.json", + "M": 51200, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 497.8009999999997 + }, + "M=51200,N=1760": { + "file": "silu_config_M51200_N1760.json", + "M": 51200, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 477.961 + }, + "M=51200,N=1792": { + "file": "silu_config_M51200_N1792.json", + "M": 51200, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.001 + }, + "M=51200,N=1920": { + "file": "silu_config_M51200_N1920.json", + "M": 51200, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 485.56100000000015 + }, + "M=51200,N=2048": { + "file": 
"silu_config_M51200_N2048.json", + "M": 51200, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 395.1205 + }, + "M=51200,N=2080": { + "file": "silu_config_M51200_N2080.json", + "M": 51200, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 536.1212499999999 + }, + "M=51200,N=2240": { + "file": "silu_config_M51200_N2240.json", + "M": 51200, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 584.2815000000005 + }, + "M=51200,N=2400": { + "file": "silu_config_M51200_N2400.json", + "M": 51200, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 594.3614999999998 + }, + "M=51200,N=2560": { + "file": "silu_config_M51200_N2560.json", + "M": 51200, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 620.6815000000001 + }, + "M=52224,N=128": { + "file": "silu_config_M52224_N128.json", + "M": 52224, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 35.439249999999994 + }, + "M=52224,N=160": { + "file": "silu_config_M52224_N160.json", + "M": 52224, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.03925000000001 + }, + "M=52224,N=192": { + "file": "silu_config_M52224_N192.json", + "M": 52224, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.99950000000003 + }, + "M=52224,N=256": { + "file": "silu_config_M52224_N256.json", + "M": 52224, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.31925000000001 + }, + "M=52224,N=320": { + "file": "silu_config_M52224_N320.json", + "M": 52224, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 134.27950000000004 + }, + "M=52224,N=384": { + "file": "silu_config_M52224_N384.json", + "M": 52224, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.47975 + }, + "M=52224,N=480": { + "file": "silu_config_M52224_N480.json", + "M": 52224, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 136.23950000000002 + }, + "M=52224,N=512": { + "file": 
"silu_config_M52224_N512.json", + "M": 52224, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 109.07925 + }, + "M=52224,N=576": { + "file": "silu_config_M52224_N576.json", + "M": 52224, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.52 + }, + "M=52224,N=640": { + "file": "silu_config_M52224_N640.json", + "M": 52224, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 252.79999999999998 + }, + "M=52224,N=768": { + "file": "silu_config_M52224_N768.json", + "M": 52224, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 258.3602499999999 + }, + "M=52224,N=800": { + "file": "silu_config_M52224_N800.json", + "M": 52224, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.96 + }, + "M=52224,N=896": { + "file": "silu_config_M52224_N896.json", + "M": 52224, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 253.72 + }, + "M=52224,N=960": { + "file": "silu_config_M52224_N960.json", + "M": 52224, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.2800000000001 + }, + "M=52224,N=1024": { + "file": "silu_config_M52224_N1024.json", + "M": 52224, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 206.4797499999999 + }, + "M=52224,N=1120": { + "file": "silu_config_M52224_N1120.json", + "M": 52224, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.0412499999999 + }, + "M=52224,N=1152": { + "file": "silu_config_M52224_N1152.json", + "M": 52224, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 490.7210000000001 + }, + "M=52224,N=1280": { + "file": "silu_config_M52224_N1280.json", + "M": 52224, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 502.24099999999993 + }, + "M=52224,N=1344": { + "file": "silu_config_M52224_N1344.json", + "M": 52224, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 508.76100000000025 + }, + "M=52224,N=1408": { + "file": "silu_config_M52224_N1408.json", + "M": 
52224, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.36099999999976 + }, + "M=52224,N=1440": { + "file": "silu_config_M52224_N1440.json", + "M": 52224, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.40100000000024 + }, + "M=52224,N=1536": { + "file": "silu_config_M52224_N1536.json", + "M": 52224, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 514.0010000000002 + }, + "M=52224,N=1600": { + "file": "silu_config_M52224_N1600.json", + "M": 52224, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 509.0009999999999 + }, + "M=52224,N=1664": { + "file": "silu_config_M52224_N1664.json", + "M": 52224, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 497.3610000000001 + }, + "M=52224,N=1728": { + "file": "silu_config_M52224_N1728.json", + "M": 52224, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 507.00100000000026 + }, + "M=52224,N=1760": { + "file": "silu_config_M52224_N1760.json", + "M": 52224, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.5609999999999 + }, + "M=52224,N=1792": { + "file": "silu_config_M52224_N1792.json", + "M": 52224, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 498.48099999999977 + }, + "M=52224,N=1920": { + "file": "silu_config_M52224_N1920.json", + "M": 52224, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.04100000000017 + }, + "M=52224,N=2048": { + "file": "silu_config_M52224_N2048.json", + "M": 52224, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 395.6005 + }, + "M=52224,N=2080": { + "file": "silu_config_M52224_N2080.json", + "M": 52224, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 546.3212500000002 + }, + "M=52224,N=2240": { + "file": "silu_config_M52224_N2240.json", + "M": 52224, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.1614999999999 + }, + "M=52224,N=2400": { + "file": 
"silu_config_M52224_N2400.json", + "M": 52224, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.9214999999997 + }, + "M=52224,N=2560": { + "file": "silu_config_M52224_N2560.json", + "M": 52224, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2017500000002 + }, + "M=53248,N=128": { + "file": "silu_config_M53248_N128.json", + "M": 53248, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 36.11899999999998 + }, + "M=53248,N=160": { + "file": "silu_config_M53248_N160.json", + "M": 53248, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.75924999999998 + }, + "M=53248,N=192": { + "file": "silu_config_M53248_N192.json", + "M": 53248, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.71924999999999 + }, + "M=53248,N=256": { + "file": "silu_config_M53248_N256.json", + "M": 53248, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.87949999999998 + }, + "M=53248,N=320": { + "file": "silu_config_M53248_N320.json", + "M": 53248, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 136.67949999999996 + }, + "M=53248,N=384": { + "file": "silu_config_M53248_N384.json", + "M": 53248, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.75974999999997 + }, + "M=53248,N=480": { + "file": "silu_config_M53248_N480.json", + "M": 53248, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.07950000000002 + }, + "M=53248,N=512": { + "file": "silu_config_M53248_N512.json", + "M": 53248, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 110.95950000000005 + }, + "M=53248,N=576": { + "file": "silu_config_M53248_N576.json", + "M": 53248, + "N": 576, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 271.5999999999999 + }, + "M=53248,N=640": { + "file": "silu_config_M53248_N640.json", + "M": 53248, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 262.4799999999999 + }, + "M=53248,N=768": { + "file": 
"silu_config_M53248_N768.json", + "M": 53248, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 252.04000000000005 + }, + "M=53248,N=800": { + "file": "silu_config_M53248_N800.json", + "M": 53248, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 261.92000000000013 + }, + "M=53248,N=896": { + "file": "silu_config_M53248_N896.json", + "M": 53248, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 260.48025000000007 + }, + "M=53248,N=960": { + "file": "silu_config_M53248_N960.json", + "M": 53248, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 262.8800000000001 + }, + "M=53248,N=1024": { + "file": "silu_config_M53248_N1024.json", + "M": 53248, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 210.36000000000018 + }, + "M=53248,N=1120": { + "file": "silu_config_M53248_N1120.json", + "M": 53248, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.0412500000001 + }, + "M=53248,N=1152": { + "file": "silu_config_M53248_N1152.json", + "M": 53248, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 500.36124999999987 + }, + "M=53248,N=1280": { + "file": "silu_config_M53248_N1280.json", + "M": 53248, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 511.8412499999999 + }, + "M=53248,N=1344": { + "file": "silu_config_M53248_N1344.json", + "M": 53248, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.6412500000001 + }, + "M=53248,N=1408": { + "file": "silu_config_M53248_N1408.json", + "M": 53248, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.2410000000001 + }, + "M=53248,N=1440": { + "file": "silu_config_M53248_N1440.json", + "M": 53248, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.9612500000001 + }, + "M=53248,N=1536": { + "file": "silu_config_M53248_N1536.json", + "M": 53248, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 512.3612499999997 + }, + "M=53248,N=1600": 
{ + "file": "silu_config_M53248_N1600.json", + "M": 53248, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 515.80125 + }, + "M=53248,N=1664": { + "file": "silu_config_M53248_N1664.json", + "M": 53248, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.24125 + }, + "M=53248,N=1728": { + "file": "silu_config_M53248_N1728.json", + "M": 53248, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.1612500000001 + }, + "M=53248,N=1760": { + "file": "silu_config_M53248_N1760.json", + "M": 53248, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 521.1212500000001 + }, + "M=53248,N=1792": { + "file": "silu_config_M53248_N1792.json", + "M": 53248, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 514.2412500000003 + }, + "M=53248,N=1920": { + "file": "silu_config_M53248_N1920.json", + "M": 53248, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 504.28125 + }, + "M=53248,N=2048": { + "file": "silu_config_M53248_N2048.json", + "M": 53248, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 415.8007499999999 + }, + "M=53248,N=2080": { + "file": "silu_config_M53248_N2080.json", + "M": 53248, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.4415000000004 + }, + "M=53248,N=2240": { + "file": "silu_config_M53248_N2240.json", + "M": 53248, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.4414999999999 + }, + "M=53248,N=2400": { + "file": "silu_config_M53248_N2400.json", + "M": 53248, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.2417500000001 + }, + "M=53248,N=2560": { + "file": "silu_config_M53248_N2560.json", + "M": 53248, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.5617500000001 + }, + "M=54272,N=128": { + "file": "silu_config_M54272_N128.json", + "M": 54272, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 36.67899999999999 + }, + "M=54272,N=160": { + 
"file": "silu_config_M54272_N160.json", + "M": 54272, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.87949999999998 + }, + "M=54272,N=192": { + "file": "silu_config_M54272_N192.json", + "M": 54272, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.87950000000001 + }, + "M=54272,N=256": { + "file": "silu_config_M54272_N256.json", + "M": 54272, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 76.31925000000001 + }, + "M=54272,N=320": { + "file": "silu_config_M54272_N320.json", + "M": 54272, + "N": 320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.3195 + }, + "M=54272,N=384": { + "file": "silu_config_M54272_N384.json", + "M": 54272, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.07975000000002 + }, + "M=54272,N=480": { + "file": "silu_config_M54272_N480.json", + "M": 54272, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.87975 + }, + "M=54272,N=512": { + "file": "silu_config_M54272_N512.json", + "M": 54272, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 113.11949999999996 + }, + "M=54272,N=576": { + "file": "silu_config_M54272_N576.json", + "M": 54272, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.5200000000001 + }, + "M=54272,N=640": { + "file": "silu_config_M54272_N640.json", + "M": 54272, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 264.3202499999999 + }, + "M=54272,N=768": { + "file": "silu_config_M54272_N768.json", + "M": 54272, + "N": 768, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 275.32025000000004 + }, + "M=54272,N=800": { + "file": "silu_config_M54272_N800.json", + "M": 54272, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 266.92024999999995 + }, + "M=54272,N=896": { + "file": "silu_config_M54272_N896.json", + "M": 54272, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.60000000000025 + }, + "M=54272,N=960": { + "file": 
"silu_config_M54272_N960.json", + "M": 54272, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 260.2399999999999 + }, + "M=54272,N=1024": { + "file": "silu_config_M54272_N1024.json", + "M": 54272, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 213.99975000000006 + }, + "M=54272,N=1120": { + "file": "silu_config_M54272_N1120.json", + "M": 54272, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 538.88125 + }, + "M=54272,N=1152": { + "file": "silu_config_M54272_N1152.json", + "M": 54272, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 534.7612500000001 + }, + "M=54272,N=1280": { + "file": "silu_config_M54272_N1280.json", + "M": 54272, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.321 + }, + "M=54272,N=1344": { + "file": "silu_config_M54272_N1344.json", + "M": 54272, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 536.0412500000001 + }, + "M=54272,N=1408": { + "file": "silu_config_M54272_N1408.json", + "M": 54272, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 521.6812499999999 + }, + "M=54272,N=1440": { + "file": "silu_config_M54272_N1440.json", + "M": 54272, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.72125 + }, + "M=54272,N=1536": { + "file": "silu_config_M54272_N1536.json", + "M": 54272, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.1609999999998 + }, + "M=54272,N=1600": { + "file": "silu_config_M54272_N1600.json", + "M": 54272, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 525.4412499999999 + }, + "M=54272,N=1664": { + "file": "silu_config_M54272_N1664.json", + "M": 54272, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 526.5612500000002 + }, + "M=54272,N=1728": { + "file": "silu_config_M54272_N1728.json", + "M": 54272, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.7612500000002 + }, + "M=54272,N=1760": { + "file": 
"silu_config_M54272_N1760.json", + "M": 54272, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.48125 + }, + "M=54272,N=1792": { + "file": "silu_config_M54272_N1792.json", + "M": 54272, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 513.1610000000001 + }, + "M=54272,N=1920": { + "file": "silu_config_M54272_N1920.json", + "M": 54272, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.0412499999998 + }, + "M=54272,N=2048": { + "file": "silu_config_M54272_N2048.json", + "M": 54272, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 416.0007499999999 + }, + "M=54272,N=2080": { + "file": "silu_config_M54272_N2080.json", + "M": 54272, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 567.56125 + }, + "M=54272,N=2240": { + "file": "silu_config_M54272_N2240.json", + "M": 54272, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.6415000000002 + }, + "M=54272,N=2400": { + "file": "silu_config_M54272_N2400.json", + "M": 54272, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 639.8417499999998 + }, + "M=54272,N=2560": { + "file": "silu_config_M54272_N2560.json", + "M": 54272, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.6817499999997 + }, + "M=55296,N=128": { + "file": "silu_config_M55296_N128.json", + "M": 55296, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.27899999999999 + }, + "M=55296,N=160": { + "file": "silu_config_M55296_N160.json", + "M": 55296, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.07924999999997 + }, + "M=55296,N=192": { + "file": "silu_config_M55296_N192.json", + "M": 55296, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.07925 + }, + "M=55296,N=256": { + "file": "silu_config_M55296_N256.json", + "M": 55296, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.63924999999998 + }, + "M=55296,N=320": { + "file": 
"silu_config_M55296_N320.json", + "M": 55296, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.99975000000003 + }, + "M=55296,N=384": { + "file": "silu_config_M55296_N384.json", + "M": 55296, + "N": 384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.75975 + }, + "M=55296,N=480": { + "file": "silu_config_M55296_N480.json", + "M": 55296, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 144.19975000000002 + }, + "M=55296,N=512": { + "file": "silu_config_M55296_N512.json", + "M": 55296, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 111.47925000000001 + }, + "M=55296,N=576": { + "file": "silu_config_M55296_N576.json", + "M": 55296, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 269.16 + }, + "M=55296,N=640": { + "file": "silu_config_M55296_N640.json", + "M": 55296, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 267.44 + }, + "M=55296,N=768": { + "file": "silu_config_M55296_N768.json", + "M": 55296, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 267.6 + }, + "M=55296,N=800": { + "file": "silu_config_M55296_N800.json", + "M": 55296, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 269.68 + }, + "M=55296,N=896": { + "file": "silu_config_M55296_N896.json", + "M": 55296, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.7600000000002 + }, + "M=55296,N=960": { + "file": "silu_config_M55296_N960.json", + "M": 55296, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 270.88025000000005 + }, + "M=55296,N=1024": { + "file": "silu_config_M55296_N1024.json", + "M": 55296, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 218.27974999999998 + }, + "M=55296,N=1120": { + "file": "silu_config_M55296_N1120.json", + "M": 55296, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 523.3612500000003 + }, + "M=55296,N=1152": { + "file": "silu_config_M55296_N1152.json", + "M": 55296, + "N": 
1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.96125 + }, + "M=55296,N=1280": { + "file": "silu_config_M55296_N1280.json", + "M": 55296, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 533.28125 + }, + "M=55296,N=1344": { + "file": "silu_config_M55296_N1344.json", + "M": 55296, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.8412500000002 + }, + "M=55296,N=1408": { + "file": "silu_config_M55296_N1408.json", + "M": 55296, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.5212500000002 + }, + "M=55296,N=1440": { + "file": "silu_config_M55296_N1440.json", + "M": 55296, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.7612499999998 + }, + "M=55296,N=1536": { + "file": "silu_config_M55296_N1536.json", + "M": 55296, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 531.3612500000004 + }, + "M=55296,N=1600": { + "file": "silu_config_M55296_N1600.json", + "M": 55296, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.9612499999998 + }, + "M=55296,N=1664": { + "file": "silu_config_M55296_N1664.json", + "M": 55296, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 521.4012500000001 + }, + "M=55296,N=1728": { + "file": "silu_config_M55296_N1728.json", + "M": 55296, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 539.6412499999999 + }, + "M=55296,N=1760": { + "file": "silu_config_M55296_N1760.json", + "M": 55296, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.8812499999999 + }, + "M=55296,N=1792": { + "file": "silu_config_M55296_N1792.json", + "M": 55296, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.6012499999999 + }, + "M=55296,N=1920": { + "file": "silu_config_M55296_N1920.json", + "M": 55296, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 538.0812500000002 + }, + "M=55296,N=2048": { + "file": "silu_config_M55296_N2048.json", + "M": 55296, + 
"N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 423.84075000000007 + }, + "M=55296,N=2080": { + "file": "silu_config_M55296_N2080.json", + "M": 55296, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.7614999999998 + }, + "M=55296,N=2240": { + "file": "silu_config_M55296_N2240.json", + "M": 55296, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 619.4414999999997 + }, + "M=55296,N=2400": { + "file": "silu_config_M55296_N2400.json", + "M": 55296, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 651.9217500000002 + }, + "M=55296,N=2560": { + "file": "silu_config_M55296_N2560.json", + "M": 55296, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 683.4417499999997 + }, + "M=56320,N=128": { + "file": "silu_config_M56320_N128.json", + "M": 56320, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.75899999999999 + }, + "M=56320,N=160": { + "file": "silu_config_M56320_N160.json", + "M": 56320, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.19924999999998 + }, + "M=56320,N=192": { + "file": "silu_config_M56320_N192.json", + "M": 56320, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 80.3595 + }, + "M=56320,N=256": { + "file": "silu_config_M56320_N256.json", + "M": 56320, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.71924999999999 + }, + "M=56320,N=320": { + "file": "silu_config_M56320_N320.json", + "M": 56320, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 143.43949999999998 + }, + "M=56320,N=384": { + "file": "silu_config_M56320_N384.json", + "M": 56320, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 142.55975 + }, + "M=56320,N=480": { + "file": "silu_config_M56320_N480.json", + "M": 56320, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.23949999999996 + }, + "M=56320,N=512": { + "file": "silu_config_M56320_N512.json", + "M": 56320, + "N": 512, + 
"rows_per_block": 6, + "vec_size": 4, + "time_us": 114.27949999999996 + }, + "M=56320,N=576": { + "file": "silu_config_M56320_N576.json", + "M": 56320, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 276.0 + }, + "M=56320,N=640": { + "file": "silu_config_M56320_N640.json", + "M": 56320, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.88000000000005 + }, + "M=56320,N=768": { + "file": "silu_config_M56320_N768.json", + "M": 56320, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 277.64 + }, + "M=56320,N=800": { + "file": "silu_config_M56320_N800.json", + "M": 56320, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 285.52025 + }, + "M=56320,N=896": { + "file": "silu_config_M56320_N896.json", + "M": 56320, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 267.43999999999994 + }, + "M=56320,N=960": { + "file": "silu_config_M56320_N960.json", + "M": 56320, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.48000000000013 + }, + "M=56320,N=1024": { + "file": "silu_config_M56320_N1024.json", + "M": 56320, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 222.32000000000005 + }, + "M=56320,N=1120": { + "file": "silu_config_M56320_N1120.json", + "M": 56320, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 522.9612500000001 + }, + "M=56320,N=1152": { + "file": "silu_config_M56320_N1152.json", + "M": 56320, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 539.6412500000001 + }, + "M=56320,N=1280": { + "file": "silu_config_M56320_N1280.json", + "M": 56320, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.28125 + }, + "M=56320,N=1344": { + "file": "silu_config_M56320_N1344.json", + "M": 56320, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.8412500000002 + }, + "M=56320,N=1408": { + "file": "silu_config_M56320_N1408.json", + "M": 56320, + "N": 1408, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 540.28125 + }, + "M=56320,N=1440": { + "file": "silu_config_M56320_N1440.json", + "M": 56320, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.28125 + }, + "M=56320,N=1536": { + "file": "silu_config_M56320_N1536.json", + "M": 56320, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 530.1612499999999 + }, + "M=56320,N=1600": { + "file": "silu_config_M56320_N1600.json", + "M": 56320, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.9212500000001 + }, + "M=56320,N=1664": { + "file": "silu_config_M56320_N1664.json", + "M": 56320, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 541.9212499999999 + }, + "M=56320,N=1728": { + "file": "silu_config_M56320_N1728.json", + "M": 56320, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 544.8812499999997 + }, + "M=56320,N=1760": { + "file": "silu_config_M56320_N1760.json", + "M": 56320, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 525.1612499999999 + }, + "M=56320,N=1792": { + "file": "silu_config_M56320_N1792.json", + "M": 56320, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 532.32125 + }, + "M=56320,N=1920": { + "file": "silu_config_M56320_N1920.json", + "M": 56320, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 554.8812499999999 + }, + "M=56320,N=2048": { + "file": "silu_config_M56320_N2048.json", + "M": 56320, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 436.16075 + }, + "M=56320,N=2080": { + "file": "silu_config_M56320_N2080.json", + "M": 56320, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.2814999999998 + }, + "M=56320,N=2240": { + "file": "silu_config_M56320_N2240.json", + "M": 56320, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.4414999999997 + }, + "M=56320,N=2400": { + "file": "silu_config_M56320_N2400.json", + "M": 56320, + "N": 2400, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 651.7217500000002 + }, + "M=56320,N=2560": { + "file": "silu_config_M56320_N2560.json", + "M": 56320, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 681.2417500000004 + }, + "M=57344,N=128": { + "file": "silu_config_M57344_N128.json", + "M": 57344, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.59924999999998 + }, + "M=57344,N=160": { + "file": "silu_config_M57344_N160.json", + "M": 57344, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.27950000000001 + }, + "M=57344,N=192": { + "file": "silu_config_M57344_N192.json", + "M": 57344, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 81.83950000000003 + }, + "M=57344,N=256": { + "file": "silu_config_M57344_N256.json", + "M": 57344, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.67924999999998 + }, + "M=57344,N=320": { + "file": "silu_config_M57344_N320.json", + "M": 57344, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.63975000000002 + }, + "M=57344,N=384": { + "file": "silu_config_M57344_N384.json", + "M": 57344, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.63949999999997 + }, + "M=57344,N=480": { + "file": "silu_config_M57344_N480.json", + "M": 57344, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.07974999999996 + }, + "M=57344,N=512": { + "file": "silu_config_M57344_N512.json", + "M": 57344, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 118.91950000000003 + }, + "M=57344,N=576": { + "file": "silu_config_M57344_N576.json", + "M": 57344, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 280.99999999999994 + }, + "M=57344,N=640": { + "file": "silu_config_M57344_N640.json", + "M": 57344, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 278.9202499999998 + }, + "M=57344,N=768": { + "file": "silu_config_M57344_N768.json", + "M": 57344, + "N": 768, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 281.92025000000007 + }, + "M=57344,N=800": { + "file": "silu_config_M57344_N800.json", + "M": 57344, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.64025000000004 + }, + "M=57344,N=896": { + "file": "silu_config_M57344_N896.json", + "M": 57344, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 280.1199999999999 + }, + "M=57344,N=960": { + "file": "silu_config_M57344_N960.json", + "M": 57344, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 288.6402499999999 + }, + "M=57344,N=1024": { + "file": "silu_config_M57344_N1024.json", + "M": 57344, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 226.27999999999986 + }, + "M=57344,N=1120": { + "file": "silu_config_M57344_N1120.json", + "M": 57344, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 542.5612499999997 + }, + "M=57344,N=1152": { + "file": "silu_config_M57344_N1152.json", + "M": 57344, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 549.2012499999998 + }, + "M=57344,N=1280": { + "file": "silu_config_M57344_N1280.json", + "M": 57344, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9615000000002 + }, + "M=57344,N=1344": { + "file": "silu_config_M57344_N1344.json", + "M": 57344, + "N": 1344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 562.4014999999997 + }, + "M=57344,N=1408": { + "file": "silu_config_M57344_N1408.json", + "M": 57344, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9215000000002 + }, + "M=57344,N=1440": { + "file": "silu_config_M57344_N1440.json", + "M": 57344, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.80125 + }, + "M=57344,N=1536": { + "file": "silu_config_M57344_N1536.json", + "M": 57344, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 539.6812500000001 + }, + "M=57344,N=1600": { + "file": "silu_config_M57344_N1600.json", + "M": 57344, + "N": 1600, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 543.2412499999998 + }, + "M=57344,N=1664": { + "file": "silu_config_M57344_N1664.json", + "M": 57344, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 554.8015 + }, + "M=57344,N=1728": { + "file": "silu_config_M57344_N1728.json", + "M": 57344, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.1612500000003 + }, + "M=57344,N=1760": { + "file": "silu_config_M57344_N1760.json", + "M": 57344, + "N": 1760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 565.80125 + }, + "M=57344,N=1792": { + "file": "silu_config_M57344_N1792.json", + "M": 57344, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 552.8015000000003 + }, + "M=57344,N=1920": { + "file": "silu_config_M57344_N1920.json", + "M": 57344, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 542.6812500000003 + }, + "M=57344,N=2048": { + "file": "silu_config_M57344_N2048.json", + "M": 57344, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 439.48075000000017 + }, + "M=57344,N=2080": { + "file": "silu_config_M57344_N2080.json", + "M": 57344, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.7217500000004 + }, + "M=57344,N=2240": { + "file": "silu_config_M57344_N2240.json", + "M": 57344, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 631.0815000000002 + }, + "M=57344,N=2400": { + "file": "silu_config_M57344_N2400.json", + "M": 57344, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 668.8417499999996 + }, + "M=57344,N=2560": { + "file": "silu_config_M57344_N2560.json", + "M": 57344, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 704.0020000000004 + }, + "M=58368,N=128": { + "file": "silu_config_M58368_N128.json", + "M": 58368, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.79924999999999 + }, + "M=58368,N=160": { + "file": "silu_config_M58368_N160.json", + "M": 58368, + "N": 160, + "rows_per_block": 8, 
+ "vec_size": 2, + "time_us": 76.55924999999999 + }, + "M=58368,N=192": { + "file": "silu_config_M58368_N192.json", + "M": 58368, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 82.95925000000001 + }, + "M=58368,N=256": { + "file": "silu_config_M58368_N256.json", + "M": 58368, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.91925 + }, + "M=58368,N=320": { + "file": "silu_config_M58368_N320.json", + "M": 58368, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.03950000000003 + }, + "M=58368,N=384": { + "file": "silu_config_M58368_N384.json", + "M": 58368, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 148.15949999999998 + }, + "M=58368,N=480": { + "file": "silu_config_M58368_N480.json", + "M": 58368, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 151.63949999999997 + }, + "M=58368,N=512": { + "file": "silu_config_M58368_N512.json", + "M": 58368, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 120.79950000000002 + }, + "M=58368,N=576": { + "file": "silu_config_M58368_N576.json", + "M": 58368, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.48024999999996 + }, + "M=58368,N=640": { + "file": "silu_config_M58368_N640.json", + "M": 58368, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 293.28025 + }, + "M=58368,N=768": { + "file": "silu_config_M58368_N768.json", + "M": 58368, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 289.56025000000005 + }, + "M=58368,N=800": { + "file": "silu_config_M58368_N800.json", + "M": 58368, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.28025000000025 + }, + "M=58368,N=896": { + "file": "silu_config_M58368_N896.json", + "M": 58368, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.08024999999986 + }, + "M=58368,N=960": { + "file": "silu_config_M58368_N960.json", + "M": 58368, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 285.48024999999996 + }, + "M=58368,N=1024": { + "file": "silu_config_M58368_N1024.json", + "M": 58368, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 229.72000000000014 + }, + "M=58368,N=1120": { + "file": "silu_config_M58368_N1120.json", + "M": 58368, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 579.2415000000001 + }, + "M=58368,N=1152": { + "file": "silu_config_M58368_N1152.json", + "M": 58368, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 551.08125 + }, + "M=58368,N=1280": { + "file": "silu_config_M58368_N1280.json", + "M": 58368, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.4012500000001 + }, + "M=58368,N=1344": { + "file": "silu_config_M58368_N1344.json", + "M": 58368, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.4012500000001 + }, + "M=58368,N=1408": { + "file": "silu_config_M58368_N1408.json", + "M": 58368, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 562.48125 + }, + "M=58368,N=1440": { + "file": "silu_config_M58368_N1440.json", + "M": 58368, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 574.2815 + }, + "M=58368,N=1536": { + "file": "silu_config_M58368_N1536.json", + "M": 58368, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 571.2012499999998 + }, + "M=58368,N=1600": { + "file": "silu_config_M58368_N1600.json", + "M": 58368, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 563.7612499999998 + }, + "M=58368,N=1664": { + "file": "silu_config_M58368_N1664.json", + "M": 58368, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 564.4012499999997 + }, + "M=58368,N=1728": { + "file": "silu_config_M58368_N1728.json", + "M": 58368, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 557.52125 + }, + "M=58368,N=1760": { + "file": "silu_config_M58368_N1760.json", + "M": 58368, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 568.7612499999998 + }, + "M=58368,N=1792": { + "file": "silu_config_M58368_N1792.json", + "M": 58368, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 572.9612500000001 + }, + "M=58368,N=1920": { + "file": "silu_config_M58368_N1920.json", + "M": 58368, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 567.6812500000001 + }, + "M=58368,N=2048": { + "file": "silu_config_M58368_N2048.json", + "M": 58368, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 457.9209999999998 + }, + "M=58368,N=2080": { + "file": "silu_config_M58368_N2080.json", + "M": 58368, + "N": 2080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 628.1614999999999 + }, + "M=58368,N=2240": { + "file": "silu_config_M58368_N2240.json", + "M": 58368, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 642.0417499999999 + }, + "M=58368,N=2400": { + "file": "silu_config_M58368_N2400.json", + "M": 58368, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 676.2417499999999 + }, + "M=58368,N=2560": { + "file": "silu_config_M58368_N2560.json", + "M": 58368, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.6020000000001 + }, + "M=59392,N=128": { + "file": "silu_config_M59392_N128.json", + "M": 59392, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.27900000000001 + }, + "M=59392,N=160": { + "file": "silu_config_M59392_N160.json", + "M": 59392, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.75925000000001 + }, + "M=59392,N=192": { + "file": "silu_config_M59392_N192.json", + "M": 59392, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 84.35925 + }, + "M=59392,N=256": { + "file": "silu_config_M59392_N256.json", + "M": 59392, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.83925000000002 + }, + "M=59392,N=320": { + "file": "silu_config_M59392_N320.json", + "M": 59392, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 151.3195 + }, + "M=59392,N=384": { + "file": "silu_config_M59392_N384.json", + "M": 59392, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.27974999999998 + }, + "M=59392,N=480": { + "file": "silu_config_M59392_N480.json", + "M": 59392, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.47950000000003 + }, + "M=59392,N=512": { + "file": "silu_config_M59392_N512.json", + "M": 59392, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 122.87950000000004 + }, + "M=59392,N=576": { + "file": "silu_config_M59392_N576.json", + "M": 59392, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 304.56050000000005 + }, + "M=59392,N=640": { + "file": "silu_config_M59392_N640.json", + "M": 59392, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.48024999999984 + }, + "M=59392,N=768": { + "file": "silu_config_M59392_N768.json", + "M": 59392, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.56025000000005 + }, + "M=59392,N=800": { + "file": "silu_config_M59392_N800.json", + "M": 59392, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 297.32025 + }, + "M=59392,N=896": { + "file": "silu_config_M59392_N896.json", + "M": 59392, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 289.92025 + }, + "M=59392,N=960": { + "file": "silu_config_M59392_N960.json", + "M": 59392, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 292.7202500000001 + }, + "M=59392,N=1024": { + "file": "silu_config_M59392_N1024.json", + "M": 59392, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 233.79999999999995 + }, + "M=59392,N=1120": { + "file": "silu_config_M59392_N1120.json", + "M": 59392, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 555.0814999999998 + }, + "M=59392,N=1152": { + "file": "silu_config_M59392_N1152.json", + "M": 59392, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.0815 + }, 
+ "M=59392,N=1280": { + "file": "silu_config_M59392_N1280.json", + "M": 59392, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.0015000000001 + }, + "M=59392,N=1344": { + "file": "silu_config_M59392_N1344.json", + "M": 59392, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 585.4414999999997 + }, + "M=59392,N=1408": { + "file": "silu_config_M59392_N1408.json", + "M": 59392, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 577.1214999999997 + }, + "M=59392,N=1440": { + "file": "silu_config_M59392_N1440.json", + "M": 59392, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 572.9615000000001 + }, + "M=59392,N=1536": { + "file": "silu_config_M59392_N1536.json", + "M": 59392, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 578.1214999999997 + }, + "M=59392,N=1600": { + "file": "silu_config_M59392_N1600.json", + "M": 59392, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 584.2415000000001 + }, + "M=59392,N=1664": { + "file": "silu_config_M59392_N1664.json", + "M": 59392, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 559.7615000000001 + }, + "M=59392,N=1728": { + "file": "silu_config_M59392_N1728.json", + "M": 59392, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.9614999999999 + }, + "M=59392,N=1760": { + "file": "silu_config_M59392_N1760.json", + "M": 59392, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 575.0015000000001 + }, + "M=59392,N=1792": { + "file": "silu_config_M59392_N1792.json", + "M": 59392, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9615000000001 + }, + "M=59392,N=1920": { + "file": "silu_config_M59392_N1920.json", + "M": 59392, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.6012499999999 + }, + "M=59392,N=2048": { + "file": "silu_config_M59392_N2048.json", + "M": 59392, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 
466.68100000000027 + }, + "M=59392,N=2080": { + "file": "silu_config_M59392_N2080.json", + "M": 59392, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.8417499999998 + }, + "M=59392,N=2240": { + "file": "silu_config_M59392_N2240.json", + "M": 59392, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 645.84175 + }, + "M=59392,N=2400": { + "file": "silu_config_M59392_N2400.json", + "M": 59392, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.2019999999998 + }, + "M=59392,N=2560": { + "file": "silu_config_M59392_N2560.json", + "M": 59392, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 739.3619999999996 + }, + "M=60416,N=128": { + "file": "silu_config_M60416_N128.json", + "M": 60416, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.439249999999994 + }, + "M=60416,N=160": { + "file": "silu_config_M60416_N160.json", + "M": 60416, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 85.75950000000002 + }, + "M=60416,N=192": { + "file": "silu_config_M60416_N192.json", + "M": 60416, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.11925 + }, + "M=60416,N=256": { + "file": "silu_config_M60416_N256.json", + "M": 60416, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.51925 + }, + "M=60416,N=320": { + "file": "silu_config_M60416_N320.json", + "M": 60416, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 154.03949999999995 + }, + "M=60416,N=384": { + "file": "silu_config_M60416_N384.json", + "M": 60416, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 153.51950000000005 + }, + "M=60416,N=480": { + "file": "silu_config_M60416_N480.json", + "M": 60416, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.9995 + }, + "M=60416,N=512": { + "file": "silu_config_M60416_N512.json", + "M": 60416, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 124.67950000000002 + }, + 
"M=60416,N=576": { + "file": "silu_config_M60416_N576.json", + "M": 60416, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 307.44025000000005 + }, + "M=60416,N=640": { + "file": "silu_config_M60416_N640.json", + "M": 60416, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.72025000000025 + }, + "M=60416,N=768": { + "file": "silu_config_M60416_N768.json", + "M": 60416, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 293.80025 + }, + "M=60416,N=800": { + "file": "silu_config_M60416_N800.json", + "M": 60416, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 302.48050000000023 + }, + "M=60416,N=896": { + "file": "silu_config_M60416_N896.json", + "M": 60416, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 294.80025000000023 + }, + "M=60416,N=960": { + "file": "silu_config_M60416_N960.json", + "M": 60416, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 297.08025 + }, + "M=60416,N=1024": { + "file": "silu_config_M60416_N1024.json", + "M": 60416, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 237.24 + }, + "M=60416,N=1120": { + "file": "silu_config_M60416_N1120.json", + "M": 60416, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.7212499999998 + }, + "M=60416,N=1152": { + "file": "silu_config_M60416_N1152.json", + "M": 60416, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 580.5214999999998 + }, + "M=60416,N=1280": { + "file": "silu_config_M60416_N1280.json", + "M": 60416, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 578.3215000000002 + }, + "M=60416,N=1344": { + "file": "silu_config_M60416_N1344.json", + "M": 60416, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 592.5214999999998 + }, + "M=60416,N=1408": { + "file": "silu_config_M60416_N1408.json", + "M": 60416, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.6015 + }, + "M=60416,N=1440": { + "file": 
"silu_config_M60416_N1440.json", + "M": 60416, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.4814999999999 + }, + "M=60416,N=1536": { + "file": "silu_config_M60416_N1536.json", + "M": 60416, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 578.9615000000001 + }, + "M=60416,N=1600": { + "file": "silu_config_M60416_N1600.json", + "M": 60416, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 604.6415000000004 + }, + "M=60416,N=1664": { + "file": "silu_config_M60416_N1664.json", + "M": 60416, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 580.1215 + }, + "M=60416,N=1728": { + "file": "silu_config_M60416_N1728.json", + "M": 60416, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 573.1215000000002 + }, + "M=60416,N=1760": { + "file": "silu_config_M60416_N1760.json", + "M": 60416, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 587.5614999999998 + }, + "M=60416,N=1792": { + "file": "silu_config_M60416_N1792.json", + "M": 60416, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 570.5215000000001 + }, + "M=60416,N=1920": { + "file": "silu_config_M60416_N1920.json", + "M": 60416, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 593.6014999999998 + }, + "M=60416,N=2048": { + "file": "silu_config_M60416_N2048.json", + "M": 60416, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 462.6010000000001 + }, + "M=60416,N=2080": { + "file": "silu_config_M60416_N2080.json", + "M": 60416, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.8417499999998 + }, + "M=60416,N=2240": { + "file": "silu_config_M60416_N2240.json", + "M": 60416, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.0817500000003 + }, + "M=60416,N=2400": { + "file": "silu_config_M60416_N2400.json", + "M": 60416, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.3619999999996 + }, + "M=60416,N=2560": 
{ + "file": "silu_config_M60416_N2560.json", + "M": 60416, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 741.2019999999998 + }, + "M=61440,N=128": { + "file": "silu_config_M61440_N128.json", + "M": 61440, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 40.47900000000001 + }, + "M=61440,N=160": { + "file": "silu_config_M61440_N160.json", + "M": 61440, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 80.15924999999999 + }, + "M=61440,N=192": { + "file": "silu_config_M61440_N192.json", + "M": 61440, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 86.9195 + }, + "M=61440,N=256": { + "file": "silu_config_M61440_N256.json", + "M": 61440, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.99925000000002 + }, + "M=61440,N=320": { + "file": "silu_config_M61440_N320.json", + "M": 61440, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 155.51950000000002 + }, + "M=61440,N=384": { + "file": "silu_config_M61440_N384.json", + "M": 61440, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 155.19949999999997 + }, + "M=61440,N=480": { + "file": "silu_config_M61440_N480.json", + "M": 61440, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 158.83974999999995 + }, + "M=61440,N=512": { + "file": "silu_config_M61440_N512.json", + "M": 61440, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 126.87974999999999 + }, + "M=61440,N=576": { + "file": "silu_config_M61440_N576.json", + "M": 61440, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.8002500000002 + }, + "M=61440,N=640": { + "file": "silu_config_M61440_N640.json", + "M": 61440, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.32024999999993 + }, + "M=61440,N=768": { + "file": "silu_config_M61440_N768.json", + "M": 61440, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 289.88025000000016 + }, + "M=61440,N=800": { + "file": 
"silu_config_M61440_N800.json", + "M": 61440, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.44025 + }, + "M=61440,N=896": { + "file": "silu_config_M61440_N896.json", + "M": 61440, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.20025 + }, + "M=61440,N=960": { + "file": "silu_config_M61440_N960.json", + "M": 61440, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.28025 + }, + "M=61440,N=1024": { + "file": "silu_config_M61440_N1024.json", + "M": 61440, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 241.20000000000005 + }, + "M=61440,N=1120": { + "file": "silu_config_M61440_N1120.json", + "M": 61440, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.9614999999999 + }, + "M=61440,N=1152": { + "file": "silu_config_M61440_N1152.json", + "M": 61440, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.4815000000001 + }, + "M=61440,N=1280": { + "file": "silu_config_M61440_N1280.json", + "M": 61440, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.8815000000002 + }, + "M=61440,N=1344": { + "file": "silu_config_M61440_N1344.json", + "M": 61440, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.4815000000001 + }, + "M=61440,N=1408": { + "file": "silu_config_M61440_N1408.json", + "M": 61440, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.4412500000003 + }, + "M=61440,N=1440": { + "file": "silu_config_M61440_N1440.json", + "M": 61440, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.28125 + }, + "M=61440,N=1536": { + "file": "silu_config_M61440_N1536.json", + "M": 61440, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 577.3614999999998 + }, + "M=61440,N=1600": { + "file": "silu_config_M61440_N1600.json", + "M": 61440, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 591.0815000000002 + }, + "M=61440,N=1664": { + "file": 
"silu_config_M61440_N1664.json", + "M": 61440, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.1212499999997 + }, + "M=61440,N=1728": { + "file": "silu_config_M61440_N1728.json", + "M": 61440, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.4414999999999 + }, + "M=61440,N=1760": { + "file": "silu_config_M61440_N1760.json", + "M": 61440, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.6015 + }, + "M=61440,N=1792": { + "file": "silu_config_M61440_N1792.json", + "M": 61440, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.2814999999998 + }, + "M=61440,N=1920": { + "file": "silu_config_M61440_N1920.json", + "M": 61440, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.4412499999999 + }, + "M=61440,N=2048": { + "file": "silu_config_M61440_N2048.json", + "M": 61440, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 469.04099999999994 + }, + "M=61440,N=2080": { + "file": "silu_config_M61440_N2080.json", + "M": 61440, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.7217499999999 + }, + "M=61440,N=2240": { + "file": "silu_config_M61440_N2240.json", + "M": 61440, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.4019999999998 + }, + "M=61440,N=2400": { + "file": "silu_config_M61440_N2400.json", + "M": 61440, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.1220000000001 + }, + "M=61440,N=2560": { + "file": "silu_config_M61440_N2560.json", + "M": 61440, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.3622499999997 + }, + "M=62464,N=128": { + "file": "silu_config_M62464_N128.json", + "M": 62464, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 46.719249999999995 + }, + "M=62464,N=160": { + "file": "silu_config_M62464_N160.json", + "M": 62464, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.47924999999998 + }, + "M=62464,N=192": 
{ + "file": "silu_config_M62464_N192.json", + "M": 62464, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.35925 + }, + "M=62464,N=256": { + "file": "silu_config_M62464_N256.json", + "M": 62464, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.15924999999997 + }, + "M=62464,N=320": { + "file": "silu_config_M62464_N320.json", + "M": 62464, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 158.7994999999999 + }, + "M=62464,N=384": { + "file": "silu_config_M62464_N384.json", + "M": 62464, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.71975000000003 + }, + "M=62464,N=480": { + "file": "silu_config_M62464_N480.json", + "M": 62464, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.83975 + }, + "M=62464,N=512": { + "file": "silu_config_M62464_N512.json", + "M": 62464, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 128.8795 + }, + "M=62464,N=576": { + "file": "silu_config_M62464_N576.json", + "M": 62464, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 307.5602500000001 + }, + "M=62464,N=640": { + "file": "silu_config_M62464_N640.json", + "M": 62464, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 303.00025000000005 + }, + "M=62464,N=768": { + "file": "silu_config_M62464_N768.json", + "M": 62464, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 303.7605000000001 + }, + "M=62464,N=800": { + "file": "silu_config_M62464_N800.json", + "M": 62464, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.40025 + }, + "M=62464,N=896": { + "file": "silu_config_M62464_N896.json", + "M": 62464, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 314.6002500000001 + }, + "M=62464,N=960": { + "file": "silu_config_M62464_N960.json", + "M": 62464, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 307.72024999999996 + }, + "M=62464,N=1024": { + "file": "silu_config_M62464_N1024.json", 
+ "M": 62464, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 245.04025000000013 + }, + "M=62464,N=1120": { + "file": "silu_config_M62464_N1120.json", + "M": 62464, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 604.0415 + }, + "M=62464,N=1152": { + "file": "silu_config_M62464_N1152.json", + "M": 62464, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 588.6014999999998 + }, + "M=62464,N=1280": { + "file": "silu_config_M62464_N1280.json", + "M": 62464, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.0014999999999 + }, + "M=62464,N=1344": { + "file": "silu_config_M62464_N1344.json", + "M": 62464, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.9214999999999 + }, + "M=62464,N=1408": { + "file": "silu_config_M62464_N1408.json", + "M": 62464, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 599.8015 + }, + "M=62464,N=1440": { + "file": "silu_config_M62464_N1440.json", + "M": 62464, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 590.8815000000002 + }, + "M=62464,N=1536": { + "file": "silu_config_M62464_N1536.json", + "M": 62464, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 598.1615000000002 + }, + "M=62464,N=1600": { + "file": "silu_config_M62464_N1600.json", + "M": 62464, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 591.0814999999998 + }, + "M=62464,N=1664": { + "file": "silu_config_M62464_N1664.json", + "M": 62464, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.2415000000001 + }, + "M=62464,N=1728": { + "file": "silu_config_M62464_N1728.json", + "M": 62464, + "N": 1728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 614.1215 + }, + "M=62464,N=1760": { + "file": "silu_config_M62464_N1760.json", + "M": 62464, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 592.9614999999999 + }, + "M=62464,N=1792": { + "file": "silu_config_M62464_N1792.json", + "M": 
62464, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.6815000000001 + }, + "M=62464,N=1920": { + "file": "silu_config_M62464_N1920.json", + "M": 62464, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 601.6814999999997 + }, + "M=62464,N=2048": { + "file": "silu_config_M62464_N2048.json", + "M": 62464, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 478.76099999999997 + }, + "M=62464,N=2080": { + "file": "silu_config_M62464_N2080.json", + "M": 62464, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 650.9617500000004 + }, + "M=62464,N=2240": { + "file": "silu_config_M62464_N2240.json", + "M": 62464, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.8817500000002 + }, + "M=62464,N=2400": { + "file": "silu_config_M62464_N2400.json", + "M": 62464, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 723.3619999999996 + }, + "M=62464,N=2560": { + "file": "silu_config_M62464_N2560.json", + "M": 62464, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.9222500000001 + }, + "M=63488,N=128": { + "file": "silu_config_M63488_N128.json", + "M": 63488, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.679 + }, + "M=63488,N=160": { + "file": "silu_config_M63488_N160.json", + "M": 63488, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.6795 + }, + "M=63488,N=192": { + "file": "silu_config_M63488_N192.json", + "M": 63488, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.59925000000001 + }, + "M=63488,N=256": { + "file": "silu_config_M63488_N256.json", + "M": 63488, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 88.23925000000003 + }, + "M=63488,N=320": { + "file": "silu_config_M63488_N320.json", + "M": 63488, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.47975000000005 + }, + "M=63488,N=384": { + "file": "silu_config_M63488_N384.json", + "M": 63488, + "N": 384, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 160.43949999999998 + }, + "M=63488,N=480": { + "file": "silu_config_M63488_N480.json", + "M": 63488, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.43975000000003 + }, + "M=63488,N=512": { + "file": "silu_config_M63488_N512.json", + "M": 63488, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 130.83950000000002 + }, + "M=63488,N=576": { + "file": "silu_config_M63488_N576.json", + "M": 63488, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 310.6402500000001 + }, + "M=63488,N=640": { + "file": "silu_config_M63488_N640.json", + "M": 63488, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 319.5202500000001 + }, + "M=63488,N=768": { + "file": "silu_config_M63488_N768.json", + "M": 63488, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 308.72024999999985 + }, + "M=63488,N=800": { + "file": "silu_config_M63488_N800.json", + "M": 63488, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 311.40025 + }, + "M=63488,N=896": { + "file": "silu_config_M63488_N896.json", + "M": 63488, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 315.6802499999999 + }, + "M=63488,N=960": { + "file": "silu_config_M63488_N960.json", + "M": 63488, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 312.5602499999999 + }, + "M=63488,N=1024": { + "file": "silu_config_M63488_N1024.json", + "M": 63488, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 248.72000000000014 + }, + "M=63488,N=1120": { + "file": "silu_config_M63488_N1120.json", + "M": 63488, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 603.2414999999999 + }, + "M=63488,N=1152": { + "file": "silu_config_M63488_N1152.json", + "M": 63488, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 606.2015000000001 + }, + "M=63488,N=1280": { + "file": "silu_config_M63488_N1280.json", + "M": 63488, + "N": 1280, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 608.5614999999998 + }, + "M=63488,N=1344": { + "file": "silu_config_M63488_N1344.json", + "M": 63488, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.8815 + }, + "M=63488,N=1408": { + "file": "silu_config_M63488_N1408.json", + "M": 63488, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 609.2015000000001 + }, + "M=63488,N=1440": { + "file": "silu_config_M63488_N1440.json", + "M": 63488, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.1614999999999 + }, + "M=63488,N=1536": { + "file": "silu_config_M63488_N1536.json", + "M": 63488, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 607.8014999999998 + }, + "M=63488,N=1600": { + "file": "silu_config_M63488_N1600.json", + "M": 63488, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.8815000000002 + }, + "M=63488,N=1664": { + "file": "silu_config_M63488_N1664.json", + "M": 63488, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.9615000000003 + }, + "M=63488,N=1728": { + "file": "silu_config_M63488_N1728.json", + "M": 63488, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 612.9214999999999 + }, + "M=63488,N=1760": { + "file": "silu_config_M63488_N1760.json", + "M": 63488, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.8014999999998 + }, + "M=63488,N=1792": { + "file": "silu_config_M63488_N1792.json", + "M": 63488, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 601.9615000000001 + }, + "M=63488,N=1920": { + "file": "silu_config_M63488_N1920.json", + "M": 63488, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 611.2014999999999 + }, + "M=63488,N=2048": { + "file": "silu_config_M63488_N2048.json", + "M": 63488, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 498.48099999999977 + }, + "M=63488,N=2080": { + "file": "silu_config_M63488_N2080.json", + "M": 63488, 
+ "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.2017500000006 + }, + "M=63488,N=2240": { + "file": "silu_config_M63488_N2240.json", + "M": 63488, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.3220000000006 + }, + "M=63488,N=2400": { + "file": "silu_config_M63488_N2400.json", + "M": 63488, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 747.402 + }, + "M=63488,N=2560": { + "file": "silu_config_M63488_N2560.json", + "M": 63488, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 788.96225 + }, + "M=64512,N=128": { + "file": "silu_config_M64512_N128.json", + "M": 64512, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.079 + }, + "M=64512,N=160": { + "file": "silu_config_M64512_N160.json", + "M": 64512, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 90.79950000000001 + }, + "M=64512,N=192": { + "file": "silu_config_M64512_N192.json", + "M": 64512, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 90.75925 + }, + "M=64512,N=256": { + "file": "silu_config_M64512_N256.json", + "M": 64512, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.59949999999999 + }, + "M=64512,N=320": { + "file": "silu_config_M64512_N320.json", + "M": 64512, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 164.07975 + }, + "M=64512,N=384": { + "file": "silu_config_M64512_N384.json", + "M": 64512, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.75974999999997 + }, + "M=64512,N=480": { + "file": "silu_config_M64512_N480.json", + "M": 64512, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.07974999999993 + }, + "M=64512,N=512": { + "file": "silu_config_M64512_N512.json", + "M": 64512, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 129.03950000000003 + }, + "M=64512,N=576": { + "file": "silu_config_M64512_N576.json", + "M": 64512, + "N": 576, + "rows_per_block": 6, + "vec_size": 
2, + "time_us": 324.8802499999999 + }, + "M=64512,N=640": { + "file": "silu_config_M64512_N640.json", + "M": 64512, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.6002500000001 + }, + "M=64512,N=768": { + "file": "silu_config_M64512_N768.json", + "M": 64512, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 311.5202499999999 + }, + "M=64512,N=800": { + "file": "silu_config_M64512_N800.json", + "M": 64512, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 306.88024999999993 + }, + "M=64512,N=896": { + "file": "silu_config_M64512_N896.json", + "M": 64512, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 320.6802500000001 + }, + "M=64512,N=960": { + "file": "silu_config_M64512_N960.json", + "M": 64512, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 305.64025000000004 + }, + "M=64512,N=1024": { + "file": "silu_config_M64512_N1024.json", + "M": 64512, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 253.08000000000004 + }, + "M=64512,N=1120": { + "file": "silu_config_M64512_N1120.json", + "M": 64512, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 609.6414999999997 + }, + "M=64512,N=1152": { + "file": "silu_config_M64512_N1152.json", + "M": 64512, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 615.92175 + }, + "M=64512,N=1280": { + "file": "silu_config_M64512_N1280.json", + "M": 64512, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 620.6814999999999 + }, + "M=64512,N=1344": { + "file": "silu_config_M64512_N1344.json", + "M": 64512, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.8014999999998 + }, + "M=64512,N=1408": { + "file": "silu_config_M64512_N1408.json", + "M": 64512, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.5217500000006 + }, + "M=64512,N=1440": { + "file": "silu_config_M64512_N1440.json", + "M": 64512, + "N": 1440, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 610.0417500000003 + }, + "M=64512,N=1536": { + "file": "silu_config_M64512_N1536.json", + "M": 64512, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 606.2417500000001 + }, + "M=64512,N=1600": { + "file": "silu_config_M64512_N1600.json", + "M": 64512, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.1217500000005 + }, + "M=64512,N=1664": { + "file": "silu_config_M64512_N1664.json", + "M": 64512, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 599.8015000000005 + }, + "M=64512,N=1728": { + "file": "silu_config_M64512_N1728.json", + "M": 64512, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 634.9215000000002 + }, + "M=64512,N=1760": { + "file": "silu_config_M64512_N1760.json", + "M": 64512, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 633.88175 + }, + "M=64512,N=1792": { + "file": "silu_config_M64512_N1792.json", + "M": 64512, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.8014999999998 + }, + "M=64512,N=1920": { + "file": "silu_config_M64512_N1920.json", + "M": 64512, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 620.9617500000002 + }, + "M=64512,N=2048": { + "file": "silu_config_M64512_N2048.json", + "M": 64512, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 493.3609999999999 + }, + "M=64512,N=2080": { + "file": "silu_config_M64512_N2080.json", + "M": 64512, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.5617500000001 + }, + "M=64512,N=2240": { + "file": "silu_config_M64512_N2240.json", + "M": 64512, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.5620000000006 + }, + "M=64512,N=2400": { + "file": "silu_config_M64512_N2400.json", + "M": 64512, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.4422499999996 + }, + "M=64512,N=2560": { + "file": "silu_config_M64512_N2560.json", + "M": 64512, + "N": 2560, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 789.7222499999996 + }, + "M=65536,N=128": { + "file": "silu_config_M65536_N128.json", + "M": 65536, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.39900000000001 + }, + "M=65536,N=160": { + "file": "silu_config_M65536_N160.json", + "M": 65536, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.99924999999999 + }, + "M=65536,N=192": { + "file": "silu_config_M65536_N192.json", + "M": 65536, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.07924999999997 + }, + "M=65536,N=256": { + "file": "silu_config_M65536_N256.json", + "M": 65536, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.39949999999999 + }, + "M=65536,N=320": { + "file": "silu_config_M65536_N320.json", + "M": 65536, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.59974999999991 + }, + "M=65536,N=384": { + "file": "silu_config_M65536_N384.json", + "M": 65536, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 165.15975 + }, + "M=65536,N=480": { + "file": "silu_config_M65536_N480.json", + "M": 65536, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.55975 + }, + "M=65536,N=512": { + "file": "silu_config_M65536_N512.json", + "M": 65536, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 129.4795 + }, + "M=65536,N=576": { + "file": "silu_config_M65536_N576.json", + "M": 65536, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.28025 + }, + "M=65536,N=640": { + "file": "silu_config_M65536_N640.json", + "M": 65536, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.1202499999999 + }, + "M=65536,N=768": { + "file": "silu_config_M65536_N768.json", + "M": 65536, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.96024999999986 + }, + "M=65536,N=800": { + "file": "silu_config_M65536_N800.json", + "M": 65536, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.40025 + }, + 
"M=65536,N=896": { + "file": "silu_config_M65536_N896.json", + "M": 65536, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.1602499999999 + }, + "M=65536,N=960": { + "file": "silu_config_M65536_N960.json", + "M": 65536, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.9602500000001 + }, + "M=65536,N=1024": { + "file": "silu_config_M65536_N1024.json", + "M": 65536, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 256.40025000000026 + }, + "M=65536,N=1120": { + "file": "silu_config_M65536_N1120.json", + "M": 65536, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.1215000000002 + }, + "M=65536,N=1152": { + "file": "silu_config_M65536_N1152.json", + "M": 65536, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 624.9214999999999 + }, + "M=65536,N=1280": { + "file": "silu_config_M65536_N1280.json", + "M": 65536, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 625.60175 + }, + "M=65536,N=1344": { + "file": "silu_config_M65536_N1344.json", + "M": 65536, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 618.5617500000001 + }, + "M=65536,N=1408": { + "file": "silu_config_M65536_N1408.json", + "M": 65536, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 615.1217500000002 + }, + "M=65536,N=1440": { + "file": "silu_config_M65536_N1440.json", + "M": 65536, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.4014999999999 + }, + "M=65536,N=1536": { + "file": "silu_config_M65536_N1536.json", + "M": 65536, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 626.7617499999999 + }, + "M=65536,N=1600": { + "file": "silu_config_M65536_N1600.json", + "M": 65536, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 619.7617500000001 + }, + "M=65536,N=1664": { + "file": "silu_config_M65536_N1664.json", + "M": 65536, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 627.8417499999998 + 
}, + "M=65536,N=1728": { + "file": "silu_config_M65536_N1728.json", + "M": 65536, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.16175 + }, + "M=65536,N=1760": { + "file": "silu_config_M65536_N1760.json", + "M": 65536, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.5215000000001 + }, + "M=65536,N=1792": { + "file": "silu_config_M65536_N1792.json", + "M": 65536, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 618.2415000000001 + }, + "M=65536,N=1920": { + "file": "silu_config_M65536_N1920.json", + "M": 65536, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 641.64175 + }, + "M=65536,N=2048": { + "file": "silu_config_M65536_N2048.json", + "M": 65536, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.96100000000047 + }, + "M=65536,N=2080": { + "file": "silu_config_M65536_N2080.json", + "M": 65536, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 682.6817499999997 + }, + "M=65536,N=2240": { + "file": "silu_config_M65536_N2240.json", + "M": 65536, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.8820000000001 + }, + "M=65536,N=2400": { + "file": "silu_config_M65536_N2400.json", + "M": 65536, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 758.28225 + }, + "M=65536,N=2560": { + "file": "silu_config_M65536_N2560.json", + "M": 65536, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 792.0822499999997 + }, + "M=66560,N=128": { + "file": "silu_config_M66560_N128.json", + "M": 66560, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.239 + }, + "M=66560,N=160": { + "file": "silu_config_M66560_N160.json", + "M": 66560, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 93.63925000000002 + }, + "M=66560,N=192": { + "file": "silu_config_M66560_N192.json", + "M": 66560, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.23925 + }, + "M=66560,N=256": { + "file": 
"silu_config_M66560_N256.json", + "M": 66560, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.95925 + }, + "M=66560,N=320": { + "file": "silu_config_M66560_N320.json", + "M": 66560, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 167.71975000000003 + }, + "M=66560,N=384": { + "file": "silu_config_M66560_N384.json", + "M": 66560, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.95975000000004 + }, + "M=66560,N=480": { + "file": "silu_config_M66560_N480.json", + "M": 66560, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 170.95974999999999 + }, + "M=66560,N=512": { + "file": "silu_config_M66560_N512.json", + "M": 66560, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 134.03950000000003 + }, + "M=66560,N=576": { + "file": "silu_config_M66560_N576.json", + "M": 66560, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 328.92025 + }, + "M=66560,N=640": { + "file": "silu_config_M66560_N640.json", + "M": 66560, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.0002499999999 + }, + "M=66560,N=768": { + "file": "silu_config_M66560_N768.json", + "M": 66560, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.6402499999999 + }, + "M=66560,N=800": { + "file": "silu_config_M66560_N800.json", + "M": 66560, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 326.16050000000007 + }, + "M=66560,N=896": { + "file": "silu_config_M66560_N896.json", + "M": 66560, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.8805 + }, + "M=66560,N=960": { + "file": "silu_config_M66560_N960.json", + "M": 66560, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48024999999996 + }, + "M=66560,N=1024": { + "file": "silu_config_M66560_N1024.json", + "M": 66560, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 260.19999999999993 + }, + "M=66560,N=1120": { + "file": 
"silu_config_M66560_N1120.json", + "M": 66560, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2817499999999 + }, + "M=66560,N=1152": { + "file": "silu_config_M66560_N1152.json", + "M": 66560, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 622.2815 + }, + "M=66560,N=1280": { + "file": "silu_config_M66560_N1280.json", + "M": 66560, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.7217500000002 + }, + "M=66560,N=1344": { + "file": "silu_config_M66560_N1344.json", + "M": 66560, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 616.1614999999999 + }, + "M=66560,N=1408": { + "file": "silu_config_M66560_N1408.json", + "M": 66560, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.0017499999999 + }, + "M=66560,N=1440": { + "file": "silu_config_M66560_N1440.json", + "M": 66560, + "N": 1440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 652.9217500000002 + }, + "M=66560,N=1536": { + "file": "silu_config_M66560_N1536.json", + "M": 66560, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 624.0815 + }, + "M=66560,N=1600": { + "file": "silu_config_M66560_N1600.json", + "M": 66560, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 629.5615000000003 + }, + "M=66560,N=1664": { + "file": "silu_config_M66560_N1664.json", + "M": 66560, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 625.2014999999999 + }, + "M=66560,N=1728": { + "file": "silu_config_M66560_N1728.json", + "M": 66560, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.20175 + }, + "M=66560,N=1760": { + "file": "silu_config_M66560_N1760.json", + "M": 66560, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.3215 + }, + "M=66560,N=1792": { + "file": "silu_config_M66560_N1792.json", + "M": 66560, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.0017500000001 + }, + "M=66560,N=1920": { + "file": 
"silu_config_M66560_N1920.json", + "M": 66560, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 627.8414999999998 + }, + "M=66560,N=2048": { + "file": "silu_config_M66560_N2048.json", + "M": 66560, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 512.48125 + }, + "M=66560,N=2080": { + "file": "silu_config_M66560_N2080.json", + "M": 66560, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.5617500000003 + }, + "M=66560,N=2240": { + "file": "silu_config_M66560_N2240.json", + "M": 66560, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.922 + }, + "M=66560,N=2400": { + "file": "silu_config_M66560_N2400.json", + "M": 66560, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.2022499999998 + }, + "M=66560,N=2560": { + "file": "silu_config_M66560_N2560.json", + "M": 66560, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 814.8024999999998 + }, + "M=67584,N=128": { + "file": "silu_config_M67584_N128.json", + "M": 67584, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.71899999999998 + }, + "M=67584,N=160": { + "file": "silu_config_M67584_N160.json", + "M": 67584, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.63949999999997 + }, + "M=67584,N=192": { + "file": "silu_config_M67584_N192.json", + "M": 67584, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.47950000000002 + }, + "M=67584,N=256": { + "file": "silu_config_M67584_N256.json", + "M": 67584, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.91950000000001 + }, + "M=67584,N=320": { + "file": "silu_config_M67584_N320.json", + "M": 67584, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 171.55975 + }, + "M=67584,N=384": { + "file": "silu_config_M67584_N384.json", + "M": 67584, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 170.03975 + }, + "M=67584,N=480": { + "file": 
"silu_config_M67584_N480.json", + "M": 67584, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.39974999999998 + }, + "M=67584,N=512": { + "file": "silu_config_M67584_N512.json", + "M": 67584, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 138.75975 + }, + "M=67584,N=576": { + "file": "silu_config_M67584_N576.json", + "M": 67584, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48025 + }, + "M=67584,N=640": { + "file": "silu_config_M67584_N640.json", + "M": 67584, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.56049999999993 + }, + "M=67584,N=768": { + "file": "silu_config_M67584_N768.json", + "M": 67584, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.44025 + }, + "M=67584,N=800": { + "file": "silu_config_M67584_N800.json", + "M": 67584, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 328.4805000000001 + }, + "M=67584,N=896": { + "file": "silu_config_M67584_N896.json", + "M": 67584, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.08050000000003 + }, + "M=67584,N=960": { + "file": "silu_config_M67584_N960.json", + "M": 67584, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 333.5604999999998 + }, + "M=67584,N=1024": { + "file": "silu_config_M67584_N1024.json", + "M": 67584, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 264.2800000000001 + }, + "M=67584,N=1120": { + "file": "silu_config_M67584_N1120.json", + "M": 67584, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.0817499999998 + }, + "M=67584,N=1152": { + "file": "silu_config_M67584_N1152.json", + "M": 67584, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 643.9617499999997 + }, + "M=67584,N=1280": { + "file": "silu_config_M67584_N1280.json", + "M": 67584, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.5217499999999 + }, + "M=67584,N=1344": { + "file": 
"silu_config_M67584_N1344.json", + "M": 67584, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.68175 + }, + "M=67584,N=1408": { + "file": "silu_config_M67584_N1408.json", + "M": 67584, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.0817499999998 + }, + "M=67584,N=1440": { + "file": "silu_config_M67584_N1440.json", + "M": 67584, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.2417500000001 + }, + "M=67584,N=1536": { + "file": "silu_config_M67584_N1536.json", + "M": 67584, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.7617500000001 + }, + "M=67584,N=1600": { + "file": "silu_config_M67584_N1600.json", + "M": 67584, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.5217499999999 + }, + "M=67584,N=1664": { + "file": "silu_config_M67584_N1664.json", + "M": 67584, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 638.4017499999998 + }, + "M=67584,N=1728": { + "file": "silu_config_M67584_N1728.json", + "M": 67584, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 653.0417500000003 + }, + "M=67584,N=1760": { + "file": "silu_config_M67584_N1760.json", + "M": 67584, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 640.8417500000003 + }, + "M=67584,N=1792": { + "file": "silu_config_M67584_N1792.json", + "M": 67584, + "N": 1792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 654.3617500000003 + }, + "M=67584,N=1920": { + "file": "silu_config_M67584_N1920.json", + "M": 67584, + "N": 1920, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 656.0817499999998 + }, + "M=67584,N=2048": { + "file": "silu_config_M67584_N2048.json", + "M": 67584, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 514.8810000000001 + }, + "M=67584,N=2080": { + "file": "silu_config_M67584_N2080.json", + "M": 67584, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 696.9219999999998 + }, + 
"M=67584,N=2240": { + "file": "silu_config_M67584_N2240.json", + "M": 67584, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 752.8022500000002 + }, + "M=67584,N=2400": { + "file": "silu_config_M67584_N2400.json", + "M": 67584, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.8022499999997 + }, + "M=67584,N=2560": { + "file": "silu_config_M67584_N2560.json", + "M": 67584, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.8425000000002 + }, + "M=68608,N=128": { + "file": "silu_config_M68608_N128.json", + "M": 68608, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.31900000000001 + }, + "M=68608,N=160": { + "file": "silu_config_M68608_N160.json", + "M": 68608, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 96.23925000000001 + }, + "M=68608,N=192": { + "file": "silu_config_M68608_N192.json", + "M": 68608, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 96.23949999999998 + }, + "M=68608,N=256": { + "file": "silu_config_M68608_N256.json", + "M": 68608, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 80.59925 + }, + "M=68608,N=320": { + "file": "silu_config_M68608_N320.json", + "M": 68608, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.75974999999994 + }, + "M=68608,N=384": { + "file": "silu_config_M68608_N384.json", + "M": 68608, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.03975000000003 + }, + "M=68608,N=480": { + "file": "silu_config_M68608_N480.json", + "M": 68608, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.15974999999992 + }, + "M=68608,N=512": { + "file": "silu_config_M68608_N512.json", + "M": 68608, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 140.75975000000005 + }, + "M=68608,N=576": { + "file": "silu_config_M68608_N576.json", + "M": 68608, + "N": 576, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 340.88049999999987 + }, + 
"M=68608,N=640": { + "file": "silu_config_M68608_N640.json", + "M": 68608, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 338.4005000000001 + }, + "M=68608,N=768": { + "file": "silu_config_M68608_N768.json", + "M": 68608, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 339.52049999999986 + }, + "M=68608,N=800": { + "file": "silu_config_M68608_N800.json", + "M": 68608, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.08025 + }, + "M=68608,N=896": { + "file": "silu_config_M68608_N896.json", + "M": 68608, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 342.6405000000001 + }, + "M=68608,N=960": { + "file": "silu_config_M68608_N960.json", + "M": 68608, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 337.44050000000004 + }, + "M=68608,N=1024": { + "file": "silu_config_M68608_N1024.json", + "M": 68608, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 267.75999999999976 + }, + "M=68608,N=1120": { + "file": "silu_config_M68608_N1120.json", + "M": 68608, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 649.9617499999999 + }, + "M=68608,N=1152": { + "file": "silu_config_M68608_N1152.json", + "M": 68608, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 642.16175 + }, + "M=68608,N=1280": { + "file": "silu_config_M68608_N1280.json", + "M": 68608, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.36175 + }, + "M=68608,N=1344": { + "file": "silu_config_M68608_N1344.json", + "M": 68608, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 658.2417499999999 + }, + "M=68608,N=1408": { + "file": "silu_config_M68608_N1408.json", + "M": 68608, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 665.5617500000001 + }, + "M=68608,N=1440": { + "file": "silu_config_M68608_N1440.json", + "M": 68608, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.8017499999999 + }, + 
"M=68608,N=1536": { + "file": "silu_config_M68608_N1536.json", + "M": 68608, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 667.1617500000002 + }, + "M=68608,N=1600": { + "file": "silu_config_M68608_N1600.json", + "M": 68608, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 648.8417500000005 + }, + "M=68608,N=1664": { + "file": "silu_config_M68608_N1664.json", + "M": 68608, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.8017500000001 + }, + "M=68608,N=1728": { + "file": "silu_config_M68608_N1728.json", + "M": 68608, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 672.8017500000001 + }, + "M=68608,N=1760": { + "file": "silu_config_M68608_N1760.json", + "M": 68608, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.4017500000004 + }, + "M=68608,N=1792": { + "file": "silu_config_M68608_N1792.json", + "M": 68608, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.7617499999997 + }, + "M=68608,N=1920": { + "file": "silu_config_M68608_N1920.json", + "M": 68608, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 659.2417500000001 + }, + "M=68608,N=2048": { + "file": "silu_config_M68608_N2048.json", + "M": 68608, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 512.7610000000002 + }, + "M=68608,N=2080": { + "file": "silu_config_M68608_N2080.json", + "M": 68608, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 707.0819999999999 + }, + "M=68608,N=2240": { + "file": "silu_config_M68608_N2240.json", + "M": 68608, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2022499999996 + }, + "M=68608,N=2400": { + "file": "silu_config_M68608_N2400.json", + "M": 68608, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.1222500000003 + }, + "M=68608,N=2560": { + "file": "silu_config_M68608_N2560.json", + "M": 68608, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
828.2825000000004 + }, + "M=69632,N=128": { + "file": "silu_config_M69632_N128.json", + "M": 69632, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.319250000000004 + }, + "M=69632,N=160": { + "file": "silu_config_M69632_N160.json", + "M": 69632, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 97.39949999999999 + }, + "M=69632,N=192": { + "file": "silu_config_M69632_N192.json", + "M": 69632, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 97.67925000000001 + }, + "M=69632,N=256": { + "file": "silu_config_M69632_N256.json", + "M": 69632, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.83924999999999 + }, + "M=69632,N=320": { + "file": "silu_config_M69632_N320.json", + "M": 69632, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 176.35974999999996 + }, + "M=69632,N=384": { + "file": "silu_config_M69632_N384.json", + "M": 69632, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 175.03975 + }, + "M=69632,N=480": { + "file": "silu_config_M69632_N480.json", + "M": 69632, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 179.39975000000004 + }, + "M=69632,N=512": { + "file": "silu_config_M69632_N512.json", + "M": 69632, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 139.83950000000004 + }, + "M=69632,N=576": { + "file": "silu_config_M69632_N576.json", + "M": 69632, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.9604999999999 + }, + "M=69632,N=640": { + "file": "silu_config_M69632_N640.json", + "M": 69632, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.3605000000001 + }, + "M=69632,N=768": { + "file": "silu_config_M69632_N768.json", + "M": 69632, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 337.96025000000003 + }, + "M=69632,N=800": { + "file": "silu_config_M69632_N800.json", + "M": 69632, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 340.8405 + }, + 
"M=69632,N=896": { + "file": "silu_config_M69632_N896.json", + "M": 69632, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 345.96049999999997 + }, + "M=69632,N=960": { + "file": "silu_config_M69632_N960.json", + "M": 69632, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 354.1204999999998 + }, + "M=69632,N=1024": { + "file": "silu_config_M69632_N1024.json", + "M": 69632, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 272.3202500000001 + }, + "M=69632,N=1120": { + "file": "silu_config_M69632_N1120.json", + "M": 69632, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.8417500000002 + }, + "M=69632,N=1152": { + "file": "silu_config_M69632_N1152.json", + "M": 69632, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 651.8417500000002 + }, + "M=69632,N=1280": { + "file": "silu_config_M69632_N1280.json", + "M": 69632, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 663.5619999999998 + }, + "M=69632,N=1344": { + "file": "silu_config_M69632_N1344.json", + "M": 69632, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 667.8017499999999 + }, + "M=69632,N=1408": { + "file": "silu_config_M69632_N1408.json", + "M": 69632, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.2019999999999 + }, + "M=69632,N=1440": { + "file": "silu_config_M69632_N1440.json", + "M": 69632, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 657.84175 + }, + "M=69632,N=1536": { + "file": "silu_config_M69632_N1536.json", + "M": 69632, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.7617499999998 + }, + "M=69632,N=1600": { + "file": "silu_config_M69632_N1600.json", + "M": 69632, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.1220000000001 + }, + "M=69632,N=1664": { + "file": "silu_config_M69632_N1664.json", + "M": 69632, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 655.1217500000002 + 
}, + "M=69632,N=1728": { + "file": "silu_config_M69632_N1728.json", + "M": 69632, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.842 + }, + "M=69632,N=1760": { + "file": "silu_config_M69632_N1760.json", + "M": 69632, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 671.202 + }, + "M=69632,N=1792": { + "file": "silu_config_M69632_N1792.json", + "M": 69632, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 678.4419999999999 + }, + "M=69632,N=1920": { + "file": "silu_config_M69632_N1920.json", + "M": 69632, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 679.4817500000001 + }, + "M=69632,N=2048": { + "file": "silu_config_M69632_N2048.json", + "M": 69632, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 521.00125 + }, + "M=69632,N=2080": { + "file": "silu_config_M69632_N2080.json", + "M": 69632, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 725.002 + }, + "M=69632,N=2240": { + "file": "silu_config_M69632_N2240.json", + "M": 69632, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 776.1222500000002 + }, + "M=69632,N=2400": { + "file": "silu_config_M69632_N2400.json", + "M": 69632, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 817.2025000000001 + }, + "M=69632,N=2560": { + "file": "silu_config_M69632_N2560.json", + "M": 69632, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 858.8024999999998 + }, + "M=70656,N=128": { + "file": "silu_config_M70656_N128.json", + "M": 70656, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.43899999999998 + }, + "M=70656,N=160": { + "file": "silu_config_M70656_N160.json", + "M": 70656, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.9995 + }, + "M=70656,N=192": { + "file": "silu_config_M70656_N192.json", + "M": 70656, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 98.71925 + }, + "M=70656,N=256": { + "file": 
"silu_config_M70656_N256.json", + "M": 70656, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 90.11950000000002 + }, + "M=70656,N=320": { + "file": "silu_config_M70656_N320.json", + "M": 70656, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.59974999999991 + }, + "M=70656,N=384": { + "file": "silu_config_M70656_N384.json", + "M": 70656, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.11975 + }, + "M=70656,N=480": { + "file": "silu_config_M70656_N480.json", + "M": 70656, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.83975000000004 + }, + "M=70656,N=512": { + "file": "silu_config_M70656_N512.json", + "M": 70656, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 144.87975 + }, + "M=70656,N=576": { + "file": "silu_config_M70656_N576.json", + "M": 70656, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 345.4804999999999 + }, + "M=70656,N=640": { + "file": "silu_config_M70656_N640.json", + "M": 70656, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.0804999999999 + }, + "M=70656,N=768": { + "file": "silu_config_M70656_N768.json", + "M": 70656, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 341.4005000000001 + }, + "M=70656,N=800": { + "file": "silu_config_M70656_N800.json", + "M": 70656, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 345.00025000000016 + }, + "M=70656,N=896": { + "file": "silu_config_M70656_N896.json", + "M": 70656, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 335.4002499999999 + }, + "M=70656,N=960": { + "file": "silu_config_M70656_N960.json", + "M": 70656, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 337.8805 + }, + "M=70656,N=1024": { + "file": "silu_config_M70656_N1024.json", + "M": 70656, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 280.7602499999999 + }, + "M=70656,N=1120": { + "file": 
"silu_config_M70656_N1120.json", + "M": 70656, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 679.7217499999999 + }, + "M=70656,N=1152": { + "file": "silu_config_M70656_N1152.json", + "M": 70656, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.4017500000002 + }, + "M=70656,N=1280": { + "file": "silu_config_M70656_N1280.json", + "M": 70656, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 665.5617500000001 + }, + "M=70656,N=1344": { + "file": "silu_config_M70656_N1344.json", + "M": 70656, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 680.8017499999999 + }, + "M=70656,N=1408": { + "file": "silu_config_M70656_N1408.json", + "M": 70656, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.3617499999998 + }, + "M=70656,N=1440": { + "file": "silu_config_M70656_N1440.json", + "M": 70656, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 693.402 + }, + "M=70656,N=1536": { + "file": "silu_config_M70656_N1536.json", + "M": 70656, + "N": 1536, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 683.60175 + }, + "M=70656,N=1600": { + "file": "silu_config_M70656_N1600.json", + "M": 70656, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.3217500000001 + }, + "M=70656,N=1664": { + "file": "silu_config_M70656_N1664.json", + "M": 70656, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.8417499999998 + }, + "M=70656,N=1728": { + "file": "silu_config_M70656_N1728.json", + "M": 70656, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 685.0417499999996 + }, + "M=70656,N=1760": { + "file": "silu_config_M70656_N1760.json", + "M": 70656, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.20175 + }, + "M=70656,N=1792": { + "file": "silu_config_M70656_N1792.json", + "M": 70656, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.9617500000006 + }, + "M=70656,N=1920": { + "file": 
"silu_config_M70656_N1920.json", + "M": 70656, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.0817499999998 + }, + "M=70656,N=2048": { + "file": "silu_config_M70656_N2048.json", + "M": 70656, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 551.8012499999998 + }, + "M=70656,N=2080": { + "file": "silu_config_M70656_N2080.json", + "M": 70656, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.20225 + }, + "M=70656,N=2240": { + "file": "silu_config_M70656_N2240.json", + "M": 70656, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.80225 + }, + "M=70656,N=2400": { + "file": "silu_config_M70656_N2400.json", + "M": 70656, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 839.8824999999997 + }, + "M=70656,N=2560": { + "file": "silu_config_M70656_N2560.json", + "M": 70656, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.2024999999999 + }, + "M=71680,N=128": { + "file": "silu_config_M71680_N128.json", + "M": 71680, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.71924999999999 + }, + "M=71680,N=160": { + "file": "silu_config_M71680_N160.json", + "M": 71680, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 100.15925 + }, + "M=71680,N=192": { + "file": "silu_config_M71680_N192.json", + "M": 71680, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 100.23925000000003 + }, + "M=71680,N=256": { + "file": "silu_config_M71680_N256.json", + "M": 71680, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.83924999999999 + }, + "M=71680,N=320": { + "file": "silu_config_M71680_N320.json", + "M": 71680, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 180.0797499999999 + }, + "M=71680,N=384": { + "file": "silu_config_M71680_N384.json", + "M": 71680, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.71974999999998 + }, + "M=71680,N=480": { + "file": 
"silu_config_M71680_N480.json", + "M": 71680, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.55974999999995 + }, + "M=71680,N=512": { + "file": "silu_config_M71680_N512.json", + "M": 71680, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 146.59975000000003 + }, + "M=71680,N=576": { + "file": "silu_config_M71680_N576.json", + "M": 71680, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 350.0005 + }, + "M=71680,N=640": { + "file": "silu_config_M71680_N640.json", + "M": 71680, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 347.3605 + }, + "M=71680,N=768": { + "file": "silu_config_M71680_N768.json", + "M": 71680, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.1204999999999 + }, + "M=71680,N=800": { + "file": "silu_config_M71680_N800.json", + "M": 71680, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.32050000000004 + }, + "M=71680,N=896": { + "file": "silu_config_M71680_N896.json", + "M": 71680, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.40049999999997 + }, + "M=71680,N=960": { + "file": "silu_config_M71680_N960.json", + "M": 71680, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.3605 + }, + "M=71680,N=1024": { + "file": "silu_config_M71680_N1024.json", + "M": 71680, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 283.76025000000004 + }, + "M=71680,N=1120": { + "file": "silu_config_M71680_N1120.json", + "M": 71680, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.3217499999998 + }, + "M=71680,N=1152": { + "file": "silu_config_M71680_N1152.json", + "M": 71680, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.2817499999996 + }, + "M=71680,N=1280": { + "file": "silu_config_M71680_N1280.json", + "M": 71680, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 670.3217500000001 + }, + "M=71680,N=1344": { + "file": 
"silu_config_M71680_N1344.json", + "M": 71680, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.8017499999999 + }, + "M=71680,N=1408": { + "file": "silu_config_M71680_N1408.json", + "M": 71680, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.08175 + }, + "M=71680,N=1440": { + "file": "silu_config_M71680_N1440.json", + "M": 71680, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.0017499999999 + }, + "M=71680,N=1536": { + "file": "silu_config_M71680_N1536.json", + "M": 71680, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 673.7617499999999 + }, + "M=71680,N=1600": { + "file": "silu_config_M71680_N1600.json", + "M": 71680, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.4817500000004 + }, + "M=71680,N=1664": { + "file": "silu_config_M71680_N1664.json", + "M": 71680, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 685.0417500000001 + }, + "M=71680,N=1728": { + "file": "silu_config_M71680_N1728.json", + "M": 71680, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.44175 + }, + "M=71680,N=1760": { + "file": "silu_config_M71680_N1760.json", + "M": 71680, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.9620000000002 + }, + "M=71680,N=1792": { + "file": "silu_config_M71680_N1792.json", + "M": 71680, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.8417499999996 + }, + "M=71680,N=1920": { + "file": "silu_config_M71680_N1920.json", + "M": 71680, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.2817500000001 + }, + "M=71680,N=2048": { + "file": "silu_config_M71680_N2048.json", + "M": 71680, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 548.8812499999999 + }, + "M=71680,N=2080": { + "file": "silu_config_M71680_N2080.json", + "M": 71680, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2822499999997 + }, + "M=71680,N=2240": { + 
"file": "silu_config_M71680_N2240.json", + "M": 71680, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.6822499999998 + }, + "M=71680,N=2400": { + "file": "silu_config_M71680_N2400.json", + "M": 71680, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 851.4425000000001 + }, + "M=71680,N=2560": { + "file": "silu_config_M71680_N2560.json", + "M": 71680, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 889.6827500000004 + }, + "M=72704,N=128": { + "file": "silu_config_M72704_N128.json", + "M": 72704, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.43900000000001 + }, + "M=72704,N=160": { + "file": "silu_config_M72704_N160.json", + "M": 72704, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 101.43950000000002 + }, + "M=72704,N=192": { + "file": "silu_config_M72704_N192.json", + "M": 72704, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.5995 + }, + "M=72704,N=256": { + "file": "silu_config_M72704_N256.json", + "M": 72704, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.95925000000001 + }, + "M=72704,N=320": { + "file": "silu_config_M72704_N320.json", + "M": 72704, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.87975000000003 + }, + "M=72704,N=384": { + "file": "silu_config_M72704_N384.json", + "M": 72704, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 182.31974999999994 + }, + "M=72704,N=480": { + "file": "silu_config_M72704_N480.json", + "M": 72704, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.07975000000005 + }, + "M=72704,N=512": { + "file": "silu_config_M72704_N512.json", + "M": 72704, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 148.6395 + }, + "M=72704,N=576": { + "file": "silu_config_M72704_N576.json", + "M": 72704, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 354.9604999999999 + }, + "M=72704,N=640": { + "file": 
"silu_config_M72704_N640.json", + "M": 72704, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.9604999999998 + }, + "M=72704,N=768": { + "file": "silu_config_M72704_N768.json", + "M": 72704, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.9604999999998 + }, + "M=72704,N=800": { + "file": "silu_config_M72704_N800.json", + "M": 72704, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 357.9604999999999 + }, + "M=72704,N=896": { + "file": "silu_config_M72704_N896.json", + "M": 72704, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 366.5205000000002 + }, + "M=72704,N=960": { + "file": "silu_config_M72704_N960.json", + "M": 72704, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 366.32050000000004 + }, + "M=72704,N=1024": { + "file": "silu_config_M72704_N1024.json", + "M": 72704, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 287.7202500000002 + }, + "M=72704,N=1120": { + "file": "silu_config_M72704_N1120.json", + "M": 72704, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.88175 + }, + "M=72704,N=1152": { + "file": "silu_config_M72704_N1152.json", + "M": 72704, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.8819999999998 + }, + "M=72704,N=1280": { + "file": "silu_config_M72704_N1280.json", + "M": 72704, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.762 + }, + "M=72704,N=1344": { + "file": "silu_config_M72704_N1344.json", + "M": 72704, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.6817500000002 + }, + "M=72704,N=1408": { + "file": "silu_config_M72704_N1408.json", + "M": 72704, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 694.4820000000002 + }, + "M=72704,N=1440": { + "file": "silu_config_M72704_N1440.json", + "M": 72704, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 713.0420000000001 + }, + "M=72704,N=1536": { + "file": 
"silu_config_M72704_N1536.json", + "M": 72704, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.1617500000002 + }, + "M=72704,N=1600": { + "file": "silu_config_M72704_N1600.json", + "M": 72704, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 702.682 + }, + "M=72704,N=1664": { + "file": "silu_config_M72704_N1664.json", + "M": 72704, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 697.1220000000001 + }, + "M=72704,N=1728": { + "file": "silu_config_M72704_N1728.json", + "M": 72704, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.0819999999999 + }, + "M=72704,N=1760": { + "file": "silu_config_M72704_N1760.json", + "M": 72704, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.6020000000003 + }, + "M=72704,N=1792": { + "file": "silu_config_M72704_N1792.json", + "M": 72704, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 680.9217499999997 + }, + "M=72704,N=1920": { + "file": "silu_config_M72704_N1920.json", + "M": 72704, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 701.5220000000004 + }, + "M=72704,N=2048": { + "file": "silu_config_M72704_N2048.json", + "M": 72704, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 571.1215000000002 + }, + "M=72704,N=2080": { + "file": "silu_config_M72704_N2080.json", + "M": 72704, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 763.56225 + }, + "M=72704,N=2240": { + "file": "silu_config_M72704_N2240.json", + "M": 72704, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 807.4825000000001 + }, + "M=72704,N=2400": { + "file": "silu_config_M72704_N2400.json", + "M": 72704, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 878.2827499999999 + }, + "M=72704,N=2560": { + "file": "silu_config_M72704_N2560.json", + "M": 72704, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 881.4027500000007 + }, + "M=73728,N=128": { + 
"file": "silu_config_M73728_N128.json", + "M": 73728, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.559250000000006 + }, + "M=73728,N=160": { + "file": "silu_config_M73728_N160.json", + "M": 73728, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.55925000000003 + }, + "M=73728,N=192": { + "file": "silu_config_M73728_N192.json", + "M": 73728, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 102.59924999999998 + }, + "M=73728,N=256": { + "file": "silu_config_M73728_N256.json", + "M": 73728, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.43925 + }, + "M=73728,N=320": { + "file": "silu_config_M73728_N320.json", + "M": 73728, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 185.80000000000004 + }, + "M=73728,N=384": { + "file": "silu_config_M73728_N384.json", + "M": 73728, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.92000000000004 + }, + "M=73728,N=480": { + "file": "silu_config_M73728_N480.json", + "M": 73728, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.67975000000007 + }, + "M=73728,N=512": { + "file": "silu_config_M73728_N512.json", + "M": 73728, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 150.5195 + }, + "M=73728,N=576": { + "file": "silu_config_M73728_N576.json", + "M": 73728, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.84050000000013 + }, + "M=73728,N=640": { + "file": "silu_config_M73728_N640.json", + "M": 73728, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 363.76049999999987 + }, + "M=73728,N=768": { + "file": "silu_config_M73728_N768.json", + "M": 73728, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 355.4805 + }, + "M=73728,N=800": { + "file": "silu_config_M73728_N800.json", + "M": 73728, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 359.44049999999993 + }, + "M=73728,N=896": { + "file": 
"silu_config_M73728_N896.json", + "M": 73728, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.88049999999976 + }, + "M=73728,N=960": { + "file": "silu_config_M73728_N960.json", + "M": 73728, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.00049999999976 + }, + "M=73728,N=1024": { + "file": "silu_config_M73728_N1024.json", + "M": 73728, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 291.2402500000002 + }, + "M=73728,N=1120": { + "file": "silu_config_M73728_N1120.json", + "M": 73728, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.6020000000001 + }, + "M=73728,N=1152": { + "file": "silu_config_M73728_N1152.json", + "M": 73728, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 701.5219999999999 + }, + "M=73728,N=1280": { + "file": "silu_config_M73728_N1280.json", + "M": 73728, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 702.8020000000001 + }, + "M=73728,N=1344": { + "file": "silu_config_M73728_N1344.json", + "M": 73728, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.6419999999998 + }, + "M=73728,N=1408": { + "file": "silu_config_M73728_N1408.json", + "M": 73728, + "N": 1408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 711.4819999999997 + }, + "M=73728,N=1440": { + "file": "silu_config_M73728_N1440.json", + "M": 73728, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.2019999999998 + }, + "M=73728,N=1536": { + "file": "silu_config_M73728_N1536.json", + "M": 73728, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.3220000000001 + }, + "M=73728,N=1600": { + "file": "silu_config_M73728_N1600.json", + "M": 73728, + "N": 1600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 721.4820000000002 + }, + "M=73728,N=1664": { + "file": "silu_config_M73728_N1664.json", + "M": 73728, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 708.1220000000003 + }, + 
"M=73728,N=1728": { + "file": "silu_config_M73728_N1728.json", + "M": 73728, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 715.8019999999997 + }, + "M=73728,N=1760": { + "file": "silu_config_M73728_N1760.json", + "M": 73728, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 718.0819999999999 + }, + "M=73728,N=1792": { + "file": "silu_config_M73728_N1792.json", + "M": 73728, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 698.6019999999999 + }, + "M=73728,N=1920": { + "file": "silu_config_M73728_N1920.json", + "M": 73728, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 700.4420000000002 + }, + "M=73728,N=2048": { + "file": "silu_config_M73728_N2048.json", + "M": 73728, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 569.4014999999997 + }, + "M=73728,N=2080": { + "file": "silu_config_M73728_N2080.json", + "M": 73728, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 785.1622499999996 + }, + "M=73728,N=2240": { + "file": "silu_config_M73728_N2240.json", + "M": 73728, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.0025000000005 + }, + "M=73728,N=2400": { + "file": "silu_config_M73728_N2400.json", + "M": 73728, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.6025 + }, + "M=73728,N=2560": { + "file": "silu_config_M73728_N2560.json", + "M": 73728, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 905.4027500000002 + }, + "M=74752,N=128": { + "file": "silu_config_M74752_N128.json", + "M": 74752, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 47.67899999999998 + }, + "M=74752,N=160": { + "file": "silu_config_M74752_N160.json", + "M": 74752, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 103.99949999999998 + }, + "M=74752,N=192": { + "file": "silu_config_M74752_N192.json", + "M": 74752, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 104.11924999999998 + }, 
+ "M=74752,N=256": { + "file": "silu_config_M74752_N256.json", + "M": 74752, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.35925000000002 + }, + "M=74752,N=320": { + "file": "silu_config_M74752_N320.json", + "M": 74752, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 188.35975000000002 + }, + "M=74752,N=384": { + "file": "silu_config_M74752_N384.json", + "M": 74752, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 187.03975000000003 + }, + "M=74752,N=480": { + "file": "silu_config_M74752_N480.json", + "M": 74752, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.35974999999996 + }, + "M=74752,N=512": { + "file": "silu_config_M74752_N512.json", + "M": 74752, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 149.47975000000002 + }, + "M=74752,N=576": { + "file": "silu_config_M74752_N576.json", + "M": 74752, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 364.56049999999993 + }, + "M=74752,N=640": { + "file": "silu_config_M74752_N640.json", + "M": 74752, + "N": 640, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 377.1205000000001 + }, + "M=74752,N=768": { + "file": "silu_config_M74752_N768.json", + "M": 74752, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 363.28049999999996 + }, + "M=74752,N=800": { + "file": "silu_config_M74752_N800.json", + "M": 74752, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.7204999999998 + }, + "M=74752,N=896": { + "file": "silu_config_M74752_N896.json", + "M": 74752, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 361.88049999999987 + }, + "M=74752,N=960": { + "file": "silu_config_M74752_N960.json", + "M": 74752, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 369.56050000000016 + }, + "M=74752,N=1024": { + "file": "silu_config_M74752_N1024.json", + "M": 74752, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 295.88025000000005 + }, + 
"M=74752,N=1120": { + "file": "silu_config_M74752_N1120.json", + "M": 74752, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 718.242 + }, + "M=74752,N=1152": { + "file": "silu_config_M74752_N1152.json", + "M": 74752, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 721.8019999999999 + }, + "M=74752,N=1280": { + "file": "silu_config_M74752_N1280.json", + "M": 74752, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 712.3619999999999 + }, + "M=74752,N=1344": { + "file": "silu_config_M74752_N1344.json", + "M": 74752, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.6019999999999 + }, + "M=74752,N=1408": { + "file": "silu_config_M74752_N1408.json", + "M": 74752, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.8420000000001 + }, + "M=74752,N=1440": { + "file": "silu_config_M74752_N1440.json", + "M": 74752, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.962 + }, + "M=74752,N=1536": { + "file": "silu_config_M74752_N1536.json", + "M": 74752, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.0819999999999 + }, + "M=74752,N=1600": { + "file": "silu_config_M74752_N1600.json", + "M": 74752, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 713.242 + }, + "M=74752,N=1664": { + "file": "silu_config_M74752_N1664.json", + "M": 74752, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 697.0419999999997 + }, + "M=74752,N=1728": { + "file": "silu_config_M74752_N1728.json", + "M": 74752, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 725.5619999999999 + }, + "M=74752,N=1760": { + "file": "silu_config_M74752_N1760.json", + "M": 74752, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.5619999999997 + }, + "M=74752,N=1792": { + "file": "silu_config_M74752_N1792.json", + "M": 74752, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.2419999999997 + }, + 
"M=74752,N=1920": { + "file": "silu_config_M74752_N1920.json", + "M": 74752, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 723.7220000000002 + }, + "M=74752,N=2048": { + "file": "silu_config_M74752_N2048.json", + "M": 74752, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 588.4815000000001 + }, + "M=74752,N=2080": { + "file": "silu_config_M74752_N2080.json", + "M": 74752, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.72225 + }, + "M=74752,N=2240": { + "file": "silu_config_M74752_N2240.json", + "M": 74752, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 819.7624999999998 + }, + "M=74752,N=2400": { + "file": "silu_config_M74752_N2400.json", + "M": 74752, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 887.08275 + }, + "M=74752,N=2560": { + "file": "silu_config_M74752_N2560.json", + "M": 74752, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 914.7227499999999 + }, + "M=75776,N=128": { + "file": "silu_config_M75776_N128.json", + "M": 75776, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.31924999999997 + }, + "M=75776,N=160": { + "file": "silu_config_M75776_N160.json", + "M": 75776, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 96.59949999999998 + }, + "M=75776,N=192": { + "file": "silu_config_M75776_N192.json", + "M": 75776, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 105.47925000000002 + }, + "M=75776,N=256": { + "file": "silu_config_M75776_N256.json", + "M": 75776, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.23925000000001 + }, + "M=75776,N=320": { + "file": "silu_config_M75776_N320.json", + "M": 75776, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 191.19975000000002 + }, + "M=75776,N=384": { + "file": "silu_config_M75776_N384.json", + "M": 75776, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.55975 + }, + "M=75776,N=480": { 
+ "file": "silu_config_M75776_N480.json", + "M": 75776, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.39974999999998 + }, + "M=75776,N=512": { + "file": "silu_config_M75776_N512.json", + "M": 75776, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 154.51975 + }, + "M=75776,N=576": { + "file": "silu_config_M75776_N576.json", + "M": 75776, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.5205000000001 + }, + "M=75776,N=640": { + "file": "silu_config_M75776_N640.json", + "M": 75776, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.80050000000017 + }, + "M=75776,N=768": { + "file": "silu_config_M75776_N768.json", + "M": 75776, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 357.16049999999984 + }, + "M=75776,N=800": { + "file": "silu_config_M75776_N800.json", + "M": 75776, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 361.4005000000002 + }, + "M=75776,N=896": { + "file": "silu_config_M75776_N896.json", + "M": 75776, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 369.56049999999993 + }, + "M=75776,N=960": { + "file": "silu_config_M75776_N960.json", + "M": 75776, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.3604999999999 + }, + "M=75776,N=1024": { + "file": "silu_config_M75776_N1024.json", + "M": 75776, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 300.52025000000015 + }, + "M=75776,N=1120": { + "file": "silu_config_M75776_N1120.json", + "M": 75776, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.2820000000004 + }, + "M=75776,N=1152": { + "file": "silu_config_M75776_N1152.json", + "M": 75776, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.6419999999996 + }, + "M=75776,N=1280": { + "file": "silu_config_M75776_N1280.json", + "M": 75776, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 735.7220000000002 + }, + "M=75776,N=1344": { + 
"file": "silu_config_M75776_N1344.json", + "M": 75776, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.5220000000002 + }, + "M=75776,N=1408": { + "file": "silu_config_M75776_N1408.json", + "M": 75776, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 734.0819999999999 + }, + "M=75776,N=1440": { + "file": "silu_config_M75776_N1440.json", + "M": 75776, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.922 + }, + "M=75776,N=1536": { + "file": "silu_config_M75776_N1536.json", + "M": 75776, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 705.1619999999998 + }, + "M=75776,N=1600": { + "file": "silu_config_M75776_N1600.json", + "M": 75776, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 733.1220000000001 + }, + "M=75776,N=1664": { + "file": "silu_config_M75776_N1664.json", + "M": 75776, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.3619999999996 + }, + "M=75776,N=1728": { + "file": "silu_config_M75776_N1728.json", + "M": 75776, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 722.762 + }, + "M=75776,N=1760": { + "file": "silu_config_M75776_N1760.json", + "M": 75776, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.3220000000001 + }, + "M=75776,N=1792": { + "file": "silu_config_M75776_N1792.json", + "M": 75776, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.7222500000005 + }, + "M=75776,N=1920": { + "file": "silu_config_M75776_N1920.json", + "M": 75776, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.3219999999997 + }, + "M=75776,N=2048": { + "file": "silu_config_M75776_N2048.json", + "M": 75776, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 568.9614999999999 + }, + "M=75776,N=2080": { + "file": "silu_config_M75776_N2080.json", + "M": 75776, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.6022500000004 + }, + "M=75776,N=2240": { 
+ "file": "silu_config_M75776_N2240.json", + "M": 75776, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.3625000000002 + }, + "M=75776,N=2400": { + "file": "silu_config_M75776_N2400.json", + "M": 75776, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 899.2427499999999 + }, + "M=75776,N=2560": { + "file": "silu_config_M75776_N2560.json", + "M": 75776, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 937.2429999999995 + }, + "M=76800,N=128": { + "file": "silu_config_M76800_N128.json", + "M": 76800, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.59924999999999 + }, + "M=76800,N=160": { + "file": "silu_config_M76800_N160.json", + "M": 76800, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 106.51950000000002 + }, + "M=76800,N=192": { + "file": "silu_config_M76800_N192.json", + "M": 76800, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.31925000000001 + }, + "M=76800,N=256": { + "file": "silu_config_M76800_N256.json", + "M": 76800, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.35925 + }, + "M=76800,N=320": { + "file": "silu_config_M76800_N320.json", + "M": 76800, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 192.24 + }, + "M=76800,N=384": { + "file": "silu_config_M76800_N384.json", + "M": 76800, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 190.9197499999999 + }, + "M=76800,N=480": { + "file": "silu_config_M76800_N480.json", + "M": 76800, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.60000000000002 + }, + "M=76800,N=512": { + "file": "silu_config_M76800_N512.json", + "M": 76800, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 156.47975000000002 + }, + "M=76800,N=576": { + "file": "silu_config_M76800_N576.json", + "M": 76800, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.0405000000003 + }, + "M=76800,N=640": { + "file": 
"silu_config_M76800_N640.json", + "M": 76800, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 376.2007500000001 + }, + "M=76800,N=768": { + "file": "silu_config_M76800_N768.json", + "M": 76800, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.2007500000002 + }, + "M=76800,N=800": { + "file": "silu_config_M76800_N800.json", + "M": 76800, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 374.44074999999987 + }, + "M=76800,N=896": { + "file": "silu_config_M76800_N896.json", + "M": 76800, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 364.08050000000003 + }, + "M=76800,N=960": { + "file": "silu_config_M76800_N960.json", + "M": 76800, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.48074999999994 + }, + "M=76800,N=1024": { + "file": "silu_config_M76800_N1024.json", + "M": 76800, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 303.56049999999993 + }, + "M=76800,N=1120": { + "file": "silu_config_M76800_N1120.json", + "M": 76800, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 726.8422500000001 + }, + "M=76800,N=1152": { + "file": "silu_config_M76800_N1152.json", + "M": 76800, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 726.722 + }, + "M=76800,N=1280": { + "file": "silu_config_M76800_N1280.json", + "M": 76800, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.4022500000001 + }, + "M=76800,N=1344": { + "file": "silu_config_M76800_N1344.json", + "M": 76800, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.1622500000001 + }, + "M=76800,N=1408": { + "file": "silu_config_M76800_N1408.json", + "M": 76800, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.402 + }, + "M=76800,N=1440": { + "file": "silu_config_M76800_N1440.json", + "M": 76800, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.9622499999998 + }, + "M=76800,N=1536": { + "file": 
"silu_config_M76800_N1536.json", + "M": 76800, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.9622499999998 + }, + "M=76800,N=1600": { + "file": "silu_config_M76800_N1600.json", + "M": 76800, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.4022499999999 + }, + "M=76800,N=1664": { + "file": "silu_config_M76800_N1664.json", + "M": 76800, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 731.642 + }, + "M=76800,N=1728": { + "file": "silu_config_M76800_N1728.json", + "M": 76800, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.1220000000003 + }, + "M=76800,N=1760": { + "file": "silu_config_M76800_N1760.json", + "M": 76800, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 744.5219999999997 + }, + "M=76800,N=1792": { + "file": "silu_config_M76800_N1792.json", + "M": 76800, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.2019999999998 + }, + "M=76800,N=1920": { + "file": "silu_config_M76800_N1920.json", + "M": 76800, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.0820000000001 + }, + "M=76800,N=2048": { + "file": "silu_config_M76800_N2048.json", + "M": 76800, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 591.1615000000002 + }, + "M=76800,N=2080": { + "file": "silu_config_M76800_N2080.json", + "M": 76800, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.52225 + }, + "M=76800,N=2240": { + "file": "silu_config_M76800_N2240.json", + "M": 76800, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 856.6824999999999 + }, + "M=76800,N=2400": { + "file": "silu_config_M76800_N2400.json", + "M": 76800, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 884.3227499999998 + }, + "M=76800,N=2560": { + "file": "silu_config_M76800_N2560.json", + "M": 76800, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.5230000000001 + }, + "M=77824,N=128": { + 
"file": "silu_config_M77824_N128.json", + "M": 77824, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 56.51925000000001 + }, + "M=77824,N=160": { + "file": "silu_config_M77824_N160.json", + "M": 77824, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 107.9195 + }, + "M=77824,N=192": { + "file": "silu_config_M77824_N192.json", + "M": 77824, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.47950000000003 + }, + "M=77824,N=256": { + "file": "silu_config_M77824_N256.json", + "M": 77824, + "N": 256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 106.4795 + }, + "M=77824,N=320": { + "file": "silu_config_M77824_N320.json", + "M": 77824, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.16 + }, + "M=77824,N=384": { + "file": "silu_config_M77824_N384.json", + "M": 77824, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.59975000000003 + }, + "M=77824,N=480": { + "file": "silu_config_M77824_N480.json", + "M": 77824, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.71974999999998 + }, + "M=77824,N=512": { + "file": "silu_config_M77824_N512.json", + "M": 77824, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 158.63975 + }, + "M=77824,N=576": { + "file": "silu_config_M77824_N576.json", + "M": 77824, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 368.04075 + }, + "M=77824,N=640": { + "file": "silu_config_M77824_N640.json", + "M": 77824, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 376.56050000000005 + }, + "M=77824,N=768": { + "file": "silu_config_M77824_N768.json", + "M": 77824, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 377.4805 + }, + "M=77824,N=800": { + "file": "silu_config_M77824_N800.json", + "M": 77824, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.4805 + }, + "M=77824,N=896": { + "file": "silu_config_M77824_N896.json", + "M": 77824, + "N": 896, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 379.56049999999993 + }, + "M=77824,N=960": { + "file": "silu_config_M77824_N960.json", + "M": 77824, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 384.32050000000015 + }, + "M=77824,N=1024": { + "file": "silu_config_M77824_N1024.json", + "M": 77824, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 308.04025 + }, + "M=77824,N=1120": { + "file": "silu_config_M77824_N1120.json", + "M": 77824, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 736.24225 + }, + "M=77824,N=1152": { + "file": "silu_config_M77824_N1152.json", + "M": 77824, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 739.3222500000004 + }, + "M=77824,N=1280": { + "file": "silu_config_M77824_N1280.json", + "M": 77824, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 741.1222499999997 + }, + "M=77824,N=1344": { + "file": "silu_config_M77824_N1344.json", + "M": 77824, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 739.0020000000004 + }, + "M=77824,N=1408": { + "file": "silu_config_M77824_N1408.json", + "M": 77824, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 742.3222500000002 + }, + "M=77824,N=1440": { + "file": "silu_config_M77824_N1440.json", + "M": 77824, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.9219999999998 + }, + "M=77824,N=1536": { + "file": "silu_config_M77824_N1536.json", + "M": 77824, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.56225 + }, + "M=77824,N=1600": { + "file": "silu_config_M77824_N1600.json", + "M": 77824, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.3620000000001 + }, + "M=77824,N=1664": { + "file": "silu_config_M77824_N1664.json", + "M": 77824, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 756.1622500000001 + }, + "M=77824,N=1728": { + "file": "silu_config_M77824_N1728.json", + "M": 77824, + "N": 1728, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 764.4422500000001 + }, + "M=77824,N=1760": { + "file": "silu_config_M77824_N1760.json", + "M": 77824, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.2819999999997 + }, + "M=77824,N=1792": { + "file": "silu_config_M77824_N1792.json", + "M": 77824, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.962 + }, + "M=77824,N=1920": { + "file": "silu_config_M77824_N1920.json", + "M": 77824, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.9219999999996 + }, + "M=77824,N=2048": { + "file": "silu_config_M77824_N2048.json", + "M": 77824, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 614.0014999999999 + }, + "M=77824,N=2080": { + "file": "silu_config_M77824_N2080.json", + "M": 77824, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 823.3224999999998 + }, + "M=77824,N=2240": { + "file": "silu_config_M77824_N2240.json", + "M": 77824, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 874.8427499999998 + }, + "M=77824,N=2400": { + "file": "silu_config_M77824_N2400.json", + "M": 77824, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.3227500000007 + }, + "M=77824,N=2560": { + "file": "silu_config_M77824_N2560.json", + "M": 77824, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.2829999999999 + }, + "M=78848,N=128": { + "file": "silu_config_M78848_N128.json", + "M": 78848, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.75924999999999 + }, + "M=78848,N=160": { + "file": "silu_config_M78848_N160.json", + "M": 78848, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 109.39950000000002 + }, + "M=78848,N=192": { + "file": "silu_config_M78848_N192.json", + "M": 78848, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 109.3995 + }, + "M=78848,N=256": { + "file": "silu_config_M78848_N256.json", + "M": 78848, + "N": 256, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 91.51925000000001 + }, + "M=78848,N=320": { + "file": "silu_config_M78848_N320.json", + "M": 78848, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 198.83975 + }, + "M=78848,N=384": { + "file": "silu_config_M78848_N384.json", + "M": 78848, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 197.19975 + }, + "M=78848,N=480": { + "file": "silu_config_M78848_N480.json", + "M": 78848, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.19975 + }, + "M=78848,N=512": { + "file": "silu_config_M78848_N512.json", + "M": 78848, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 160.27975000000004 + }, + "M=78848,N=576": { + "file": "silu_config_M78848_N576.json", + "M": 78848, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.04075 + }, + "M=78848,N=640": { + "file": "silu_config_M78848_N640.json", + "M": 78848, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.9607500000001 + }, + "M=78848,N=768": { + "file": "silu_config_M78848_N768.json", + "M": 78848, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.40049999999997 + }, + "M=78848,N=800": { + "file": "silu_config_M78848_N800.json", + "M": 78848, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.72074999999984 + }, + "M=78848,N=896": { + "file": "silu_config_M78848_N896.json", + "M": 78848, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 373.8007500000001 + }, + "M=78848,N=960": { + "file": "silu_config_M78848_N960.json", + "M": 78848, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 388.8805000000001 + }, + "M=78848,N=1024": { + "file": "silu_config_M78848_N1024.json", + "M": 78848, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 311.96024999999986 + }, + "M=78848,N=1120": { + "file": "silu_config_M78848_N1120.json", + "M": 78848, + "N": 1120, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 747.5619999999999 + }, + "M=78848,N=1152": { + "file": "silu_config_M78848_N1152.json", + "M": 78848, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.3222499999997 + }, + "M=78848,N=1280": { + "file": "silu_config_M78848_N1280.json", + "M": 78848, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.6022500000001 + }, + "M=78848,N=1344": { + "file": "silu_config_M78848_N1344.json", + "M": 78848, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 746.722 + }, + "M=78848,N=1408": { + "file": "silu_config_M78848_N1408.json", + "M": 78848, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 751.80225 + }, + "M=78848,N=1440": { + "file": "silu_config_M78848_N1440.json", + "M": 78848, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.2822499999997 + }, + "M=78848,N=1536": { + "file": "silu_config_M78848_N1536.json", + "M": 78848, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.3622499999999 + }, + "M=78848,N=1600": { + "file": "silu_config_M78848_N1600.json", + "M": 78848, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.0822499999999 + }, + "M=78848,N=1664": { + "file": "silu_config_M78848_N1664.json", + "M": 78848, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.6022499999997 + }, + "M=78848,N=1728": { + "file": "silu_config_M78848_N1728.json", + "M": 78848, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.2022500000007 + }, + "M=78848,N=1760": { + "file": "silu_config_M78848_N1760.json", + "M": 78848, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 774.7222499999998 + }, + "M=78848,N=1792": { + "file": "silu_config_M78848_N1792.json", + "M": 78848, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.72225 + }, + "M=78848,N=1920": { + "file": "silu_config_M78848_N1920.json", + "M": 78848, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 759.8022499999997 + }, + "M=78848,N=2048": { + "file": "silu_config_M78848_N2048.json", + "M": 78848, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 599.2015000000001 + }, + "M=78848,N=2080": { + "file": "silu_config_M78848_N2080.json", + "M": 78848, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 838.2824999999998 + }, + "M=78848,N=2240": { + "file": "silu_config_M78848_N2240.json", + "M": 78848, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.4427500000006 + }, + "M=78848,N=2400": { + "file": "silu_config_M78848_N2400.json", + "M": 78848, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.2827499999999 + }, + "M=78848,N=2560": { + "file": "silu_config_M78848_N2560.json", + "M": 78848, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.0429999999997 + }, + "M=79872,N=128": { + "file": "silu_config_M79872_N128.json", + "M": 79872, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.31925000000002 + }, + "M=79872,N=160": { + "file": "silu_config_M79872_N160.json", + "M": 79872, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 110.39950000000003 + }, + "M=79872,N=192": { + "file": "silu_config_M79872_N192.json", + "M": 79872, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 110.5995 + }, + "M=79872,N=256": { + "file": "silu_config_M79872_N256.json", + "M": 79872, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.63925000000002 + }, + "M=79872,N=320": { + "file": "silu_config_M79872_N320.json", + "M": 79872, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 201.03999999999996 + }, + "M=79872,N=384": { + "file": "silu_config_M79872_N384.json", + "M": 79872, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.31975 + }, + "M=79872,N=480": { + "file": "silu_config_M79872_N480.json", + "M": 79872, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
204.71974999999998 + }, + "M=79872,N=512": { + "file": "silu_config_M79872_N512.json", + "M": 79872, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 158.27975000000004 + }, + "M=79872,N=576": { + "file": "silu_config_M79872_N576.json", + "M": 79872, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.9605 + }, + "M=79872,N=640": { + "file": "silu_config_M79872_N640.json", + "M": 79872, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 383.80075 + }, + "M=79872,N=768": { + "file": "silu_config_M79872_N768.json", + "M": 79872, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.76075000000014 + }, + "M=79872,N=800": { + "file": "silu_config_M79872_N800.json", + "M": 79872, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 389.0004999999999 + }, + "M=79872,N=896": { + "file": "silu_config_M79872_N896.json", + "M": 79872, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.6807499999999 + }, + "M=79872,N=960": { + "file": "silu_config_M79872_N960.json", + "M": 79872, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 402.8807499999999 + }, + "M=79872,N=1024": { + "file": "silu_config_M79872_N1024.json", + "M": 79872, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 315.5202499999999 + }, + "M=79872,N=1120": { + "file": "silu_config_M79872_N1120.json", + "M": 79872, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1222499999997 + }, + "M=79872,N=1152": { + "file": "silu_config_M79872_N1152.json", + "M": 79872, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 758.6022499999997 + }, + "M=79872,N=1280": { + "file": "silu_config_M79872_N1280.json", + "M": 79872, + "N": 1280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 768.7222499999998 + }, + "M=79872,N=1344": { + "file": "silu_config_M79872_N1344.json", + "M": 79872, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
779.0422500000004 + }, + "M=79872,N=1408": { + "file": "silu_config_M79872_N1408.json", + "M": 79872, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.8022499999997 + }, + "M=79872,N=1440": { + "file": "silu_config_M79872_N1440.json", + "M": 79872, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 780.5222500000002 + }, + "M=79872,N=1536": { + "file": "silu_config_M79872_N1536.json", + "M": 79872, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.6422499999999 + }, + "M=79872,N=1600": { + "file": "silu_config_M79872_N1600.json", + "M": 79872, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 770.0022500000005 + }, + "M=79872,N=1664": { + "file": "silu_config_M79872_N1664.json", + "M": 79872, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 763.9222499999996 + }, + "M=79872,N=1728": { + "file": "silu_config_M79872_N1728.json", + "M": 79872, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 761.4022499999999 + }, + "M=79872,N=1760": { + "file": "silu_config_M79872_N1760.json", + "M": 79872, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 764.9622500000003 + }, + "M=79872,N=1792": { + "file": "silu_config_M79872_N1792.json", + "M": 79872, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.2422499999998 + }, + "M=79872,N=1920": { + "file": "silu_config_M79872_N1920.json", + "M": 79872, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 780.2822499999997 + }, + "M=79872,N=2048": { + "file": "silu_config_M79872_N2048.json", + "M": 79872, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 603.6814999999997 + }, + "M=79872,N=2080": { + "file": "silu_config_M79872_N2080.json", + "M": 79872, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.6825000000003 + }, + "M=79872,N=2240": { + "file": "silu_config_M79872_N2240.json", + "M": 79872, + "N": 2240, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 875.2827499999999 + }, + "M=79872,N=2400": { + "file": "silu_config_M79872_N2400.json", + "M": 79872, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 924.7627499999999 + }, + "M=79872,N=2560": { + "file": "silu_config_M79872_N2560.json", + "M": 79872, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 976.123 + }, + "M=80896,N=128": { + "file": "silu_config_M80896_N128.json", + "M": 80896, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 58.67925000000001 + }, + "M=80896,N=160": { + "file": "silu_config_M80896_N160.json", + "M": 80896, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.95924999999997 + }, + "M=80896,N=192": { + "file": "silu_config_M80896_N192.json", + "M": 80896, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.11950000000003 + }, + "M=80896,N=256": { + "file": "silu_config_M80896_N256.json", + "M": 80896, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.95925 + }, + "M=80896,N=320": { + "file": "silu_config_M80896_N320.json", + "M": 80896, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 203.6 + }, + "M=80896,N=384": { + "file": "silu_config_M80896_N384.json", + "M": 80896, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.23975000000002 + }, + "M=80896,N=480": { + "file": "silu_config_M80896_N480.json", + "M": 80896, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.31975 + }, + "M=80896,N=512": { + "file": "silu_config_M80896_N512.json", + "M": 80896, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 164.35975000000002 + }, + "M=80896,N=576": { + "file": "silu_config_M80896_N576.json", + "M": 80896, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.12075000000016 + }, + "M=80896,N=640": { + "file": "silu_config_M80896_N640.json", + "M": 80896, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
390.52049999999997 + }, + "M=80896,N=768": { + "file": "silu_config_M80896_N768.json", + "M": 80896, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 392.0007499999999 + }, + "M=80896,N=800": { + "file": "silu_config_M80896_N800.json", + "M": 80896, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.04049999999995 + }, + "M=80896,N=896": { + "file": "silu_config_M80896_N896.json", + "M": 80896, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 394.32074999999975 + }, + "M=80896,N=960": { + "file": "silu_config_M80896_N960.json", + "M": 80896, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 407.12074999999993 + }, + "M=80896,N=1024": { + "file": "silu_config_M80896_N1024.json", + "M": 80896, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 319.0002500000004 + }, + "M=80896,N=1120": { + "file": "silu_config_M80896_N1120.json", + "M": 80896, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 766.6022500000006 + }, + "M=80896,N=1152": { + "file": "silu_config_M80896_N1152.json", + "M": 80896, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.0822499999999 + }, + "M=80896,N=1280": { + "file": "silu_config_M80896_N1280.json", + "M": 80896, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.4822500000002 + }, + "M=80896,N=1344": { + "file": "silu_config_M80896_N1344.json", + "M": 80896, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.9622499999996 + }, + "M=80896,N=1408": { + "file": "silu_config_M80896_N1408.json", + "M": 80896, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.8022500000002 + }, + "M=80896,N=1440": { + "file": "silu_config_M80896_N1440.json", + "M": 80896, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 779.1624999999999 + }, + "M=80896,N=1536": { + "file": "silu_config_M80896_N1536.json", + "M": 80896, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 761.28225 + }, + "M=80896,N=1600": { + "file": "silu_config_M80896_N1600.json", + "M": 80896, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.28225 + }, + "M=80896,N=1664": { + "file": "silu_config_M80896_N1664.json", + "M": 80896, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.6822500000001 + }, + "M=80896,N=1728": { + "file": "silu_config_M80896_N1728.json", + "M": 80896, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 793.0022500000002 + }, + "M=80896,N=1760": { + "file": "silu_config_M80896_N1760.json", + "M": 80896, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 785.0822499999997 + }, + "M=80896,N=1792": { + "file": "silu_config_M80896_N1792.json", + "M": 80896, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.0022500000002 + }, + "M=80896,N=1920": { + "file": "silu_config_M80896_N1920.json", + "M": 80896, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.8822500000001 + }, + "M=80896,N=2048": { + "file": "silu_config_M80896_N2048.json", + "M": 80896, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 639.5217499999997 + }, + "M=80896,N=2080": { + "file": "silu_config_M80896_N2080.json", + "M": 80896, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.5625 + }, + "M=80896,N=2240": { + "file": "silu_config_M80896_N2240.json", + "M": 80896, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.4827499999997 + }, + "M=80896,N=2400": { + "file": "silu_config_M80896_N2400.json", + "M": 80896, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.3629999999998 + }, + "M=80896,N=2560": { + "file": "silu_config_M80896_N2560.json", + "M": 80896, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 968.6430000000005 + }, + "M=81920,N=128": { + "file": "silu_config_M81920_N128.json", + "M": 81920, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 51.799250000000015 + }, + "M=81920,N=160": { + "file": "silu_config_M81920_N160.json", + "M": 81920, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 102.7995 + }, + "M=81920,N=192": { + "file": "silu_config_M81920_N192.json", + "M": 81920, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 102.95925000000001 + }, + "M=81920,N=256": { + "file": "silu_config_M81920_N256.json", + "M": 81920, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.83924999999998 + }, + "M=81920,N=320": { + "file": "silu_config_M81920_N320.json", + "M": 81920, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 204.20000000000005 + }, + "M=81920,N=384": { + "file": "silu_config_M81920_N384.json", + "M": 81920, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.99975 + }, + "M=81920,N=480": { + "file": "silu_config_M81920_N480.json", + "M": 81920, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.27974999999998 + }, + "M=81920,N=512": { + "file": "silu_config_M81920_N512.json", + "M": 81920, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 166.35974999999996 + }, + "M=81920,N=576": { + "file": "silu_config_M81920_N576.json", + "M": 81920, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.4007499999999 + }, + "M=81920,N=640": { + "file": "silu_config_M81920_N640.json", + "M": 81920, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 384.0005000000002 + }, + "M=81920,N=768": { + "file": "silu_config_M81920_N768.json", + "M": 81920, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 385.48050000000035 + }, + "M=81920,N=800": { + "file": "silu_config_M81920_N800.json", + "M": 81920, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.08050000000026 + }, + "M=81920,N=896": { + "file": "silu_config_M81920_N896.json", + "M": 81920, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
392.6004999999999 + }, + "M=81920,N=960": { + "file": "silu_config_M81920_N960.json", + "M": 81920, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 387.7205 + }, + "M=81920,N=1024": { + "file": "silu_config_M81920_N1024.json", + "M": 81920, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 322.6005 + }, + "M=81920,N=1120": { + "file": "silu_config_M81920_N1120.json", + "M": 81920, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.6822500000001 + }, + "M=81920,N=1152": { + "file": "silu_config_M81920_N1152.json", + "M": 81920, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.5622500000002 + }, + "M=81920,N=1280": { + "file": "silu_config_M81920_N1280.json", + "M": 81920, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.6422500000003 + }, + "M=81920,N=1344": { + "file": "silu_config_M81920_N1344.json", + "M": 81920, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 761.1222499999999 + }, + "M=81920,N=1408": { + "file": "silu_config_M81920_N1408.json", + "M": 81920, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.6022499999999 + }, + "M=81920,N=1440": { + "file": "silu_config_M81920_N1440.json", + "M": 81920, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.2822499999997 + }, + "M=81920,N=1536": { + "file": "silu_config_M81920_N1536.json", + "M": 81920, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.3222499999997 + }, + "M=81920,N=1600": { + "file": "silu_config_M81920_N1600.json", + "M": 81920, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.6822499999998 + }, + "M=81920,N=1664": { + "file": "silu_config_M81920_N1664.json", + "M": 81920, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 758.1622500000001 + }, + "M=81920,N=1728": { + "file": "silu_config_M81920_N1728.json", + "M": 81920, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 776.0422499999997 + }, + "M=81920,N=1760": { + "file": "silu_config_M81920_N1760.json", + "M": 81920, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.1222499999997 + }, + "M=81920,N=1792": { + "file": "silu_config_M81920_N1792.json", + "M": 81920, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.76225 + }, + "M=81920,N=1920": { + "file": "silu_config_M81920_N1920.json", + "M": 81920, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.5222499999995 + }, + "M=81920,N=2048": { + "file": "silu_config_M81920_N2048.json", + "M": 81920, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 614.1617500000002 + }, + "M=81920,N=2080": { + "file": "silu_config_M81920_N2080.json", + "M": 81920, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.4425000000001 + }, + "M=81920,N=2240": { + "file": "silu_config_M81920_N2240.json", + "M": 81920, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.2827499999999 + }, + "M=81920,N=2400": { + "file": "silu_config_M81920_N2400.json", + "M": 81920, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 932.0029999999997 + }, + "M=81920,N=2560": { + "file": "silu_config_M81920_N2560.json", + "M": 81920, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.4029999999998 + }, + "M=82944,N=128": { + "file": "silu_config_M82944_N128.json", + "M": 82944, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.91924999999999 + }, + "M=82944,N=160": { + "file": "silu_config_M82944_N160.json", + "M": 82944, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 105.35924999999993 + }, + "M=82944,N=192": { + "file": "silu_config_M82944_N192.json", + "M": 82944, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 114.51950000000002 + }, + "M=82944,N=256": { + "file": "silu_config_M82944_N256.json", + "M": 82944, + "N": 256, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 95.91925 + }, + "M=82944,N=320": { + "file": "silu_config_M82944_N320.json", + "M": 82944, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.04000000000002 + }, + "M=82944,N=384": { + "file": "silu_config_M82944_N384.json", + "M": 82944, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.19975 + }, + "M=82944,N=480": { + "file": "silu_config_M82944_N480.json", + "M": 82944, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 212.27999999999992 + }, + "M=82944,N=512": { + "file": "silu_config_M82944_N512.json", + "M": 82944, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 168.31975 + }, + "M=82944,N=576": { + "file": "silu_config_M82944_N576.json", + "M": 82944, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.9207500000002 + }, + "M=82944,N=640": { + "file": "silu_config_M82944_N640.json", + "M": 82944, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.64075 + }, + "M=82944,N=768": { + "file": "silu_config_M82944_N768.json", + "M": 82944, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.40049999999997 + }, + "M=82944,N=800": { + "file": "silu_config_M82944_N800.json", + "M": 82944, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 391.7605000000001 + }, + "M=82944,N=896": { + "file": "silu_config_M82944_N896.json", + "M": 82944, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 412.28075 + }, + "M=82944,N=960": { + "file": "silu_config_M82944_N960.json", + "M": 82944, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 406.16075 + }, + "M=82944,N=1024": { + "file": "silu_config_M82944_N1024.json", + "M": 82944, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 326.72024999999985 + }, + "M=82944,N=1120": { + "file": "silu_config_M82944_N1120.json", + "M": 82944, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.0022499999998 + }, + 
"M=82944,N=1152": { + "file": "silu_config_M82944_N1152.json", + "M": 82944, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 776.8422500000004 + }, + "M=82944,N=1280": { + "file": "silu_config_M82944_N1280.json", + "M": 82944, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.2422500000007 + }, + "M=82944,N=1344": { + "file": "silu_config_M82944_N1344.json", + "M": 82944, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.1222500000003 + }, + "M=82944,N=1408": { + "file": "silu_config_M82944_N1408.json", + "M": 82944, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 790.20225 + }, + "M=82944,N=1440": { + "file": "silu_config_M82944_N1440.json", + "M": 82944, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.2822499999997 + }, + "M=82944,N=1536": { + "file": "silu_config_M82944_N1536.json", + "M": 82944, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.0822500000002 + }, + "M=82944,N=1600": { + "file": "silu_config_M82944_N1600.json", + "M": 82944, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.5222499999998 + }, + "M=82944,N=1664": { + "file": "silu_config_M82944_N1664.json", + "M": 82944, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.8422500000001 + }, + "M=82944,N=1728": { + "file": "silu_config_M82944_N1728.json", + "M": 82944, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 801.28225 + }, + "M=82944,N=1760": { + "file": "silu_config_M82944_N1760.json", + "M": 82944, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 802.6422500000001 + }, + "M=82944,N=1792": { + "file": "silu_config_M82944_N1792.json", + "M": 82944, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.0422499999997 + }, + "M=82944,N=1920": { + "file": "silu_config_M82944_N1920.json", + "M": 82944, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 787.2022500000005 
+ }, + "M=82944,N=2048": { + "file": "silu_config_M82944_N2048.json", + "M": 82944, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 632.7217499999999 + }, + "M=82944,N=2080": { + "file": "silu_config_M82944_N2080.json", + "M": 82944, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 870.2427500000003 + }, + "M=82944,N=2240": { + "file": "silu_config_M82944_N2240.json", + "M": 82944, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.8027500000007 + }, + "M=82944,N=2400": { + "file": "silu_config_M82944_N2400.json", + "M": 82944, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 960.7629999999999 + }, + "M=82944,N=2560": { + "file": "silu_config_M82944_N2560.json", + "M": 82944, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1012.5632499999997 + }, + "M=83968,N=128": { + "file": "silu_config_M83968_N128.json", + "M": 83968, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.43925 + }, + "M=83968,N=160": { + "file": "silu_config_M83968_N160.json", + "M": 83968, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.91950000000001 + }, + "M=83968,N=192": { + "file": "silu_config_M83968_N192.json", + "M": 83968, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.9995 + }, + "M=83968,N=256": { + "file": "silu_config_M83968_N256.json", + "M": 83968, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.03925 + }, + "M=83968,N=320": { + "file": "silu_config_M83968_N320.json", + "M": 83968, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.03974999999997 + }, + "M=83968,N=384": { + "file": "silu_config_M83968_N384.json", + "M": 83968, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 209.03974999999997 + }, + "M=83968,N=480": { + "file": "silu_config_M83968_N480.json", + "M": 83968, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.95999999999998 + }, + 
"M=83968,N=512": { + "file": "silu_config_M83968_N512.json", + "M": 83968, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 170.39975000000004 + }, + "M=83968,N=576": { + "file": "silu_config_M83968_N576.json", + "M": 83968, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 414.04075 + }, + "M=83968,N=640": { + "file": "silu_config_M83968_N640.json", + "M": 83968, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.20074999999997 + }, + "M=83968,N=768": { + "file": "silu_config_M83968_N768.json", + "M": 83968, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 407.44050000000016 + }, + "M=83968,N=800": { + "file": "silu_config_M83968_N800.json", + "M": 83968, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.72075000000007 + }, + "M=83968,N=896": { + "file": "silu_config_M83968_N896.json", + "M": 83968, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 420.6807500000001 + }, + "M=83968,N=960": { + "file": "silu_config_M83968_N960.json", + "M": 83968, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 422.2807500000001 + }, + "M=83968,N=1024": { + "file": "silu_config_M83968_N1024.json", + "M": 83968, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 331.1205 + }, + "M=83968,N=1120": { + "file": "silu_config_M83968_N1120.json", + "M": 83968, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.8822499999997 + }, + "M=83968,N=1152": { + "file": "silu_config_M83968_N1152.json", + "M": 83968, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.48225 + }, + "M=83968,N=1280": { + "file": "silu_config_M83968_N1280.json", + "M": 83968, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 797.80225 + }, + "M=83968,N=1344": { + "file": "silu_config_M83968_N1344.json", + "M": 83968, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 805.6422499999999 + }, + "M=83968,N=1408": { + "file": 
"silu_config_M83968_N1408.json", + "M": 83968, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 799.4022500000001 + }, + "M=83968,N=1440": { + "file": "silu_config_M83968_N1440.json", + "M": 83968, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 797.0422499999997 + }, + "M=83968,N=1536": { + "file": "silu_config_M83968_N1536.json", + "M": 83968, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 811.7624999999998 + }, + "M=83968,N=1600": { + "file": "silu_config_M83968_N1600.json", + "M": 83968, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 798.8822499999999 + }, + "M=83968,N=1664": { + "file": "silu_config_M83968_N1664.json", + "M": 83968, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.1225 + }, + "M=83968,N=1728": { + "file": "silu_config_M83968_N1728.json", + "M": 83968, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 810.76225 + }, + "M=83968,N=1760": { + "file": "silu_config_M83968_N1760.json", + "M": 83968, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 812.3224999999998 + }, + "M=83968,N=1792": { + "file": "silu_config_M83968_N1792.json", + "M": 83968, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 795.8022499999997 + }, + "M=83968,N=1920": { + "file": "silu_config_M83968_N1920.json", + "M": 83968, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.6824999999999 + }, + "M=83968,N=2048": { + "file": "silu_config_M83968_N2048.json", + "M": 83968, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 647.9617499999999 + }, + "M=83968,N=2080": { + "file": "silu_config_M83968_N2080.json", + "M": 83968, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.6427499999995 + }, + "M=83968,N=2240": { + "file": "silu_config_M83968_N2240.json", + "M": 83968, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.12275 + }, + "M=83968,N=2400": { + "file": 
"silu_config_M83968_N2400.json", + "M": 83968, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 971.8829999999998 + }, + "M=83968,N=2560": { + "file": "silu_config_M83968_N2560.json", + "M": 83968, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1014.5632500000002 + }, + "M=84992,N=128": { + "file": "silu_config_M84992_N128.json", + "M": 84992, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.15924999999999 + }, + "M=84992,N=160": { + "file": "silu_config_M84992_N160.json", + "M": 84992, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 107.75925000000001 + }, + "M=84992,N=192": { + "file": "silu_config_M84992_N192.json", + "M": 84992, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 117.35950000000001 + }, + "M=84992,N=256": { + "file": "silu_config_M84992_N256.json", + "M": 84992, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.11925000000002 + }, + "M=84992,N=320": { + "file": "silu_config_M84992_N320.json", + "M": 84992, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.07975 + }, + "M=84992,N=384": { + "file": "silu_config_M84992_N384.json", + "M": 84992, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.59975000000003 + }, + "M=84992,N=480": { + "file": "silu_config_M84992_N480.json", + "M": 84992, + "N": 480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.31975000000006 + }, + "M=84992,N=512": { + "file": "silu_config_M84992_N512.json", + "M": 84992, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 172.27975000000004 + }, + "M=84992,N=576": { + "file": "silu_config_M84992_N576.json", + "M": 84992, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 401.2404999999999 + }, + "M=84992,N=640": { + "file": "silu_config_M84992_N640.json", + "M": 84992, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 405.00075000000004 + }, + "M=84992,N=768": { + "file": 
"silu_config_M84992_N768.json", + "M": 84992, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 406.8407500000003 + }, + "M=84992,N=800": { + "file": "silu_config_M84992_N800.json", + "M": 84992, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 417.08074999999985 + }, + "M=84992,N=896": { + "file": "silu_config_M84992_N896.json", + "M": 84992, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 413.24075000000005 + }, + "M=84992,N=960": { + "file": "silu_config_M84992_N960.json", + "M": 84992, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 419.3207500000001 + }, + "M=84992,N=1024": { + "file": "silu_config_M84992_N1024.json", + "M": 84992, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 334.5202499999998 + }, + "M=84992,N=1120": { + "file": "silu_config_M84992_N1120.json", + "M": 84992, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.4425000000001 + }, + "M=84992,N=1152": { + "file": "silu_config_M84992_N1152.json", + "M": 84992, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.0824999999998 + }, + "M=84992,N=1280": { + "file": "silu_config_M84992_N1280.json", + "M": 84992, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.9222500000005 + }, + "M=84992,N=1344": { + "file": "silu_config_M84992_N1344.json", + "M": 84992, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 815.4024999999999 + }, + "M=84992,N=1408": { + "file": "silu_config_M84992_N1408.json", + "M": 84992, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 808.8822500000003 + }, + "M=84992,N=1440": { + "file": "silu_config_M84992_N1440.json", + "M": 84992, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.80225 + }, + "M=84992,N=1536": { + "file": "silu_config_M84992_N1536.json", + "M": 84992, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 810.5224999999998 + }, + "M=84992,N=1600": { + 
"file": "silu_config_M84992_N1600.json", + "M": 84992, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 818.8424999999997 + }, + "M=84992,N=1664": { + "file": "silu_config_M84992_N1664.json", + "M": 84992, + "N": 1664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 830.0825000000004 + }, + "M=84992,N=1728": { + "file": "silu_config_M84992_N1728.json", + "M": 84992, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 821.0424999999998 + }, + "M=84992,N=1760": { + "file": "silu_config_M84992_N1760.json", + "M": 84992, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 822.0025 + }, + "M=84992,N=1792": { + "file": "silu_config_M84992_N1792.json", + "M": 84992, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.5224999999998 + }, + "M=84992,N=1920": { + "file": "silu_config_M84992_N1920.json", + "M": 84992, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 817.2424999999998 + }, + "M=84992,N=2048": { + "file": "silu_config_M84992_N2048.json", + "M": 84992, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 647.2017499999997 + }, + "M=84992,N=2080": { + "file": "silu_config_M84992_N2080.json", + "M": 84992, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 897.7227499999999 + }, + "M=84992,N=2240": { + "file": "silu_config_M84992_N2240.json", + "M": 84992, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.8829999999998 + }, + "M=84992,N=2400": { + "file": "silu_config_M84992_N2400.json", + "M": 84992, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 994.0832500000006 + }, + "M=84992,N=2560": { + "file": "silu_config_M84992_N2560.json", + "M": 84992, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.28325 + }, + "M=86016,N=128": { + "file": "silu_config_M86016_N128.json", + "M": 86016, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.51924999999998 + }, + "M=86016,N=160": 
{ + "file": "silu_config_M86016_N160.json", + "M": 86016, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.23949999999998 + }, + "M=86016,N=192": { + "file": "silu_config_M86016_N192.json", + "M": 86016, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.39949999999999 + }, + "M=86016,N=256": { + "file": "silu_config_M86016_N256.json", + "M": 86016, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.27925000000002 + }, + "M=86016,N=320": { + "file": "silu_config_M86016_N320.json", + "M": 86016, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.39975000000004 + }, + "M=86016,N=384": { + "file": "silu_config_M86016_N384.json", + "M": 86016, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.9197499999999 + }, + "M=86016,N=480": { + "file": "silu_config_M86016_N480.json", + "M": 86016, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 219.68 + }, + "M=86016,N=512": { + "file": "silu_config_M86016_N512.json", + "M": 86016, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 174.31975 + }, + "M=86016,N=576": { + "file": "silu_config_M86016_N576.json", + "M": 86016, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 417.20074999999997 + }, + "M=86016,N=640": { + "file": "silu_config_M86016_N640.json", + "M": 86016, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.96074999999996 + }, + "M=86016,N=768": { + "file": "silu_config_M86016_N768.json", + "M": 86016, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.44075 + }, + "M=86016,N=800": { + "file": "silu_config_M86016_N800.json", + "M": 86016, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 418.68075 + }, + "M=86016,N=896": { + "file": "silu_config_M86016_N896.json", + "M": 86016, + "N": 896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 427.44074999999987 + }, + "M=86016,N=960": { + "file": "silu_config_M86016_N960.json", 
+ "M": 86016, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 409.84074999999996 + }, + "M=86016,N=1024": { + "file": "silu_config_M86016_N1024.json", + "M": 86016, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 339.04050000000007 + }, + "M=86016,N=1120": { + "file": "silu_config_M86016_N1120.json", + "M": 86016, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 834.6025 + }, + "M=86016,N=1152": { + "file": "silu_config_M86016_N1152.json", + "M": 86016, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 805.1222500000001 + }, + "M=86016,N=1280": { + "file": "silu_config_M86016_N1280.json", + "M": 86016, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.4422500000001 + }, + "M=86016,N=1344": { + "file": "silu_config_M86016_N1344.json", + "M": 86016, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 824.8825000000006 + }, + "M=86016,N=1408": { + "file": "silu_config_M86016_N1408.json", + "M": 86016, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.6025000000002 + }, + "M=86016,N=1440": { + "file": "silu_config_M86016_N1440.json", + "M": 86016, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.2825 + }, + "M=86016,N=1536": { + "file": "silu_config_M86016_N1536.json", + "M": 86016, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.9225000000004 + }, + "M=86016,N=1600": { + "file": "silu_config_M86016_N1600.json", + "M": 86016, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 838.8024999999998 + }, + "M=86016,N=1664": { + "file": "silu_config_M86016_N1664.json", + "M": 86016, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 821.4424999999997 + }, + "M=86016,N=1728": { + "file": "silu_config_M86016_N1728.json", + "M": 86016, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 830.5224999999996 + }, + "M=86016,N=1760": { + "file": 
"silu_config_M86016_N1760.json", + "M": 86016, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 820.6824999999999 + }, + "M=86016,N=1792": { + "file": "silu_config_M86016_N1792.json", + "M": 86016, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 814.6425000000004 + }, + "M=86016,N=1920": { + "file": "silu_config_M86016_N1920.json", + "M": 86016, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 828.4425000000001 + }, + "M=86016,N=2048": { + "file": "silu_config_M86016_N2048.json", + "M": 86016, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 660.36175 + }, + "M=86016,N=2080": { + "file": "silu_config_M86016_N2080.json", + "M": 86016, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 912.56275 + }, + "M=86016,N=2240": { + "file": "silu_config_M86016_N2240.json", + "M": 86016, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.1229999999996 + }, + "M=86016,N=2400": { + "file": "silu_config_M86016_N2400.json", + "M": 86016, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1005.6832499999997 + }, + "M=86016,N=2560": { + "file": "silu_config_M86016_N2560.json", + "M": 86016, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1039.4032499999998 + }, + "M=87040,N=128": { + "file": "silu_config_M87040_N128.json", + "M": 87040, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.07925 + }, + "M=87040,N=160": { + "file": "silu_config_M87040_N160.json", + "M": 87040, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 108.79924999999999 + }, + "M=87040,N=192": { + "file": "silu_config_M87040_N192.json", + "M": 87040, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 119.99950000000001 + }, + "M=87040,N=256": { + "file": "silu_config_M87040_N256.json", + "M": 87040, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 111.67949999999999 + }, + "M=87040,N=320": { + "file": 
"silu_config_M87040_N320.json", + "M": 87040, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.8 + }, + "M=87040,N=384": { + "file": "silu_config_M87040_N384.json", + "M": 87040, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.99974999999995 + }, + "M=87040,N=480": { + "file": "silu_config_M87040_N480.json", + "M": 87040, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 220.99999999999994 + }, + "M=87040,N=512": { + "file": "silu_config_M87040_N512.json", + "M": 87040, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 176.27974999999998 + }, + "M=87040,N=576": { + "file": "silu_config_M87040_N576.json", + "M": 87040, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.28075 + }, + "M=87040,N=640": { + "file": "silu_config_M87040_N640.json", + "M": 87040, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 418.68075 + }, + "M=87040,N=768": { + "file": "silu_config_M87040_N768.json", + "M": 87040, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 431.40099999999984 + }, + "M=87040,N=800": { + "file": "silu_config_M87040_N800.json", + "M": 87040, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.2805000000001 + }, + "M=87040,N=896": { + "file": "silu_config_M87040_N896.json", + "M": 87040, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 416.1607500000001 + }, + "M=87040,N=960": { + "file": "silu_config_M87040_N960.json", + "M": 87040, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.1607499999999 + }, + "M=87040,N=1024": { + "file": "silu_config_M87040_N1024.json", + "M": 87040, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 343.4005000000002 + }, + "M=87040,N=1120": { + "file": "silu_config_M87040_N1120.json", + "M": 87040, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 817.8024999999998 + }, + "M=87040,N=1152": { + "file": 
"silu_config_M87040_N1152.json", + "M": 87040, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 836.1225000000002 + }, + "M=87040,N=1280": { + "file": "silu_config_M87040_N1280.json", + "M": 87040, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.9624999999996 + }, + "M=87040,N=1344": { + "file": "silu_config_M87040_N1344.json", + "M": 87040, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.1225000000004 + }, + "M=87040,N=1408": { + "file": "silu_config_M87040_N1408.json", + "M": 87040, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.8824999999997 + }, + "M=87040,N=1440": { + "file": "silu_config_M87040_N1440.json", + "M": 87040, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 825.9624999999999 + }, + "M=87040,N=1536": { + "file": "silu_config_M87040_N1536.json", + "M": 87040, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 803.7622499999998 + }, + "M=87040,N=1600": { + "file": "silu_config_M87040_N1600.json", + "M": 87040, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.4825000000003 + }, + "M=87040,N=1664": { + "file": "silu_config_M87040_N1664.json", + "M": 87040, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.3624999999997 + }, + "M=87040,N=1728": { + "file": "silu_config_M87040_N1728.json", + "M": 87040, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.8025 + }, + "M=87040,N=1760": { + "file": "silu_config_M87040_N1760.json", + "M": 87040, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 841.3625 + }, + "M=87040,N=1792": { + "file": "silu_config_M87040_N1792.json", + "M": 87040, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.1624999999997 + }, + "M=87040,N=1920": { + "file": "silu_config_M87040_N1920.json", + "M": 87040, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 822.0425000000005 + }, + "M=87040,N=2048": { + 
"file": "silu_config_M87040_N2048.json", + "M": 87040, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 662.1617499999998 + }, + "M=87040,N=2080": { + "file": "silu_config_M87040_N2080.json", + "M": 87040, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 912.1627499999995 + }, + "M=87040,N=2240": { + "file": "silu_config_M87040_N2240.json", + "M": 87040, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 956.163 + }, + "M=87040,N=2400": { + "file": "silu_config_M87040_N2400.json", + "M": 87040, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1009.2032500000006 + }, + "M=87040,N=2560": { + "file": "silu_config_M87040_N2560.json", + "M": 87040, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632500000002 + }, + "M=88064,N=128": { + "file": "silu_config_M88064_N128.json", + "M": 88064, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.71925000000001 + }, + "M=88064,N=160": { + "file": "silu_config_M88064_N160.json", + "M": 88064, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 121.19949999999997 + }, + "M=88064,N=192": { + "file": "silu_config_M88064_N192.json", + "M": 88064, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 121.11950000000002 + }, + "M=88064,N=256": { + "file": "silu_config_M88064_N256.json", + "M": 88064, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.07925000000003 + }, + "M=88064,N=320": { + "file": "silu_config_M88064_N320.json", + "M": 88064, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 220.32 + }, + "M=88064,N=384": { + "file": "silu_config_M88064_N384.json", + "M": 88064, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 219.35974999999996 + }, + "M=88064,N=480": { + "file": "silu_config_M88064_N480.json", + "M": 88064, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 224.96000000000015 + }, + "M=88064,N=512": { + "file": 
"silu_config_M88064_N512.json", + "M": 88064, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 178.31975 + }, + "M=88064,N=576": { + "file": "silu_config_M88064_N576.json", + "M": 88064, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 428.76075000000003 + }, + "M=88064,N=640": { + "file": "silu_config_M88064_N640.json", + "M": 88064, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 425.48075000000006 + }, + "M=88064,N=768": { + "file": "silu_config_M88064_N768.json", + "M": 88064, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.12049999999977 + }, + "M=88064,N=800": { + "file": "silu_config_M88064_N800.json", + "M": 88064, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 432.04075 + }, + "M=88064,N=896": { + "file": "silu_config_M88064_N896.json", + "M": 88064, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 427.7207500000002 + }, + "M=88064,N=960": { + "file": "silu_config_M88064_N960.json", + "M": 88064, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 432.5207499999998 + }, + "M=88064,N=1024": { + "file": "silu_config_M88064_N1024.json", + "M": 88064, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 346.1205000000002 + }, + "M=88064,N=1120": { + "file": "silu_config_M88064_N1120.json", + "M": 88064, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 832.1624999999999 + }, + "M=88064,N=1152": { + "file": "silu_config_M88064_N1152.json", + "M": 88064, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.0824999999998 + }, + "M=88064,N=1280": { + "file": "silu_config_M88064_N1280.json", + "M": 88064, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.1225000000002 + }, + "M=88064,N=1344": { + "file": "silu_config_M88064_N1344.json", + "M": 88064, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 833.1225 + }, + "M=88064,N=1408": { + "file": 
"silu_config_M88064_N1408.json", + "M": 88064, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.7625 + }, + "M=88064,N=1440": { + "file": "silu_config_M88064_N1440.json", + "M": 88064, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.4024999999999 + }, + "M=88064,N=1536": { + "file": "silu_config_M88064_N1536.json", + "M": 88064, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.3625000000004 + }, + "M=88064,N=1600": { + "file": "silu_config_M88064_N1600.json", + "M": 88064, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.9624999999996 + }, + "M=88064,N=1664": { + "file": "silu_config_M88064_N1664.json", + "M": 88064, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.0824999999998 + }, + "M=88064,N=1728": { + "file": "silu_config_M88064_N1728.json", + "M": 88064, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 839.3624999999997 + }, + "M=88064,N=1760": { + "file": "silu_config_M88064_N1760.json", + "M": 88064, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.6025000000002 + }, + "M=88064,N=1792": { + "file": "silu_config_M88064_N1792.json", + "M": 88064, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 844.2425000000003 + }, + "M=88064,N=1920": { + "file": "silu_config_M88064_N1920.json", + "M": 88064, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 836.9225000000001 + }, + "M=88064,N=2048": { + "file": "silu_config_M88064_N2048.json", + "M": 88064, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 678.92175 + }, + "M=88064,N=2080": { + "file": "silu_config_M88064_N2080.json", + "M": 88064, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 923.2827499999999 + }, + "M=88064,N=2240": { + "file": "silu_config_M88064_N2240.json", + "M": 88064, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.7629999999999 + }, + "M=88064,N=2400": { + 
"file": "silu_config_M88064_N2400.json", + "M": 88064, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1029.6832499999996 + }, + "M=88064,N=2560": { + "file": "silu_config_M88064_N2560.json", + "M": 88064, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.5234999999998 + }, + "M=89088,N=128": { + "file": "silu_config_M89088_N128.json", + "M": 89088, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.119250000000015 + }, + "M=89088,N=160": { + "file": "silu_config_M89088_N160.json", + "M": 89088, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.55950000000001 + }, + "M=89088,N=192": { + "file": "silu_config_M89088_N192.json", + "M": 89088, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.2795 + }, + "M=89088,N=256": { + "file": "silu_config_M89088_N256.json", + "M": 89088, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.43925000000002 + }, + "M=89088,N=320": { + "file": "silu_config_M89088_N320.json", + "M": 89088, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.0399999999999 + }, + "M=89088,N=384": { + "file": "silu_config_M89088_N384.json", + "M": 89088, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 221.27975000000004 + }, + "M=89088,N=480": { + "file": "silu_config_M89088_N480.json", + "M": 89088, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 227.64 + }, + "M=89088,N=512": { + "file": "silu_config_M89088_N512.json", + "M": 89088, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 180.15975000000003 + }, + "M=89088,N=576": { + "file": "silu_config_M89088_N576.json", + "M": 89088, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 442.4010000000002 + }, + "M=89088,N=640": { + "file": "silu_config_M89088_N640.json", + "M": 89088, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.0407499999998 + }, + "M=89088,N=768": { + "file": 
"silu_config_M89088_N768.json", + "M": 89088, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 429.8007499999999 + }, + "M=89088,N=800": { + "file": "silu_config_M89088_N800.json", + "M": 89088, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 433.3207500000001 + }, + "M=89088,N=896": { + "file": "silu_config_M89088_N896.json", + "M": 89088, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 430.6407499999999 + }, + "M=89088,N=960": { + "file": "silu_config_M89088_N960.json", + "M": 89088, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 447.5207499999999 + }, + "M=89088,N=1024": { + "file": "silu_config_M89088_N1024.json", + "M": 89088, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 350.40049999999985 + }, + "M=89088,N=1120": { + "file": "silu_config_M89088_N1120.json", + "M": 89088, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 863.2025000000003 + }, + "M=89088,N=1152": { + "file": "silu_config_M89088_N1152.json", + "M": 89088, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.0425 + }, + "M=89088,N=1280": { + "file": "silu_config_M89088_N1280.json", + "M": 89088, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 834.7624999999998 + }, + "M=89088,N=1344": { + "file": "silu_config_M89088_N1344.json", + "M": 89088, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 843.4825000000001 + }, + "M=89088,N=1408": { + "file": "silu_config_M89088_N1408.json", + "M": 89088, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 857.1225000000004 + }, + "M=89088,N=1440": { + "file": "silu_config_M89088_N1440.json", + "M": 89088, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 845.1224999999997 + }, + "M=89088,N=1536": { + "file": "silu_config_M89088_N1536.json", + "M": 89088, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 858.8424999999997 + }, + "M=89088,N=1600": { + 
"file": "silu_config_M89088_N1600.json", + "M": 89088, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 856.8424999999997 + }, + "M=89088,N=1664": { + "file": "silu_config_M89088_N1664.json", + "M": 89088, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.8824999999999 + }, + "M=89088,N=1728": { + "file": "silu_config_M89088_N1728.json", + "M": 89088, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 859.0824999999995 + }, + "M=89088,N=1760": { + "file": "silu_config_M89088_N1760.json", + "M": 89088, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.8025000000007 + }, + "M=89088,N=1792": { + "file": "silu_config_M89088_N1792.json", + "M": 89088, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 854.2424999999998 + }, + "M=89088,N=1920": { + "file": "silu_config_M89088_N1920.json", + "M": 89088, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.9625000000001 + }, + "M=89088,N=2048": { + "file": "silu_config_M89088_N2048.json", + "M": 89088, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 687.3620000000001 + }, + "M=89088,N=2080": { + "file": "silu_config_M89088_N2080.json", + "M": 89088, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 928.0827500000005 + }, + "M=89088,N=2240": { + "file": "silu_config_M89088_N2240.json", + "M": 89088, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.5232499999997 + }, + "M=89088,N=2400": { + "file": "silu_config_M89088_N2400.json", + "M": 89088, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.5234999999998 + }, + "M=89088,N=2560": { + "file": "silu_config_M89088_N2560.json", + "M": 89088, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1105.7234999999996 + }, + "M=90112,N=128": { + "file": "silu_config_M90112_N128.json", + "M": 90112, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.71925 + }, + 
"M=90112,N=160": { + "file": "silu_config_M90112_N160.json", + "M": 90112, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 113.63950000000001 + }, + "M=90112,N=192": { + "file": "silu_config_M90112_N192.json", + "M": 90112, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 123.71950000000001 + }, + "M=90112,N=256": { + "file": "silu_config_M90112_N256.json", + "M": 90112, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.51925000000003 + }, + "M=90112,N=320": { + "file": "silu_config_M90112_N320.json", + "M": 90112, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 225.5200000000001 + }, + "M=90112,N=384": { + "file": "silu_config_M90112_N384.json", + "M": 90112, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.79999999999995 + }, + "M=90112,N=480": { + "file": "silu_config_M90112_N480.json", + "M": 90112, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.72000000000014 + }, + "M=90112,N=512": { + "file": "silu_config_M90112_N512.json", + "M": 90112, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 182.19975 + }, + "M=90112,N=576": { + "file": "silu_config_M90112_N576.json", + "M": 90112, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 453.001 + }, + "M=90112,N=640": { + "file": "silu_config_M90112_N640.json", + "M": 90112, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 421.84074999999973 + }, + "M=90112,N=768": { + "file": "silu_config_M90112_N768.json", + "M": 90112, + "N": 768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 445.64099999999985 + }, + "M=90112,N=800": { + "file": "silu_config_M90112_N800.json", + "M": 90112, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 439.4409999999997 + }, + "M=90112,N=896": { + "file": "silu_config_M90112_N896.json", + "M": 90112, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 437.2007500000002 + }, + "M=90112,N=960": { + 
"file": "silu_config_M90112_N960.json", + "M": 90112, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 450.0010000000002 + }, + "M=90112,N=1024": { + "file": "silu_config_M90112_N1024.json", + "M": 90112, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 354.32050000000004 + }, + "M=90112,N=1120": { + "file": "silu_config_M90112_N1120.json", + "M": 90112, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.0425 + }, + "M=90112,N=1152": { + "file": "silu_config_M90112_N1152.json", + "M": 90112, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.8824999999999 + }, + "M=90112,N=1280": { + "file": "silu_config_M90112_N1280.json", + "M": 90112, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 854.1225 + }, + "M=90112,N=1344": { + "file": "silu_config_M90112_N1344.json", + "M": 90112, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.5224999999998 + }, + "M=90112,N=1408": { + "file": "silu_config_M90112_N1408.json", + "M": 90112, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 845.8025 + }, + "M=90112,N=1440": { + "file": "silu_config_M90112_N1440.json", + "M": 90112, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.8425 + }, + "M=90112,N=1536": { + "file": "silu_config_M90112_N1536.json", + "M": 90112, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 847.5625 + }, + "M=90112,N=1600": { + "file": "silu_config_M90112_N1600.json", + "M": 90112, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.7627500000003 + }, + "M=90112,N=1664": { + "file": "silu_config_M90112_N1664.json", + "M": 90112, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.0025000000005 + }, + "M=90112,N=1728": { + "file": "silu_config_M90112_N1728.json", + "M": 90112, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 858.1225000000004 + }, + "M=90112,N=1760": { + "file": 
"silu_config_M90112_N1760.json", + "M": 90112, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 859.4825000000001 + }, + "M=90112,N=1792": { + "file": "silu_config_M90112_N1792.json", + "M": 90112, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.6425000000004 + }, + "M=90112,N=1920": { + "file": "silu_config_M90112_N1920.json", + "M": 90112, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 866.4825000000001 + }, + "M=90112,N=2048": { + "file": "silu_config_M90112_N2048.json", + "M": 90112, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 687.682 + }, + "M=90112,N=2080": { + "file": "silu_config_M90112_N2080.json", + "M": 90112, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 942.0029999999997 + }, + "M=90112,N=2240": { + "file": "silu_config_M90112_N2240.json", + "M": 90112, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 989.2832500000009 + }, + "M=90112,N=2400": { + "file": "silu_config_M90112_N2400.json", + "M": 90112, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1041.8432500000008 + }, + "M=90112,N=2560": { + "file": "silu_config_M90112_N2560.json", + "M": 90112, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2435 + }, + "M=91136,N=128": { + "file": "silu_config_M91136_N128.json", + "M": 91136, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 56.15924999999998 + }, + "M=91136,N=160": { + "file": "silu_config_M91136_N160.json", + "M": 91136, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.83949999999997 + }, + "M=91136,N=192": { + "file": "silu_config_M91136_N192.json", + "M": 91136, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 125.19950000000001 + }, + "M=91136,N=256": { + "file": "silu_config_M91136_N256.json", + "M": 91136, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.07949999999997 + }, + "M=91136,N=320": { + "file": 
"silu_config_M91136_N320.json", + "M": 91136, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 227.36 + }, + "M=91136,N=384": { + "file": "silu_config_M91136_N384.json", + "M": 91136, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.4400000000001 + }, + "M=91136,N=480": { + "file": "silu_config_M91136_N480.json", + "M": 91136, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 232.5599999999999 + }, + "M=91136,N=512": { + "file": "silu_config_M91136_N512.json", + "M": 91136, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 184.15974999999997 + }, + "M=91136,N=576": { + "file": "silu_config_M91136_N576.json", + "M": 91136, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 443.4809999999999 + }, + "M=91136,N=640": { + "file": "silu_config_M91136_N640.json", + "M": 91136, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 426.64075000000014 + }, + "M=91136,N=768": { + "file": "silu_config_M91136_N768.json", + "M": 91136, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 450.5207499999999 + }, + "M=91136,N=800": { + "file": "silu_config_M91136_N800.json", + "M": 91136, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 440.36075000000005 + }, + "M=91136,N=896": { + "file": "silu_config_M91136_N896.json", + "M": 91136, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.96074999999985 + }, + "M=91136,N=960": { + "file": "silu_config_M91136_N960.json", + "M": 91136, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.72074999999995 + }, + "M=91136,N=1024": { + "file": "silu_config_M91136_N1024.json", + "M": 91136, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 358.2405000000001 + }, + "M=91136,N=1120": { + "file": "silu_config_M91136_N1120.json", + "M": 91136, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 871.64275 + }, + "M=91136,N=1152": { + "file": 
"silu_config_M91136_N1152.json", + "M": 91136, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.1225 + }, + "M=91136,N=1280": { + "file": "silu_config_M91136_N1280.json", + "M": 91136, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.8024999999998 + }, + "M=91136,N=1344": { + "file": "silu_config_M91136_N1344.json", + "M": 91136, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.2424999999998 + }, + "M=91136,N=1408": { + "file": "silu_config_M91136_N1408.json", + "M": 91136, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 865.6424999999997 + }, + "M=91136,N=1440": { + "file": "silu_config_M91136_N1440.json", + "M": 91136, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.2827499999999 + }, + "M=91136,N=1536": { + "file": "silu_config_M91136_N1536.json", + "M": 91136, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 857.1624999999999 + }, + "M=91136,N=1600": { + "file": "silu_config_M91136_N1600.json", + "M": 91136, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.0425000000007 + }, + "M=91136,N=1664": { + "file": "silu_config_M91136_N1664.json", + "M": 91136, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 879.2027499999999 + }, + "M=91136,N=1728": { + "file": "silu_config_M91136_N1728.json", + "M": 91136, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 889.5627500000005 + }, + "M=91136,N=1760": { + "file": "silu_config_M91136_N1760.json", + "M": 91136, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.3627499999998 + }, + "M=91136,N=1792": { + "file": "silu_config_M91136_N1792.json", + "M": 91136, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.1225 + }, + "M=91136,N=1920": { + "file": "silu_config_M91136_N1920.json", + "M": 91136, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 875.0827499999996 + }, + "M=91136,N=2048": { + 
"file": "silu_config_M91136_N2048.json", + "M": 91136, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 702.0819999999999 + }, + "M=91136,N=2080": { + "file": "silu_config_M91136_N2080.json", + "M": 91136, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 949.4029999999998 + }, + "M=91136,N=2240": { + "file": "silu_config_M91136_N2240.json", + "M": 91136, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8032500000004 + }, + "M=91136,N=2400": { + "file": "silu_config_M91136_N2400.json", + "M": 91136, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1054.2835000000005 + }, + "M=91136,N=2560": { + "file": "silu_config_M91136_N2560.json", + "M": 91136, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1100.4434999999999 + }, + "M=92160,N=128": { + "file": "silu_config_M92160_N128.json", + "M": 92160, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.51925 + }, + "M=92160,N=160": { + "file": "silu_config_M92160_N160.json", + "M": 92160, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.19950000000001 + }, + "M=92160,N=192": { + "file": "silu_config_M92160_N192.json", + "M": 92160, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 125.55949999999999 + }, + "M=92160,N=256": { + "file": "silu_config_M92160_N256.json", + "M": 92160, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 105.75925000000004 + }, + "M=92160,N=320": { + "file": "silu_config_M92160_N320.json", + "M": 92160, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.36 + }, + "M=92160,N=384": { + "file": "silu_config_M92160_N384.json", + "M": 92160, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.87999999999994 + }, + "M=92160,N=480": { + "file": "silu_config_M92160_N480.json", + "M": 92160, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.44000000000005 + }, + "M=92160,N=512": { + "file": 
"silu_config_M92160_N512.json", + "M": 92160, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 186.23974999999996 + }, + "M=92160,N=576": { + "file": "silu_config_M92160_N576.json", + "M": 92160, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 445.241 + }, + "M=92160,N=640": { + "file": "silu_config_M92160_N640.json", + "M": 92160, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 442.0407499999998 + }, + "M=92160,N=768": { + "file": "silu_config_M92160_N768.json", + "M": 92160, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.7207499999997 + }, + "M=92160,N=800": { + "file": "silu_config_M92160_N800.json", + "M": 92160, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.6807499999999 + }, + "M=92160,N=896": { + "file": "silu_config_M92160_N896.json", + "M": 92160, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 446.88075000000015 + }, + "M=92160,N=960": { + "file": "silu_config_M92160_N960.json", + "M": 92160, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.40075 + }, + "M=92160,N=1024": { + "file": "silu_config_M92160_N1024.json", + "M": 92160, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 362.32050000000004 + }, + "M=92160,N=1120": { + "file": "silu_config_M92160_N1120.json", + "M": 92160, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 875.1227500000005 + }, + "M=92160,N=1152": { + "file": "silu_config_M92160_N1152.json", + "M": 92160, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 861.6424999999999 + }, + "M=92160,N=1280": { + "file": "silu_config_M92160_N1280.json", + "M": 92160, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.4824999999998 + }, + "M=92160,N=1344": { + "file": "silu_config_M92160_N1344.json", + "M": 92160, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.12275 + }, + "M=92160,N=1408": { + "file": 
"silu_config_M92160_N1408.json", + "M": 92160, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 859.0824999999998 + }, + "M=92160,N=1440": { + "file": "silu_config_M92160_N1440.json", + "M": 92160, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 868.4424999999999 + }, + "M=92160,N=1536": { + "file": "silu_config_M92160_N1536.json", + "M": 92160, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 850.5625000000002 + }, + "M=92160,N=1600": { + "file": "silu_config_M92160_N1600.json", + "M": 92160, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 879.4027500000002 + }, + "M=92160,N=1664": { + "file": "silu_config_M92160_N1664.json", + "M": 92160, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.9624999999999 + }, + "M=92160,N=1728": { + "file": "silu_config_M92160_N1728.json", + "M": 92160, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 870.6824999999999 + }, + "M=92160,N=1760": { + "file": "silu_config_M92160_N1760.json", + "M": 92160, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 878.8027499999998 + }, + "M=92160,N=1792": { + "file": "silu_config_M92160_N1792.json", + "M": 92160, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.0827499999996 + }, + "M=92160,N=1920": { + "file": "silu_config_M92160_N1920.json", + "M": 92160, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 858.8025000000016 + }, + "M=92160,N=2048": { + "file": "silu_config_M92160_N2048.json", + "M": 92160, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 697.3219999999997 + }, + "M=92160,N=2080": { + "file": "silu_config_M92160_N2080.json", + "M": 92160, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.163 + }, + "M=92160,N=2240": { + "file": "silu_config_M92160_N2240.json", + "M": 92160, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1012.8832499999997 + }, + 
"M=92160,N=2400": { + "file": "silu_config_M92160_N2400.json", + "M": 92160, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.7635 + }, + "M=92160,N=2560": { + "file": "silu_config_M92160_N2560.json", + "M": 92160, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.28375 + }, + "M=93184,N=128": { + "file": "silu_config_M93184_N128.json", + "M": 93184, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 66.39925 + }, + "M=93184,N=160": { + "file": "silu_config_M93184_N160.json", + "M": 93184, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.79950000000001 + }, + "M=93184,N=192": { + "file": "silu_config_M93184_N192.json", + "M": 93184, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.75949999999995 + }, + "M=93184,N=256": { + "file": "silu_config_M93184_N256.json", + "M": 93184, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.7595 + }, + "M=93184,N=320": { + "file": "silu_config_M93184_N320.json", + "M": 93184, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 232.99999999999994 + }, + "M=93184,N=384": { + "file": "silu_config_M93184_N384.json", + "M": 93184, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 231.03999999999996 + }, + "M=93184,N=480": { + "file": "silu_config_M93184_N480.json", + "M": 93184, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 237.16000000000003 + }, + "M=93184,N=512": { + "file": "silu_config_M93184_N512.json", + "M": 93184, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 188.15974999999992 + }, + "M=93184,N=576": { + "file": "silu_config_M93184_N576.json", + "M": 93184, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 461.3607499999998 + }, + "M=93184,N=640": { + "file": "silu_config_M93184_N640.json", + "M": 93184, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 458.32074999999986 + }, + "M=93184,N=768": { + "file": 
"silu_config_M93184_N768.json", + "M": 93184, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.8807499999999 + }, + "M=93184,N=800": { + "file": "silu_config_M93184_N800.json", + "M": 93184, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 466.0409999999998 + }, + "M=93184,N=896": { + "file": "silu_config_M93184_N896.json", + "M": 93184, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 440.5207499999998 + }, + "M=93184,N=960": { + "file": "silu_config_M93184_N960.json", + "M": 93184, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 478.80099999999993 + }, + "M=93184,N=1024": { + "file": "silu_config_M93184_N1024.json", + "M": 93184, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 365.7605000000002 + }, + "M=93184,N=1120": { + "file": "silu_config_M93184_N1120.json", + "M": 93184, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 880.4027499999997 + }, + "M=93184,N=1152": { + "file": "silu_config_M93184_N1152.json", + "M": 93184, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.3227499999998 + }, + "M=93184,N=1280": { + "file": "silu_config_M93184_N1280.json", + "M": 93184, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 882.6027499999998 + }, + "M=93184,N=1344": { + "file": "silu_config_M93184_N1344.json", + "M": 93184, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.8027500000001 + }, + "M=93184,N=1408": { + "file": "silu_config_M93184_N1408.json", + "M": 93184, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 884.4827499999997 + }, + "M=93184,N=1440": { + "file": "silu_config_M93184_N1440.json", + "M": 93184, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 894.6827500000002 + }, + "M=93184,N=1536": { + "file": "silu_config_M93184_N1536.json", + "M": 93184, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 896.5627499999998 + }, + 
"M=93184,N=1600": { + "file": "silu_config_M93184_N1600.json", + "M": 93184, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.08275 + }, + "M=93184,N=1664": { + "file": "silu_config_M93184_N1664.json", + "M": 93184, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 877.8427500000003 + }, + "M=93184,N=1728": { + "file": "silu_config_M93184_N1728.json", + "M": 93184, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 898.0427500000001 + }, + "M=93184,N=1760": { + "file": "silu_config_M93184_N1760.json", + "M": 93184, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 888.6827499999999 + }, + "M=93184,N=1792": { + "file": "silu_config_M93184_N1792.json", + "M": 93184, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 892.5227499999996 + }, + "M=93184,N=1920": { + "file": "silu_config_M93184_N1920.json", + "M": 93184, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.7627499999999 + }, + "M=93184,N=2048": { + "file": "silu_config_M93184_N2048.json", + "M": 93184, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 715.1219999999998 + }, + "M=93184,N=2080": { + "file": "silu_config_M93184_N2080.json", + "M": 93184, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.4830000000002 + }, + "M=93184,N=2240": { + "file": "silu_config_M93184_N2240.json", + "M": 93184, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.68325 + }, + "M=93184,N=2400": { + "file": "silu_config_M93184_N2400.json", + "M": 93184, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1088.6435000000001 + }, + "M=93184,N=2560": { + "file": "silu_config_M93184_N2560.json", + "M": 93184, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.6837500000001 + }, + "M=94208,N=128": { + "file": "silu_config_M94208_N128.json", + "M": 94208, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.87925 + }, + 
"M=94208,N=160": { + "file": "silu_config_M94208_N160.json", + "M": 94208, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.51949999999997 + }, + "M=94208,N=192": { + "file": "silu_config_M94208_N192.json", + "M": 94208, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 118.3195 + }, + "M=94208,N=256": { + "file": "silu_config_M94208_N256.json", + "M": 94208, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.31950000000003 + }, + "M=94208,N=320": { + "file": "silu_config_M94208_N320.json", + "M": 94208, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 235.60000000000002 + }, + "M=94208,N=384": { + "file": "silu_config_M94208_N384.json", + "M": 94208, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.48000000000008 + }, + "M=94208,N=480": { + "file": "silu_config_M94208_N480.json", + "M": 94208, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 239.79999999999995 + }, + "M=94208,N=512": { + "file": "silu_config_M94208_N512.json", + "M": 94208, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 189.99975000000012 + }, + "M=94208,N=576": { + "file": "silu_config_M94208_N576.json", + "M": 94208, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.16100000000006 + }, + "M=94208,N=640": { + "file": "silu_config_M94208_N640.json", + "M": 94208, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.0407500000001 + }, + "M=94208,N=768": { + "file": "silu_config_M94208_N768.json", + "M": 94208, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 470.081 + }, + "M=94208,N=800": { + "file": "silu_config_M94208_N800.json", + "M": 94208, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.76099999999985 + }, + "M=94208,N=896": { + "file": "silu_config_M94208_N896.json", + "M": 94208, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 470.001 + }, + "M=94208,N=960": { + "file": 
"silu_config_M94208_N960.json", + "M": 94208, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 464.28100000000063 + }, + "M=94208,N=1024": { + "file": "silu_config_M94208_N1024.json", + "M": 94208, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 369.68050000000005 + }, + "M=94208,N=1120": { + "file": "silu_config_M94208_N1120.json", + "M": 94208, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 900.1627499999997 + }, + "M=94208,N=1152": { + "file": "silu_config_M94208_N1152.json", + "M": 94208, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 890.2427499999999 + }, + "M=94208,N=1280": { + "file": "silu_config_M94208_N1280.json", + "M": 94208, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.6827499999997 + }, + "M=94208,N=1344": { + "file": "silu_config_M94208_N1344.json", + "M": 94208, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 912.7627500000001 + }, + "M=94208,N=1408": { + "file": "silu_config_M94208_N1408.json", + "M": 94208, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 893.9627499999997 + }, + "M=94208,N=1440": { + "file": "silu_config_M94208_N1440.json", + "M": 94208, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 904.16275 + }, + "M=94208,N=1536": { + "file": "silu_config_M94208_N1536.json", + "M": 94208, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.6827499999999 + }, + "M=94208,N=1600": { + "file": "silu_config_M94208_N1600.json", + "M": 94208, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 904.7227499999999 + }, + "M=94208,N=1664": { + "file": "silu_config_M94208_N1664.json", + "M": 94208, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.7627500000003 + }, + "M=94208,N=1728": { + "file": "silu_config_M94208_N1728.json", + "M": 94208, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 906.7227499999995 + }, + 
"M=94208,N=1760": { + "file": "silu_config_M94208_N1760.json", + "M": 94208, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 920.16275 + }, + "M=94208,N=1792": { + "file": "silu_config_M94208_N1792.json", + "M": 94208, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.4827499999997 + }, + "M=94208,N=1920": { + "file": "silu_config_M94208_N1920.json", + "M": 94208, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 904.56275 + }, + "M=94208,N=2048": { + "file": "silu_config_M94208_N2048.json", + "M": 94208, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 715.7619999999997 + }, + "M=94208,N=2080": { + "file": "silu_config_M94208_N2080.json", + "M": 94208, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.163 + }, + "M=94208,N=2240": { + "file": "silu_config_M94208_N2240.json", + "M": 94208, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1052.0434999999998 + }, + "M=94208,N=2400": { + "file": "silu_config_M94208_N2400.json", + "M": 94208, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1088.6034999999997 + }, + "M=94208,N=2560": { + "file": "silu_config_M94208_N2560.json", + "M": 94208, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1156.2837500000005 + }, + "M=95232,N=128": { + "file": "silu_config_M95232_N128.json", + "M": 95232, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.39924999999999 + }, + "M=95232,N=160": { + "file": "silu_config_M95232_N160.json", + "M": 95232, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 130.23950000000002 + }, + "M=95232,N=192": { + "file": "silu_config_M95232_N192.json", + "M": 95232, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 130.35950000000003 + }, + "M=95232,N=256": { + "file": "silu_config_M95232_N256.json", + "M": 95232, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.95950000000005 + }, + 
"M=95232,N=320": { + "file": "silu_config_M95232_N320.json", + "M": 95232, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.04000000000002 + }, + "M=95232,N=384": { + "file": "silu_config_M95232_N384.json", + "M": 95232, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 236.36000000000007 + }, + "M=95232,N=480": { + "file": "silu_config_M95232_N480.json", + "M": 95232, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.07999999999993 + }, + "M=95232,N=512": { + "file": "silu_config_M95232_N512.json", + "M": 95232, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 187.31975 + }, + "M=95232,N=576": { + "file": "silu_config_M95232_N576.json", + "M": 95232, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 455.56074999999987 + }, + "M=95232,N=640": { + "file": "silu_config_M95232_N640.json", + "M": 95232, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 456.80100000000004 + }, + "M=95232,N=768": { + "file": "silu_config_M95232_N768.json", + "M": 95232, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 458.36099999999976 + }, + "M=95232,N=800": { + "file": "silu_config_M95232_N800.json", + "M": 95232, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 462.96100000000035 + }, + "M=95232,N=896": { + "file": "silu_config_M95232_N896.json", + "M": 95232, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.0007500000004 + }, + "M=95232,N=960": { + "file": "silu_config_M95232_N960.json", + "M": 95232, + "N": 960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 478.24099999999976 + }, + "M=95232,N=1024": { + "file": "silu_config_M95232_N1024.json", + "M": 95232, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 373.68050000000005 + }, + "M=95232,N=1120": { + "file": "silu_config_M95232_N1120.json", + "M": 95232, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.7227500000006 + }, + 
"M=95232,N=1152": { + "file": "silu_config_M95232_N1152.json", + "M": 95232, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 889.6427499999998 + }, + "M=95232,N=1280": { + "file": "silu_config_M95232_N1280.json", + "M": 95232, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 901.2827499999999 + }, + "M=95232,N=1344": { + "file": "silu_config_M95232_N1344.json", + "M": 95232, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.8027499999998 + }, + "M=95232,N=1408": { + "file": "silu_config_M95232_N1408.json", + "M": 95232, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.40275 + }, + "M=95232,N=1440": { + "file": "silu_config_M95232_N1440.json", + "M": 95232, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.9627499999997 + }, + "M=95232,N=1536": { + "file": "silu_config_M95232_N1536.json", + "M": 95232, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.2827499999999 + }, + "M=95232,N=1600": { + "file": "silu_config_M95232_N1600.json", + "M": 95232, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.8427499999998 + }, + "M=95232,N=1664": { + "file": "silu_config_M95232_N1664.json", + "M": 95232, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 917.0027499999997 + }, + "M=95232,N=1728": { + "file": "silu_config_M95232_N1728.json", + "M": 95232, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.4027499999997 + }, + "M=95232,N=1760": { + "file": "silu_config_M95232_N1760.json", + "M": 95232, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.8027499999998 + }, + "M=95232,N=1792": { + "file": "silu_config_M95232_N1792.json", + "M": 95232, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.3227500000003 + }, + "M=95232,N=1920": { + "file": "silu_config_M95232_N1920.json", + "M": 95232, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
903.6027500000005 + }, + "M=95232,N=2048": { + "file": "silu_config_M95232_N2048.json", + "M": 95232, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 712.2420000000002 + }, + "M=95232,N=2080": { + "file": "silu_config_M95232_N2080.json", + "M": 95232, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 991.9632500000002 + }, + "M=95232,N=2240": { + "file": "silu_config_M95232_N2240.json", + "M": 95232, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.76325 + }, + "M=95232,N=2400": { + "file": "silu_config_M95232_N2400.json", + "M": 95232, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.1637499999997 + }, + "M=95232,N=2560": { + "file": "silu_config_M95232_N2560.json", + "M": 95232, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1138.4837499999999 + }, + "M=96256,N=128": { + "file": "silu_config_M96256_N128.json", + "M": 96256, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.99925 + }, + "M=96256,N=160": { + "file": "silu_config_M96256_N160.json", + "M": 96256, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.87950000000002 + }, + "M=96256,N=192": { + "file": "silu_config_M96256_N192.json", + "M": 96256, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 130.95950000000005 + }, + "M=96256,N=256": { + "file": "silu_config_M96256_N256.json", + "M": 96256, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.55950000000001 + }, + "M=96256,N=320": { + "file": "silu_config_M96256_N320.json", + "M": 96256, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 240.40000000000003 + }, + "M=96256,N=384": { + "file": "silu_config_M96256_N384.json", + "M": 96256, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.88 + }, + "M=96256,N=480": { + "file": "silu_config_M96256_N480.json", + "M": 96256, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.1599999999999 
+ }, + "M=96256,N=512": { + "file": "silu_config_M96256_N512.json", + "M": 96256, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 194.03975000000003 + }, + "M=96256,N=576": { + "file": "silu_config_M96256_N576.json", + "M": 96256, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 468.1610000000002 + }, + "M=96256,N=640": { + "file": "silu_config_M96256_N640.json", + "M": 96256, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 461.121 + }, + "M=96256,N=768": { + "file": "silu_config_M96256_N768.json", + "M": 96256, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 458.7207499999997 + }, + "M=96256,N=800": { + "file": "silu_config_M96256_N800.json", + "M": 96256, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 471.72100000000023 + }, + "M=96256,N=896": { + "file": "silu_config_M96256_N896.json", + "M": 96256, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.3609999999999 + }, + "M=96256,N=960": { + "file": "silu_config_M96256_N960.json", + "M": 96256, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 474.12099999999987 + }, + "M=96256,N=1024": { + "file": "silu_config_M96256_N1024.json", + "M": 96256, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 377.64049999999986 + }, + "M=96256,N=1120": { + "file": "silu_config_M96256_N1120.json", + "M": 96256, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 909.1227499999998 + }, + "M=96256,N=1152": { + "file": "silu_config_M96256_N1152.json", + "M": 96256, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.4827499999999 + }, + "M=96256,N=1280": { + "file": "silu_config_M96256_N1280.json", + "M": 96256, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.9227500000002 + }, + "M=96256,N=1344": { + "file": "silu_config_M96256_N1344.json", + "M": 96256, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 931.0029999999997 + }, 
+ "M=96256,N=1408": { + "file": "silu_config_M96256_N1408.json", + "M": 96256, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.8827499999998 + }, + "M=96256,N=1440": { + "file": "silu_config_M96256_N1440.json", + "M": 96256, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.4427499999999 + }, + "M=96256,N=1536": { + "file": "silu_config_M96256_N1536.json", + "M": 96256, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 914.3627499999998 + }, + "M=96256,N=1600": { + "file": "silu_config_M96256_N1600.json", + "M": 96256, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.7227499999999 + }, + "M=96256,N=1664": { + "file": "silu_config_M96256_N1664.json", + "M": 96256, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.1227500000005 + }, + "M=96256,N=1728": { + "file": "silu_config_M96256_N1728.json", + "M": 96256, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 915.8027499999998 + }, + "M=96256,N=1760": { + "file": "silu_config_M96256_N1760.json", + "M": 96256, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 928.6427500000004 + }, + "M=96256,N=1792": { + "file": "silu_config_M96256_N1792.json", + "M": 96256, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.9227500000002 + }, + "M=96256,N=1920": { + "file": "silu_config_M96256_N1920.json", + "M": 96256, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.3227499999998 + }, + "M=96256,N=2048": { + "file": "silu_config_M96256_N2048.json", + "M": 96256, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 737.002 + }, + "M=96256,N=2080": { + "file": "silu_config_M96256_N2080.json", + "M": 96256, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2032500000006 + }, + "M=96256,N=2240": { + "file": "silu_config_M96256_N2240.json", + "M": 96256, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1063.4434999999999 + }, + "M=96256,N=2400": { + "file": "silu_config_M96256_N2400.json", + "M": 96256, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1121.6837499999997 + }, + "M=96256,N=2560": { + "file": "silu_config_M96256_N2560.json", + "M": 96256, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1161.92375 + }, + "M=97280,N=128": { + "file": "silu_config_M97280_N128.json", + "M": 97280, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.63925 + }, + "M=97280,N=160": { + "file": "silu_config_M97280_N160.json", + "M": 97280, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 132.99975 + }, + "M=97280,N=192": { + "file": "silu_config_M97280_N192.json", + "M": 97280, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.15949999999998 + }, + "M=97280,N=256": { + "file": "silu_config_M97280_N256.json", + "M": 97280, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.71950000000001 + }, + "M=97280,N=320": { + "file": "silu_config_M97280_N320.json", + "M": 97280, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.2399999999999 + }, + "M=97280,N=384": { + "file": "silu_config_M97280_N384.json", + "M": 97280, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 239.35999999999996 + }, + "M=97280,N=480": { + "file": "silu_config_M97280_N480.json", + "M": 97280, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.75999999999993 + }, + "M=97280,N=512": { + "file": "silu_config_M97280_N512.json", + "M": 97280, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 192.1997500000001 + }, + "M=97280,N=576": { + "file": "silu_config_M97280_N576.json", + "M": 97280, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.40100000000007 + }, + "M=97280,N=640": { + "file": "silu_config_M97280_N640.json", + "M": 97280, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.1610000000002 + }, + 
"M=97280,N=768": { + "file": "silu_config_M97280_N768.json", + "M": 97280, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 471.1610000000002 + }, + "M=97280,N=800": { + "file": "silu_config_M97280_N800.json", + "M": 97280, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 476.76099999999997 + }, + "M=97280,N=896": { + "file": "silu_config_M97280_N896.json", + "M": 97280, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 463.24099999999976 + }, + "M=97280,N=960": { + "file": "silu_config_M97280_N960.json", + "M": 97280, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.08100000000013 + }, + "M=97280,N=1024": { + "file": "silu_config_M97280_N1024.json", + "M": 97280, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 381.9205000000002 + }, + "M=97280,N=1120": { + "file": "silu_config_M97280_N1120.json", + "M": 97280, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 940.4029999999996 + }, + "M=97280,N=1152": { + "file": "silu_config_M97280_N1152.json", + "M": 97280, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 902.7227500000004 + }, + "M=97280,N=1280": { + "file": "silu_config_M97280_N1280.json", + "M": 97280, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 904.0427499999998 + }, + "M=97280,N=1344": { + "file": "silu_config_M97280_N1344.json", + "M": 97280, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 913.0027499999997 + }, + "M=97280,N=1408": { + "file": "silu_config_M97280_N1408.json", + "M": 97280, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.8427499999998 + }, + "M=97280,N=1440": { + "file": "silu_config_M97280_N1440.json", + "M": 97280, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 933.4029999999998 + }, + "M=97280,N=1536": { + "file": "silu_config_M97280_N1536.json", + "M": 97280, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
918.2827500000003 + }, + "M=97280,N=1600": { + "file": "silu_config_M97280_N1600.json", + "M": 97280, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 933.6030000000001 + }, + "M=97280,N=1664": { + "file": "silu_config_M97280_N1664.json", + "M": 97280, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 909.16275 + }, + "M=97280,N=1728": { + "file": "silu_config_M97280_N1728.json", + "M": 97280, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.0029999999997 + }, + "M=97280,N=1760": { + "file": "silu_config_M97280_N1760.json", + "M": 97280, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 921.2827499999999 + }, + "M=97280,N=1792": { + "file": "silu_config_M97280_N1792.json", + "M": 97280, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.2827499999999 + }, + "M=97280,N=1920": { + "file": "silu_config_M97280_N1920.json", + "M": 97280, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 934.7629999999995 + }, + "M=97280,N=2048": { + "file": "silu_config_M97280_N2048.json", + "M": 97280, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 741.0820000000003 + }, + "M=97280,N=2080": { + "file": "silu_config_M97280_N2080.json", + "M": 97280, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 994.3232499999999 + }, + "M=97280,N=2240": { + "file": "silu_config_M97280_N2240.json", + "M": 97280, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.3635 + }, + "M=97280,N=2400": { + "file": "silu_config_M97280_N2400.json", + "M": 97280, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.0437499999998 + }, + "M=97280,N=2560": { + "file": "silu_config_M97280_N2560.json", + "M": 97280, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.8839999999996 + }, + "M=98304,N=128": { + "file": "silu_config_M98304_N128.json", + "M": 98304, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 60.119249999999994 + }, + "M=98304,N=160": { + "file": "silu_config_M98304_N160.json", + "M": 98304, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 134.23975000000002 + }, + "M=98304,N=192": { + "file": "silu_config_M98304_N192.json", + "M": 98304, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.5195 + }, + "M=98304,N=256": { + "file": "silu_config_M98304_N256.json", + "M": 98304, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.5995 + }, + "M=98304,N=320": { + "file": "silu_config_M98304_N320.json", + "M": 98304, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.5200000000001 + }, + "M=98304,N=384": { + "file": "silu_config_M98304_N384.json", + "M": 98304, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 243.1599999999999 + }, + "M=98304,N=480": { + "file": "silu_config_M98304_N480.json", + "M": 98304, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.00000000000017 + }, + "M=98304,N=512": { + "file": "silu_config_M98304_N512.json", + "M": 98304, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 193.20000000000005 + }, + "M=98304,N=576": { + "file": "silu_config_M98304_N576.json", + "M": 98304, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.20100000000036 + }, + "M=98304,N=640": { + "file": "silu_config_M98304_N640.json", + "M": 98304, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 459.80100000000004 + }, + "M=98304,N=768": { + "file": "silu_config_M98304_N768.json", + "M": 98304, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 472.84100000000024 + }, + "M=98304,N=800": { + "file": "silu_config_M98304_N800.json", + "M": 98304, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.9609999999999 + }, + "M=98304,N=896": { + "file": "silu_config_M98304_N896.json", + "M": 98304, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
464.44100000000003 + }, + "M=98304,N=960": { + "file": "silu_config_M98304_N960.json", + "M": 98304, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.6009999999999 + }, + "M=98304,N=1024": { + "file": "silu_config_M98304_N1024.json", + "M": 98304, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 385.32074999999986 + }, + "M=98304,N=1120": { + "file": "silu_config_M98304_N1120.json", + "M": 98304, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 927.8027499999998 + }, + "M=98304,N=1152": { + "file": "silu_config_M98304_N1152.json", + "M": 98304, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 917.5627499999998 + }, + "M=98304,N=1280": { + "file": "silu_config_M98304_N1280.json", + "M": 98304, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.1227499999998 + }, + "M=98304,N=1344": { + "file": "silu_config_M98304_N1344.json", + "M": 98304, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.4027499999997 + }, + "M=98304,N=1408": { + "file": "silu_config_M98304_N1408.json", + "M": 98304, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 921.36275 + }, + "M=98304,N=1440": { + "file": "silu_config_M98304_N1440.json", + "M": 98304, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 932.0027499999997 + }, + "M=98304,N=1536": { + "file": "silu_config_M98304_N1536.json", + "M": 98304, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 923.8027499999998 + }, + "M=98304,N=1600": { + "file": "silu_config_M98304_N1600.json", + "M": 98304, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 932.5227500000001 + }, + "M=98304,N=1664": { + "file": "silu_config_M98304_N1664.json", + "M": 98304, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 924.4027499999997 + }, + "M=98304,N=1728": { + "file": "silu_config_M98304_N1728.json", + "M": 98304, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 935.4830000000006 + }, + "M=98304,N=1760": { + "file": "silu_config_M98304_N1760.json", + "M": 98304, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 937.0829999999996 + }, + "M=98304,N=1792": { + "file": "silu_config_M98304_N1792.json", + "M": 98304, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.4827499999997 + }, + "M=98304,N=1920": { + "file": "silu_config_M98304_N1920.json", + "M": 98304, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.9227499999997 + }, + "M=98304,N=2048": { + "file": "silu_config_M98304_N2048.json", + "M": 98304, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 732.6820000000012 + }, + "M=98304,N=2080": { + "file": "silu_config_M98304_N2080.json", + "M": 98304, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1022.2432500000004 + }, + "M=98304,N=2240": { + "file": "silu_config_M98304_N2240.json", + "M": 98304, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1075.4434999999999 + }, + "M=98304,N=2400": { + "file": "silu_config_M98304_N2400.json", + "M": 98304, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1144.84375 + }, + "M=98304,N=2560": { + "file": "silu_config_M98304_N2560.json", + "M": 98304, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1186.324 + }, + "M=99328,N=128": { + "file": "silu_config_M99328_N128.json", + "M": 99328, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.71925000000001 + }, + "M=99328,N=160": { + "file": "silu_config_M99328_N160.json", + "M": 99328, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.4795 + }, + "M=99328,N=192": { + "file": "silu_config_M99328_N192.json", + "M": 99328, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.71950000000001 + }, + "M=99328,N=256": { + "file": "silu_config_M99328_N256.json", + "M": 99328, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
113.35949999999997 + }, + "M=99328,N=320": { + "file": "silu_config_M99328_N320.json", + "M": 99328, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 247.64000000000004 + }, + "M=99328,N=384": { + "file": "silu_config_M99328_N384.json", + "M": 99328, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.9999999999999 + }, + "M=99328,N=480": { + "file": "silu_config_M99328_N480.json", + "M": 99328, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.6402499999999 + }, + "M=99328,N=512": { + "file": "silu_config_M99328_N512.json", + "M": 99328, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 199.84000000000003 + }, + "M=99328,N=576": { + "file": "silu_config_M99328_N576.json", + "M": 99328, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 483.0010000000001 + }, + "M=99328,N=640": { + "file": "silu_config_M99328_N640.json", + "M": 99328, + "N": 640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 488.52099999999984 + }, + "M=99328,N=768": { + "file": "silu_config_M99328_N768.json", + "M": 99328, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 481.0010000000002 + }, + "M=99328,N=800": { + "file": "silu_config_M99328_N800.json", + "M": 99328, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.7610000000002 + }, + "M=99328,N=896": { + "file": "silu_config_M99328_N896.json", + "M": 99328, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 475.201 + }, + "M=99328,N=960": { + "file": "silu_config_M99328_N960.json", + "M": 99328, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.0010000000002 + }, + "M=99328,N=1024": { + "file": "silu_config_M99328_N1024.json", + "M": 99328, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 389.5205000000001 + }, + "M=99328,N=1120": { + "file": "silu_config_M99328_N1120.json", + "M": 99328, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
947.6829999999998 + }, + "M=99328,N=1152": { + "file": "silu_config_M99328_N1152.json", + "M": 99328, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.7227500000001 + }, + "M=99328,N=1280": { + "file": "silu_config_M99328_N1280.json", + "M": 99328, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 928.60275 + }, + "M=99328,N=1344": { + "file": "silu_config_M99328_N1344.json", + "M": 99328, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 938.4830000000002 + }, + "M=99328,N=1408": { + "file": "silu_config_M99328_N1408.json", + "M": 99328, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.2030000000002 + }, + "M=99328,N=1440": { + "file": "silu_config_M99328_N1440.json", + "M": 99328, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.5629999999996 + }, + "M=99328,N=1536": { + "file": "silu_config_M99328_N1536.json", + "M": 99328, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 932.4827500000001 + }, + "M=99328,N=1600": { + "file": "silu_config_M99328_N1600.json", + "M": 99328, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.9229999999995 + }, + "M=99328,N=1664": { + "file": "silu_config_M99328_N1664.json", + "M": 99328, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.5630000000001 + }, + "M=99328,N=1728": { + "file": "silu_config_M99328_N1728.json", + "M": 99328, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.2429999999997 + }, + "M=99328,N=1760": { + "file": "silu_config_M99328_N1760.json", + "M": 99328, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 946.9630000000002 + }, + "M=99328,N=1792": { + "file": "silu_config_M99328_N1792.json", + "M": 99328, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 958.8029999999999 + }, + "M=99328,N=1920": { + "file": "silu_config_M99328_N1920.json", + "M": 99328, + "N": 1920, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 935.683 + }, + "M=99328,N=2048": { + "file": "silu_config_M99328_N2048.json", + "M": 99328, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 763.2822499999997 + }, + "M=99328,N=2080": { + "file": "silu_config_M99328_N2080.json", + "M": 99328, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1034.20325 + }, + "M=99328,N=2240": { + "file": "silu_config_M99328_N2240.json", + "M": 99328, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.0034999999998 + }, + "M=99328,N=2400": { + "file": "silu_config_M99328_N2400.json", + "M": 99328, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.3237499999998 + }, + "M=99328,N=2560": { + "file": "silu_config_M99328_N2560.json", + "M": 99328, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1198.6840000000002 + }, + "M=100352,N=128": { + "file": "silu_config_M100352_N128.json", + "M": 100352, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 70.95925 + }, + "M=100352,N=160": { + "file": "silu_config_M100352_N160.json", + "M": 100352, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.55949999999999 + }, + "M=100352,N=192": { + "file": "silu_config_M100352_N192.json", + "M": 100352, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.67950000000002 + }, + "M=100352,N=256": { + "file": "silu_config_M100352_N256.json", + "M": 100352, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.83975 + }, + "M=100352,N=320": { + "file": "silu_config_M100352_N320.json", + "M": 100352, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.03999999999994 + }, + "M=100352,N=384": { + "file": "silu_config_M100352_N384.json", + "M": 100352, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 248.96025000000014 + }, + "M=100352,N=480": { + "file": "silu_config_M100352_N480.json", + "M": 100352, + "N": 480, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 254.07999999999987 + }, + "M=100352,N=512": { + "file": "silu_config_M100352_N512.json", + "M": 100352, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 201.80000000000007 + }, + "M=100352,N=576": { + "file": "silu_config_M100352_N576.json", + "M": 100352, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.2812499999999 + }, + "M=100352,N=640": { + "file": "silu_config_M100352_N640.json", + "M": 100352, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 483.7209999999998 + }, + "M=100352,N=768": { + "file": "silu_config_M100352_N768.json", + "M": 100352, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 484.7209999999999 + }, + "M=100352,N=800": { + "file": "silu_config_M100352_N800.json", + "M": 100352, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.08100000000013 + }, + "M=100352,N=896": { + "file": "silu_config_M100352_N896.json", + "M": 100352, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.0410000000004 + }, + "M=100352,N=960": { + "file": "silu_config_M100352_N960.json", + "M": 100352, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.2810000000002 + }, + "M=100352,N=1024": { + "file": "silu_config_M100352_N1024.json", + "M": 100352, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 387.6007500000003 + }, + "M=100352,N=1120": { + "file": "silu_config_M100352_N1120.json", + "M": 100352, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 943.1630000000002 + }, + "M=100352,N=1152": { + "file": "silu_config_M100352_N1152.json", + "M": 100352, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 947.643 + }, + "M=100352,N=1280": { + "file": "silu_config_M100352_N1280.json", + "M": 100352, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.0430000000001 + }, + "M=100352,N=1344": { + "file": "silu_config_M100352_N1344.json", + "M": 100352, + "N": 
1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.8029999999999 + }, + "M=100352,N=1408": { + "file": "silu_config_M100352_N1408.json", + "M": 100352, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 949.2429999999997 + }, + "M=100352,N=1440": { + "file": "silu_config_M100352_N1440.json", + "M": 100352, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 959.4030000000002 + }, + "M=100352,N=1536": { + "file": "silu_config_M100352_N1536.json", + "M": 100352, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.0829999999996 + }, + "M=100352,N=1600": { + "file": "silu_config_M100352_N1600.json", + "M": 100352, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 960.9229999999998 + }, + "M=100352,N=1664": { + "file": "silu_config_M100352_N1664.json", + "M": 100352, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.123 + }, + "M=100352,N=1728": { + "file": "silu_config_M100352_N1728.json", + "M": 100352, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.0430000000001 + }, + "M=100352,N=1760": { + "file": "silu_config_M100352_N1760.json", + "M": 100352, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.3229999999999 + }, + "M=100352,N=1792": { + "file": "silu_config_M100352_N1792.json", + "M": 100352, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.1230000000005 + }, + "M=100352,N=1920": { + "file": "silu_config_M100352_N1920.json", + "M": 100352, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 958.203 + }, + "M=100352,N=2048": { + "file": "silu_config_M100352_N2048.json", + "M": 100352, + "N": 2048, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 766.56225 + }, + "M=100352,N=2080": { + "file": "silu_config_M100352_N2080.json", + "M": 100352, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.0032500000002 + }, + "M=100352,N=2240": { + "file": 
"silu_config_M100352_N2240.json", + "M": 100352, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1113.6437500000006 + }, + "M=100352,N=2400": { + "file": "silu_config_M100352_N2400.json", + "M": 100352, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.5237500000003 + }, + "M=100352,N=2560": { + "file": "silu_config_M100352_N2560.json", + "M": 100352, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1223.2439999999997 + }, + "M=101376,N=128": { + "file": "silu_config_M101376_N128.json", + "M": 101376, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 71.31924999999998 + }, + "M=101376,N=160": { + "file": "silu_config_M101376_N160.json", + "M": 101376, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.9595 + }, + "M=101376,N=192": { + "file": "silu_config_M101376_N192.json", + "M": 101376, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.1195 + }, + "M=101376,N=256": { + "file": "silu_config_M101376_N256.json", + "M": 101376, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 115.63949999999997 + }, + "M=101376,N=320": { + "file": "silu_config_M101376_N320.json", + "M": 101376, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.39999999999995 + }, + "M=101376,N=384": { + "file": "silu_config_M101376_N384.json", + "M": 101376, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.64 + }, + "M=101376,N=480": { + "file": "silu_config_M101376_N480.json", + "M": 101376, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 256.91999999999996 + }, + "M=101376,N=512": { + "file": "silu_config_M101376_N512.json", + "M": 101376, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 203.63974999999994 + }, + "M=101376,N=576": { + "file": "silu_config_M101376_N576.json", + "M": 101376, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 472.4409999999999 + }, + "M=101376,N=640": { + 
"file": "silu_config_M101376_N640.json", + "M": 101376, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 484.28099999999995 + }, + "M=101376,N=768": { + "file": "silu_config_M101376_N768.json", + "M": 101376, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.72100000000023 + }, + "M=101376,N=800": { + "file": "silu_config_M101376_N800.json", + "M": 101376, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.001 + }, + "M=101376,N=896": { + "file": "silu_config_M101376_N896.json", + "M": 101376, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 476.7209999999998 + }, + "M=101376,N=960": { + "file": "silu_config_M101376_N960.json", + "M": 101376, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 491.92100000000005 + }, + "M=101376,N=1024": { + "file": "silu_config_M101376_N1024.json", + "M": 101376, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 391.7605000000003 + }, + "M=101376,N=1120": { + "file": "silu_config_M101376_N1120.json", + "M": 101376, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 954.123 + }, + "M=101376,N=1152": { + "file": "silu_config_M101376_N1152.json", + "M": 101376, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 946.403 + }, + "M=101376,N=1280": { + "file": "silu_config_M101376_N1280.json", + "M": 101376, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.7629999999997 + }, + "M=101376,N=1344": { + "file": "silu_config_M101376_N1344.json", + "M": 101376, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 967.9230000000009 + }, + "M=101376,N=1408": { + "file": "silu_config_M101376_N1408.json", + "M": 101376, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.3630000000003 + }, + "M=101376,N=1440": { + "file": "silu_config_M101376_N1440.json", + "M": 101376, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 959.2829999999999 + }, + 
"M=101376,N=1536": { + "file": "silu_config_M101376_N1536.json", + "M": 101376, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 962.3629999999998 + }, + "M=101376,N=1600": { + "file": "silu_config_M101376_N1600.json", + "M": 101376, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.2829999999999 + }, + "M=101376,N=1664": { + "file": "silu_config_M101376_N1664.json", + "M": 101376, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 961.9630000000002 + }, + "M=101376,N=1728": { + "file": "silu_config_M101376_N1728.json", + "M": 101376, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.2029999999995 + }, + "M=101376,N=1760": { + "file": "silu_config_M101376_N1760.json", + "M": 101376, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 991.3630000000003 + }, + "M=101376,N=1792": { + "file": "silu_config_M101376_N1792.json", + "M": 101376, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.7629999999995 + }, + "M=101376,N=1920": { + "file": "silu_config_M101376_N1920.json", + "M": 101376, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 966.2829999999994 + }, + "M=101376,N=2048": { + "file": "silu_config_M101376_N2048.json", + "M": 101376, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 758.56225 + }, + "M=101376,N=2080": { + "file": "silu_config_M101376_N2080.json", + "M": 101376, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1066.0035000000003 + }, + "M=101376,N=2240": { + "file": "silu_config_M101376_N2240.json", + "M": 101376, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.9234999999999 + }, + "M=101376,N=2400": { + "file": "silu_config_M101376_N2400.json", + "M": 101376, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.8837499999995 + }, + "M=101376,N=2560": { + "file": "silu_config_M101376_N2560.json", + "M": 101376, + "N": 2560, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 1224.4039999999995 + }, + "M=102400,N=128": { + "file": "silu_config_M102400_N128.json", + "M": 102400, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 72.39925000000001 + }, + "M=102400,N=160": { + "file": "silu_config_M102400_N160.json", + "M": 102400, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.1995 + }, + "M=102400,N=192": { + "file": "silu_config_M102400_N192.json", + "M": 102400, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.71950000000001 + }, + "M=102400,N=256": { + "file": "silu_config_M102400_N256.json", + "M": 102400, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.75950000000003 + }, + "M=102400,N=320": { + "file": "silu_config_M102400_N320.json", + "M": 102400, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.28 + }, + "M=102400,N=384": { + "file": "silu_config_M102400_N384.json", + "M": 102400, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.23999999999984 + }, + "M=102400,N=480": { + "file": "silu_config_M102400_N480.json", + "M": 102400, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 257.59999999999997 + }, + "M=102400,N=512": { + "file": "silu_config_M102400_N512.json", + "M": 102400, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 205.80000000000007 + }, + "M=102400,N=576": { + "file": "silu_config_M102400_N576.json", + "M": 102400, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.20100000000014 + }, + "M=102400,N=640": { + "file": "silu_config_M102400_N640.json", + "M": 102400, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 470.5210000000002 + }, + "M=102400,N=768": { + "file": "silu_config_M102400_N768.json", + "M": 102400, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 472.001 + }, + "M=102400,N=800": { + "file": "silu_config_M102400_N800.json", + "M": 102400, + "N": 800, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 486.08124999999984 + }, + "M=102400,N=896": { + "file": "silu_config_M102400_N896.json", + "M": 102400, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.12099999999987 + }, + "M=102400,N=960": { + "file": "silu_config_M102400_N960.json", + "M": 102400, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.481 + }, + "M=102400,N=1024": { + "file": "silu_config_M102400_N1024.json", + "M": 102400, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 395.6004999999998 + }, + "M=102400,N=1120": { + "file": "silu_config_M102400_N1120.json", + "M": 102400, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.8430000000001 + }, + "M=102400,N=1152": { + "file": "silu_config_M102400_N1152.json", + "M": 102400, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.6029999999998 + }, + "M=102400,N=1280": { + "file": "silu_config_M102400_N1280.json", + "M": 102400, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 949.8429999999998 + }, + "M=102400,N=1344": { + "file": "silu_config_M102400_N1344.json", + "M": 102400, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.0430000000001 + }, + "M=102400,N=1408": { + "file": "silu_config_M102400_N1408.json", + "M": 102400, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.4829999999997 + }, + "M=102400,N=1440": { + "file": "silu_config_M102400_N1440.json", + "M": 102400, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.4830000000002 + }, + "M=102400,N=1536": { + "file": "silu_config_M102400_N1536.json", + "M": 102400, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.3230000000008 + }, + "M=102400,N=1600": { + "file": "silu_config_M102400_N1600.json", + "M": 102400, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.8429999999998 + }, + "M=102400,N=1664": { + "file": 
"silu_config_M102400_N1664.json", + "M": 102400, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 955.1629999999996 + }, + "M=102400,N=1728": { + "file": "silu_config_M102400_N1728.json", + "M": 102400, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.4030000000002 + }, + "M=102400,N=1760": { + "file": "silu_config_M102400_N1760.json", + "M": 102400, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 962.723 + }, + "M=102400,N=1792": { + "file": "silu_config_M102400_N1792.json", + "M": 102400, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 966.643 + }, + "M=102400,N=1920": { + "file": "silu_config_M102400_N1920.json", + "M": 102400, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 958.7629999999999 + }, + "M=102400,N=2048": { + "file": "silu_config_M102400_N2048.json", + "M": 102400, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 763.3622500000006 + }, + "M=102400,N=2080": { + "file": "silu_config_M102400_N2080.json", + "M": 102400, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1058.6035000000006 + }, + "M=102400,N=2240": { + "file": "silu_config_M102400_N2240.json", + "M": 102400, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.8035000000004 + }, + "M=102400,N=2400": { + "file": "silu_config_M102400_N2400.json", + "M": 102400, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.0437499999998 + }, + "M=102400,N=2560": { + "file": "silu_config_M102400_N2560.json", + "M": 102400, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1215.8839999999996 + }, + "M=103424,N=128": { + "file": "silu_config_M103424_N128.json", + "M": 103424, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.03925000000001 + }, + "M=103424,N=160": { + "file": "silu_config_M103424_N160.json", + "M": 103424, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
129.27949999999998 + }, + "M=103424,N=192": { + "file": "silu_config_M103424_N192.json", + "M": 103424, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.55950000000004 + }, + "M=103424,N=256": { + "file": "silu_config_M103424_N256.json", + "M": 103424, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 117.71950000000004 + }, + "M=103424,N=320": { + "file": "silu_config_M103424_N320.json", + "M": 103424, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 257.4002500000001 + }, + "M=103424,N=384": { + "file": "silu_config_M103424_N384.json", + "M": 103424, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 255.43999999999994 + }, + "M=103424,N=480": { + "file": "silu_config_M103424_N480.json", + "M": 103424, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.5199999999999 + }, + "M=103424,N=512": { + "file": "silu_config_M103424_N512.json", + "M": 103424, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 207.47999999999996 + }, + "M=103424,N=576": { + "file": "silu_config_M103424_N576.json", + "M": 103424, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 497.081 + }, + "M=103424,N=640": { + "file": "silu_config_M103424_N640.json", + "M": 103424, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 482.64099999999974 + }, + "M=103424,N=768": { + "file": "silu_config_M103424_N768.json", + "M": 103424, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 489.92100000000005 + }, + "M=103424,N=800": { + "file": "silu_config_M103424_N800.json", + "M": 103424, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.40100000000035 + }, + "M=103424,N=896": { + "file": "silu_config_M103424_N896.json", + "M": 103424, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 492.20100000000025 + }, + "M=103424,N=960": { + "file": "silu_config_M103424_N960.json", + "M": 103424, + "N": 960, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 512.6412499999999 + }, + "M=103424,N=1024": { + "file": "silu_config_M103424_N1024.json", + "M": 103424, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 399.3605 + }, + "M=103424,N=1120": { + "file": "silu_config_M103424_N1120.json", + "M": 103424, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.6830000000004 + }, + "M=103424,N=1152": { + "file": "silu_config_M103424_N1152.json", + "M": 103424, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 973.3229999999999 + }, + "M=103424,N=1280": { + "file": "silu_config_M103424_N1280.json", + "M": 103424, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.6429999999998 + }, + "M=103424,N=1344": { + "file": "silu_config_M103424_N1344.json", + "M": 103424, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 998.0832500000001 + }, + "M=103424,N=1408": { + "file": "silu_config_M103424_N1408.json", + "M": 103424, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 979.8029999999999 + }, + "M=103424,N=1440": { + "file": "silu_config_M103424_N1440.json", + "M": 103424, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.9229999999998 + }, + "M=103424,N=1536": { + "file": "silu_config_M103424_N1536.json", + "M": 103424, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 968.7629999999999 + }, + "M=103424,N=1600": { + "file": "silu_config_M103424_N1600.json", + "M": 103424, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 978.7629999999999 + }, + "M=103424,N=1664": { + "file": "silu_config_M103424_N1664.json", + "M": 103424, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 983.4029999999998 + }, + "M=103424,N=1728": { + "file": "silu_config_M103424_N1728.json", + "M": 103424, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 977.3629999999998 + }, + "M=103424,N=1760": { + "file": "silu_config_M103424_N1760.json", + 
"M": 103424, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 980.4029999999998 + }, + "M=103424,N=1792": { + "file": "silu_config_M103424_N1792.json", + "M": 103424, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 971.8029999999999 + }, + "M=103424,N=1920": { + "file": "silu_config_M103424_N1920.json", + "M": 103424, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 987.1630000000005 + }, + "M=103424,N=2048": { + "file": "silu_config_M103424_N2048.json", + "M": 103424, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 792.2022499999998 + }, + "M=103424,N=2080": { + "file": "silu_config_M103424_N2080.json", + "M": 103424, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1076.8435 + }, + "M=103424,N=2240": { + "file": "silu_config_M103424_N2240.json", + "M": 103424, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1122.8837500000004 + }, + "M=103424,N=2400": { + "file": "silu_config_M103424_N2400.json", + "M": 103424, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1198.484 + }, + "M=103424,N=2560": { + "file": "silu_config_M103424_N2560.json", + "M": 103424, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.7242500000007 + }, + "M=104448,N=128": { + "file": "silu_config_M104448_N128.json", + "M": 104448, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.31925000000003 + }, + "M=104448,N=160": { + "file": "silu_config_M104448_N160.json", + "M": 104448, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.3595 + }, + "M=104448,N=192": { + "file": "silu_config_M104448_N192.json", + "M": 104448, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.55975000000004 + }, + "M=104448,N=256": { + "file": "silu_config_M104448_N256.json", + "M": 104448, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.15975000000003 + }, + "M=104448,N=320": { + "file": 
"silu_config_M104448_N320.json", + "M": 104448, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.80000000000007 + }, + "M=104448,N=384": { + "file": "silu_config_M104448_N384.json", + "M": 104448, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 258.08000000000015 + }, + "M=104448,N=480": { + "file": "silu_config_M104448_N480.json", + "M": 104448, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.44024999999993 + }, + "M=104448,N=512": { + "file": "silu_config_M104448_N512.json", + "M": 104448, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 209.64 + }, + "M=104448,N=576": { + "file": "silu_config_M104448_N576.json", + "M": 104448, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 503.0009999999998 + }, + "M=104448,N=640": { + "file": "silu_config_M104448_N640.json", + "M": 104448, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.44100000000003 + }, + "M=104448,N=768": { + "file": "silu_config_M104448_N768.json", + "M": 104448, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.8412500000001 + }, + "M=104448,N=800": { + "file": "silu_config_M104448_N800.json", + "M": 104448, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 499.0809999999997 + }, + "M=104448,N=896": { + "file": "silu_config_M104448_N896.json", + "M": 104448, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.24124999999975 + }, + "M=104448,N=960": { + "file": "silu_config_M104448_N960.json", + "M": 104448, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.9210000000003 + }, + "M=104448,N=1024": { + "file": "silu_config_M104448_N1024.json", + "M": 104448, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 403.9207500000002 + }, + "M=104448,N=1120": { + "file": "silu_config_M104448_N1120.json", + "M": 104448, + "N": 1120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 1011.3232499999998 + }, + 
"M=104448,N=1152": { + "file": "silu_config_M104448_N1152.json", + "M": 104448, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 992.8829999999996 + }, + "M=104448,N=1280": { + "file": "silu_config_M104448_N1280.json", + "M": 104448, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 995.2029999999997 + }, + "M=104448,N=1344": { + "file": "silu_config_M104448_N1344.json", + "M": 104448, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 996.76325 + }, + "M=104448,N=1408": { + "file": "silu_config_M104448_N1408.json", + "M": 104448, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 997.3632499999999 + }, + "M=104448,N=1440": { + "file": "silu_config_M104448_N1440.json", + "M": 104448, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.8832500000003 + }, + "M=104448,N=1536": { + "file": "silu_config_M104448_N1536.json", + "M": 104448, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 980.2429999999999 + }, + "M=104448,N=1600": { + "file": "silu_config_M104448_N1600.json", + "M": 104448, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.8832499999999 + }, + "M=104448,N=1664": { + "file": "silu_config_M104448_N1664.json", + "M": 104448, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 982.1229999999996 + }, + "M=104448,N=1728": { + "file": "silu_config_M104448_N1728.json", + "M": 104448, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.8432499999999 + }, + "M=104448,N=1760": { + "file": "silu_config_M104448_N1760.json", + "M": 104448, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.2832500000004 + }, + "M=104448,N=1792": { + "file": "silu_config_M104448_N1792.json", + "M": 104448, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.4830000000002 + }, + "M=104448,N=1920": { + "file": "silu_config_M104448_N1920.json", + "M": 104448, + "N": 1920, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 984.2029999999995 + }, + "M=104448,N=2048": { + "file": "silu_config_M104448_N2048.json", + "M": 104448, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 790.56225 + }, + "M=104448,N=2080": { + "file": "silu_config_M104448_N2080.json", + "M": 104448, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.0835000000002 + }, + "M=104448,N=2240": { + "file": "silu_config_M104448_N2240.json", + "M": 104448, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1147.2437499999996 + }, + "M=104448,N=2400": { + "file": "silu_config_M104448_N2400.json", + "M": 104448, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.4039999999995 + }, + "M=104448,N=2560": { + "file": "silu_config_M104448_N2560.json", + "M": 104448, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1259.7242499999998 + }, + "M=105472,N=128": { + "file": "silu_config_M105472_N128.json", + "M": 105472, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.03925000000001 + }, + "M=105472,N=160": { + "file": "silu_config_M105472_N160.json", + "M": 105472, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.5195 + }, + "M=105472,N=192": { + "file": "silu_config_M105472_N192.json", + "M": 105472, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 143.63950000000003 + }, + "M=105472,N=256": { + "file": "silu_config_M105472_N256.json", + "M": 105472, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + "M=105472,N=320": { + "file": "silu_config_M105472_N320.json", + "M": 105472, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.88024999999993 + }, + "M=105472,N=384": { + "file": "silu_config_M105472_N384.json", + "M": 105472, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.51999999999987 + }, + "M=105472,N=480": { + "file": "silu_config_M105472_N480.json", + "M": 105472, + "N": 
480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.8400000000001 + }, + "M=105472,N=512": { + "file": "silu_config_M105472_N512.json", + "M": 105472, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 211.60000000000008 + }, + "M=105472,N=576": { + "file": "silu_config_M105472_N576.json", + "M": 105472, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.56100000000004 + }, + "M=105472,N=640": { + "file": "silu_config_M105472_N640.json", + "M": 105472, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 508.081 + }, + "M=105472,N=768": { + "file": "silu_config_M105472_N768.json", + "M": 105472, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 515.6809999999998 + }, + "M=105472,N=800": { + "file": "silu_config_M105472_N800.json", + "M": 105472, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.8410000000001 + }, + "M=105472,N=896": { + "file": "silu_config_M105472_N896.json", + "M": 105472, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.8410000000001 + }, + "M=105472,N=960": { + "file": "silu_config_M105472_N960.json", + "M": 105472, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 506.56100000000015 + }, + "M=105472,N=1024": { + "file": "silu_config_M105472_N1024.json", + "M": 105472, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 407.76075000000014 + }, + "M=105472,N=1120": { + "file": "silu_config_M105472_N1120.json", + "M": 105472, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.7632499999996 + }, + "M=105472,N=1152": { + "file": "silu_config_M105472_N1152.json", + "M": 105472, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.4032499999995 + }, + "M=105472,N=1280": { + "file": "silu_config_M105472_N1280.json", + "M": 105472, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.9629999999997 + }, + "M=105472,N=1344": { + "file": 
"silu_config_M105472_N1344.json", + "M": 105472, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.3229999999999 + }, + "M=105472,N=1408": { + "file": "silu_config_M105472_N1408.json", + "M": 105472, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.0830000000001 + }, + "M=105472,N=1440": { + "file": "silu_config_M105472_N1440.json", + "M": 105472, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.8832499999997 + }, + "M=105472,N=1536": { + "file": "silu_config_M105472_N1536.json", + "M": 105472, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.0832499999997 + }, + "M=105472,N=1600": { + "file": "silu_config_M105472_N1600.json", + "M": 105472, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1008.7632499999999 + }, + "M=105472,N=1664": { + "file": "silu_config_M105472_N1664.json", + "M": 105472, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 989.4429999999998 + }, + "M=105472,N=1728": { + "file": "silu_config_M105472_N1728.json", + "M": 105472, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.9632500000004 + }, + "M=105472,N=1760": { + "file": "silu_config_M105472_N1760.json", + "M": 105472, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.5230000000006 + }, + "M=105472,N=1792": { + "file": "silu_config_M105472_N1792.json", + "M": 105472, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2032499999997 + }, + "M=105472,N=1920": { + "file": "silu_config_M105472_N1920.json", + "M": 105472, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.4430000000002 + }, + "M=105472,N=2048": { + "file": "silu_config_M105472_N2048.json", + "M": 105472, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 797.9625000000001 + }, + "M=105472,N=2080": { + "file": "silu_config_M105472_N2080.json", + "M": 105472, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1098.2835 + }, + "M=105472,N=2240": { + "file": "silu_config_M105472_N2240.json", + "M": 105472, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.6037499999998 + }, + "M=105472,N=2400": { + "file": "silu_config_M105472_N2400.json", + "M": 105472, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1221.724000000001 + }, + "M=105472,N=2560": { + "file": "silu_config_M105472_N2560.json", + "M": 105472, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1272.8042500000001 + }, + "M=106496,N=128": { + "file": "silu_config_M106496_N128.json", + "M": 106496, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.35925000000002 + }, + "M=106496,N=160": { + "file": "silu_config_M106496_N160.json", + "M": 106496, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 132.87950000000004 + }, + "M=106496,N=192": { + "file": "silu_config_M106496_N192.json", + "M": 106496, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 142.8795 + }, + "M=106496,N=256": { + "file": "silu_config_M106496_N256.json", + "M": 106496, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.95975000000004 + }, + "M=106496,N=320": { + "file": "silu_config_M106496_N320.json", + "M": 106496, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.9200000000001 + }, + "M=106496,N=384": { + "file": "silu_config_M106496_N384.json", + "M": 106496, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.0400000000001 + }, + "M=106496,N=480": { + "file": "silu_config_M106496_N480.json", + "M": 106496, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.76 + }, + "M=106496,N=512": { + "file": "silu_config_M106496_N512.json", + "M": 106496, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 213.60000000000002 + }, + "M=106496,N=576": { + "file": "silu_config_M106496_N576.json", + "M": 106496, + "N": 576, + "rows_per_block": 6, + 
"vec_size": 2, + "time_us": 516.2010000000001 + }, + "M=106496,N=640": { + "file": "silu_config_M106496_N640.json", + "M": 106496, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.04100000000017 + }, + "M=106496,N=768": { + "file": "silu_config_M106496_N768.json", + "M": 106496, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 514.6010000000003 + }, + "M=106496,N=800": { + "file": "silu_config_M106496_N800.json", + "M": 106496, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 524.72125 + }, + "M=106496,N=896": { + "file": "silu_config_M106496_N896.json", + "M": 106496, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 511.56100000000015 + }, + "M=106496,N=960": { + "file": "silu_config_M106496_N960.json", + "M": 106496, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 516.4810000000002 + }, + "M=106496,N=1024": { + "file": "silu_config_M106496_N1024.json", + "M": 106496, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 411.3207500000003 + }, + "M=106496,N=1120": { + "file": "silu_config_M106496_N1120.json", + "M": 106496, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.4432499999998 + }, + "M=106496,N=1152": { + "file": "silu_config_M106496_N1152.json", + "M": 106496, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 990.9629999999995 + }, + "M=106496,N=1280": { + "file": "silu_config_M106496_N1280.json", + "M": 106496, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1003.72325 + }, + "M=106496,N=1344": { + "file": "silu_config_M106496_N1344.json", + "M": 106496, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1005.1632500000005 + }, + "M=106496,N=1408": { + "file": "silu_config_M106496_N1408.json", + "M": 106496, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 997.6030000000001 + }, + "M=106496,N=1440": { + "file": "silu_config_M106496_N1440.json", + "M": 106496, + "N": 
1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1005.8432500000004 + }, + "M=106496,N=1536": { + "file": "silu_config_M106496_N1536.json", + "M": 106496, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.4032499999998 + }, + "M=106496,N=1600": { + "file": "silu_config_M106496_N1600.json", + "M": 106496, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.4432500000003 + }, + "M=106496,N=1664": { + "file": "silu_config_M106496_N1664.json", + "M": 106496, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1011.6832500000002 + }, + "M=106496,N=1728": { + "file": "silu_config_M106496_N1728.json", + "M": 106496, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1018.9632500000004 + }, + "M=106496,N=1760": { + "file": "silu_config_M106496_N1760.json", + "M": 106496, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.2432500000004 + }, + "M=106496,N=1792": { + "file": "silu_config_M106496_N1792.json", + "M": 106496, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.4032500000002 + }, + "M=106496,N=1920": { + "file": "silu_config_M106496_N1920.json", + "M": 106496, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1013.3232500000005 + }, + "M=106496,N=2048": { + "file": "silu_config_M106496_N2048.json", + "M": 106496, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 809.5225000000005 + }, + "M=106496,N=2080": { + "file": "silu_config_M106496_N2080.json", + "M": 106496, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2034999999996 + }, + "M=106496,N=2240": { + "file": "silu_config_M106496_N2240.json", + "M": 106496, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1180.1240000000003 + }, + "M=106496,N=2400": { + "file": "silu_config_M106496_N2400.json", + "M": 106496, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1239.4842499999995 + }, + 
"M=106496,N=2560": { + "file": "silu_config_M106496_N2560.json", + "M": 106496, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1284.8842500000005 + }, + "M=107520,N=128": { + "file": "silu_config_M107520_N128.json", + "M": 107520, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.91924999999999 + }, + "M=107520,N=160": { + "file": "silu_config_M107520_N160.json", + "M": 107520, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 134.03949999999998 + }, + "M=107520,N=192": { + "file": "silu_config_M107520_N192.json", + "M": 107520, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 146.03949999999998 + }, + "M=107520,N=256": { + "file": "silu_config_M107520_N256.json", + "M": 107520, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 132.4795 + }, + "M=107520,N=320": { + "file": "silu_config_M107520_N320.json", + "M": 107520, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 265.88000000000005 + }, + "M=107520,N=384": { + "file": "silu_config_M107520_N384.json", + "M": 107520, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.6802499999999 + }, + "M=107520,N=480": { + "file": "silu_config_M107520_N480.json", + "M": 107520, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.9602500000002 + }, + "M=107520,N=512": { + "file": "silu_config_M107520_N512.json", + "M": 107520, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 215.43974999999995 + }, + "M=107520,N=576": { + "file": "silu_config_M107520_N576.json", + "M": 107520, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.5609999999999 + }, + "M=107520,N=640": { + "file": "silu_config_M107520_N640.json", + "M": 107520, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.721 + }, + "M=107520,N=768": { + "file": "silu_config_M107520_N768.json", + "M": 107520, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
515.6409999999998 + }, + "M=107520,N=800": { + "file": "silu_config_M107520_N800.json", + "M": 107520, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 518.441 + }, + "M=107520,N=896": { + "file": "silu_config_M107520_N896.json", + "M": 107520, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 527.3612500000002 + }, + "M=107520,N=960": { + "file": "silu_config_M107520_N960.json", + "M": 107520, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 520.9610000000002 + }, + "M=107520,N=1024": { + "file": "silu_config_M107520_N1024.json", + "M": 107520, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 415.40075 + }, + "M=107520,N=1120": { + "file": "silu_config_M107520_N1120.json", + "M": 107520, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.7232500000003 + }, + "M=107520,N=1152": { + "file": "silu_config_M107520_N1152.json", + "M": 107520, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1003.1232499999998 + }, + "M=107520,N=1280": { + "file": "silu_config_M107520_N1280.json", + "M": 107520, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1017.2032500000001 + }, + "M=107520,N=1344": { + "file": "silu_config_M107520_N1344.json", + "M": 107520, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1015.6832500000002 + }, + "M=107520,N=1408": { + "file": "silu_config_M107520_N1408.json", + "M": 107520, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1017.8032500000005 + }, + "M=107520,N=1440": { + "file": "silu_config_M107520_N1440.json", + "M": 107520, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1038.4432499999998 + }, + "M=107520,N=1536": { + "file": "silu_config_M107520_N1536.json", + "M": 107520, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1010.6832499999996 + }, + "M=107520,N=1600": { + "file": "silu_config_M107520_N1600.json", + "M": 107520, + "N": 1600, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8832499999997 + }, + "M=107520,N=1664": { + "file": "silu_config_M107520_N1664.json", + "M": 107520, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1021.24325 + }, + "M=107520,N=1728": { + "file": "silu_config_M107520_N1728.json", + "M": 107520, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.1232500000001 + }, + "M=107520,N=1760": { + "file": "silu_config_M107520_N1760.json", + "M": 107520, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.5632500000002 + }, + "M=107520,N=1792": { + "file": "silu_config_M107520_N1792.json", + "M": 107520, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1023.6832499999999 + }, + "M=107520,N=1920": { + "file": "silu_config_M107520_N1920.json", + "M": 107520, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1016.0432500000012 + }, + "M=107520,N=2048": { + "file": "silu_config_M107520_N2048.json", + "M": 107520, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 809.0824999999995 + }, + "M=107520,N=2080": { + "file": "silu_config_M107520_N2080.json", + "M": 107520, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1100.6835 + }, + "M=107520,N=2240": { + "file": "silu_config_M107520_N2240.json", + "M": 107520, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.4439999999995 + }, + "M=107520,N=2400": { + "file": "silu_config_M107520_N2400.json", + "M": 107520, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1247.0442500000004 + }, + "M=107520,N=2560": { + "file": "silu_config_M107520_N2560.json", + "M": 107520, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1295.3245000000002 + }, + "M=108544,N=128": { + "file": "silu_config_M108544_N128.json", + "M": 108544, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 66.59924999999998 + }, + "M=108544,N=160": { + "file": 
"silu_config_M108544_N160.json", + "M": 108544, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.1995 + }, + "M=108544,N=192": { + "file": "silu_config_M108544_N192.json", + "M": 108544, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.3595 + }, + "M=108544,N=256": { + "file": "silu_config_M108544_N256.json", + "M": 108544, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.87950000000004 + }, + "M=108544,N=320": { + "file": "silu_config_M108544_N320.json", + "M": 108544, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 269.8002500000002 + }, + "M=108544,N=384": { + "file": "silu_config_M108544_N384.json", + "M": 108544, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 267.7599999999998 + }, + "M=108544,N=480": { + "file": "silu_config_M108544_N480.json", + "M": 108544, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.56025 + }, + "M=108544,N=512": { + "file": "silu_config_M108544_N512.json", + "M": 108544, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 217.44 + }, + "M=108544,N=576": { + "file": "silu_config_M108544_N576.json", + "M": 108544, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 520.6809999999999 + }, + "M=108544,N=640": { + "file": "silu_config_M108544_N640.json", + "M": 108544, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.76100000000014 + }, + "M=108544,N=768": { + "file": "silu_config_M108544_N768.json", + "M": 108544, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 502.84099999999967 + }, + "M=108544,N=800": { + "file": "silu_config_M108544_N800.json", + "M": 108544, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 538.5612500000002 + }, + "M=108544,N=896": { + "file": "silu_config_M108544_N896.json", + "M": 108544, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 509.961 + }, + "M=108544,N=960": { + "file": 
"silu_config_M108544_N960.json", + "M": 108544, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.1612500000001 + }, + "M=108544,N=1024": { + "file": "silu_config_M108544_N1024.json", + "M": 108544, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 418.3207500000001 + }, + "M=108544,N=1120": { + "file": "silu_config_M108544_N1120.json", + "M": 108544, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1042.6832499999998 + }, + "M=108544,N=1152": { + "file": "silu_config_M108544_N1152.json", + "M": 108544, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.1232499999998 + }, + "M=108544,N=1280": { + "file": "silu_config_M108544_N1280.json", + "M": 108544, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1037.16325 + }, + "M=108544,N=1344": { + "file": "silu_config_M108544_N1344.json", + "M": 108544, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.80325 + }, + "M=108544,N=1408": { + "file": "silu_config_M108544_N1408.json", + "M": 108544, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1014.0432499999996 + }, + "M=108544,N=1440": { + "file": "silu_config_M108544_N1440.json", + "M": 108544, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.28325 + }, + "M=108544,N=1536": { + "file": "silu_config_M108544_N1536.json", + "M": 108544, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1016.0032500000002 + }, + "M=108544,N=1600": { + "file": "silu_config_M108544_N1600.json", + "M": 108544, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1046.5632499999997 + }, + "M=108544,N=1664": { + "file": "silu_config_M108544_N1664.json", + "M": 108544, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1037.0032500000002 + }, + "M=108544,N=1728": { + "file": "silu_config_M108544_N1728.json", + "M": 108544, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1027.4832499999998 + }, + "M=108544,N=1760": { + "file": "silu_config_M108544_N1760.json", + "M": 108544, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1047.8832499999999 + }, + "M=108544,N=1792": { + "file": "silu_config_M108544_N1792.json", + "M": 108544, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8832500000002 + }, + "M=108544,N=1920": { + "file": "silu_config_M108544_N1920.json", + "M": 108544, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1032.72325 + }, + "M=108544,N=2048": { + "file": "silu_config_M108544_N2048.json", + "M": 108544, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 820.6424999999999 + }, + "M=108544,N=2080": { + "file": "silu_config_M108544_N2080.json", + "M": 108544, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6037500000007 + }, + "M=108544,N=2240": { + "file": "silu_config_M108544_N2240.json", + "M": 108544, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1187.964 + }, + "M=108544,N=2400": { + "file": "silu_config_M108544_N2400.json", + "M": 108544, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1257.36425 + }, + "M=108544,N=2560": { + "file": "silu_config_M108544_N2560.json", + "M": 108544, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1306.8444999999997 + }, + "M=109568,N=128": { + "file": "silu_config_M109568_N128.json", + "M": 109568, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.99925 + }, + "M=109568,N=160": { + "file": "silu_config_M109568_N160.json", + "M": 109568, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 148.7195 + }, + "M=109568,N=192": { + "file": "silu_config_M109568_N192.json", + "M": 109568, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 147.7195 + }, + "M=109568,N=256": { + "file": "silu_config_M109568_N256.json", + "M": 109568, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 124.23950000000002 + }, + "M=109568,N=320": { + "file": "silu_config_M109568_N320.json", + "M": 109568, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.24025 + }, + "M=109568,N=384": { + "file": "silu_config_M109568_N384.json", + "M": 109568, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 269.9199999999998 + }, + "M=109568,N=480": { + "file": "silu_config_M109568_N480.json", + "M": 109568, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 275.84024999999997 + }, + "M=109568,N=512": { + "file": "silu_config_M109568_N512.json", + "M": 109568, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 219.55999999999983 + }, + "M=109568,N=576": { + "file": "silu_config_M109568_N576.json", + "M": 109568, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.2812500000002 + }, + "M=109568,N=640": { + "file": "silu_config_M109568_N640.json", + "M": 109568, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 533.2412499999998 + }, + "M=109568,N=768": { + "file": "silu_config_M109568_N768.json", + "M": 109568, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 523.4812499999998 + }, + "M=109568,N=800": { + "file": "silu_config_M109568_N800.json", + "M": 109568, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 516.761 + }, + "M=109568,N=896": { + "file": "silu_config_M109568_N896.json", + "M": 109568, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.7612499999998 + }, + "M=109568,N=960": { + "file": "silu_config_M109568_N960.json", + "M": 109568, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 535.8812499999999 + }, + "M=109568,N=1024": { + "file": "silu_config_M109568_N1024.json", + "M": 109568, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 423.44075 + }, + "M=109568,N=1120": { + "file": "silu_config_M109568_N1120.json", + "M": 109568, + "N": 1120, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 1052.0832499999997 + }, + "M=109568,N=1152": { + "file": "silu_config_M109568_N1152.json", + "M": 109568, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.20325 + }, + "M=109568,N=1280": { + "file": "silu_config_M109568_N1280.json", + "M": 109568, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.8832499999999 + }, + "M=109568,N=1344": { + "file": "silu_config_M109568_N1344.json", + "M": 109568, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.76325 + }, + "M=109568,N=1408": { + "file": "silu_config_M109568_N1408.json", + "M": 109568, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.1232500000006 + }, + "M=109568,N=1440": { + "file": "silu_config_M109568_N1440.json", + "M": 109568, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1035.84325 + }, + "M=109568,N=1536": { + "file": "silu_config_M109568_N1536.json", + "M": 109568, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1025.64325 + }, + "M=109568,N=1600": { + "file": "silu_config_M109568_N1600.json", + "M": 109568, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.0032500000002 + }, + "M=109568,N=1664": { + "file": "silu_config_M109568_N1664.json", + "M": 109568, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.9632500000002 + }, + "M=109568,N=1728": { + "file": "silu_config_M109568_N1728.json", + "M": 109568, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.68325 + }, + "M=109568,N=1760": { + "file": "silu_config_M109568_N1760.json", + "M": 109568, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1056.9234999999999 + }, + "M=109568,N=1792": { + "file": "silu_config_M109568_N1792.json", + "M": 109568, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1042.4032499999998 + }, + "M=109568,N=1920": { + "file": "silu_config_M109568_N1920.json", + "M": 109568, + "N": 1920, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.80325 + }, + "M=109568,N=2048": { + "file": "silu_config_M109568_N2048.json", + "M": 109568, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 846.3224999999998 + }, + "M=109568,N=2080": { + "file": "silu_config_M109568_N2080.json", + "M": 109568, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1140.0037499999999 + }, + "M=109568,N=2240": { + "file": "silu_config_M109568_N2240.json", + "M": 109568, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1203.524 + }, + "M=109568,N=2400": { + "file": "silu_config_M109568_N2400.json", + "M": 109568, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.6442499999998 + }, + "M=109568,N=2560": { + "file": "silu_config_M109568_N2560.json", + "M": 109568, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.4045 + }, + "M=110592,N=128": { + "file": "silu_config_M110592_N128.json", + "M": 110592, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.35925 + }, + "M=110592,N=160": { + "file": "silu_config_M110592_N160.json", + "M": 110592, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.71949999999998 + }, + "M=110592,N=192": { + "file": "silu_config_M110592_N192.json", + "M": 110592, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.7995 + }, + "M=110592,N=256": { + "file": "silu_config_M110592_N256.json", + "M": 110592, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.23950000000004 + }, + "M=110592,N=320": { + "file": "silu_config_M110592_N320.json", + "M": 110592, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.72025000000014 + }, + "M=110592,N=384": { + "file": "silu_config_M110592_N384.json", + "M": 110592, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.52 + }, + "M=110592,N=480": { + "file": "silu_config_M110592_N480.json", + "M": 110592, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 278.6802500000001 + }, + "M=110592,N=512": { + "file": "silu_config_M110592_N512.json", + "M": 110592, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 221.4799999999999 + }, + "M=110592,N=576": { + "file": "silu_config_M110592_N576.json", + "M": 110592, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.4010000000002 + }, + "M=110592,N=640": { + "file": "silu_config_M110592_N640.json", + "M": 110592, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.8012499999998 + }, + "M=110592,N=768": { + "file": "silu_config_M110592_N768.json", + "M": 110592, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 530.1612500000001 + }, + "M=110592,N=800": { + "file": "silu_config_M110592_N800.json", + "M": 110592, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 534.32125 + }, + "M=110592,N=896": { + "file": "silu_config_M110592_N896.json", + "M": 110592, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 530.5612499999997 + }, + "M=110592,N=960": { + "file": "silu_config_M110592_N960.json", + "M": 110592, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 535.5212500000002 + }, + "M=110592,N=1024": { + "file": "silu_config_M110592_N1024.json", + "M": 110592, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 425.84075000000007 + }, + "M=110592,N=1120": { + "file": "silu_config_M110592_N1120.json", + "M": 110592, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1039.8832500000015 + }, + "M=110592,N=1152": { + "file": "silu_config_M110592_N1152.json", + "M": 110592, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.0832500000004 + }, + "M=110592,N=1280": { + "file": "silu_config_M110592_N1280.json", + "M": 110592, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1045.2832499999995 + }, + "M=110592,N=1344": { + "file": "silu_config_M110592_N1344.json", 
+ "M": 110592, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1051.4032500000003 + }, + "M=110592,N=1408": { + "file": "silu_config_M110592_N1408.json", + "M": 110592, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1046.4432499999998 + }, + "M=110592,N=1440": { + "file": "silu_config_M110592_N1440.json", + "M": 110592, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.8832499999999 + }, + "M=110592,N=1536": { + "file": "silu_config_M110592_N1536.json", + "M": 110592, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.6432500000005 + }, + "M=110592,N=1600": { + "file": "silu_config_M110592_N1600.json", + "M": 110592, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.8832499999999 + }, + "M=110592,N=1664": { + "file": "silu_config_M110592_N1664.json", + "M": 110592, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.2432500000004 + }, + "M=110592,N=1728": { + "file": "silu_config_M110592_N1728.json", + "M": 110592, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1054.2432499999995 + }, + "M=110592,N=1760": { + "file": "silu_config_M110592_N1760.json", + "M": 110592, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1048.0032499999998 + }, + "M=110592,N=1792": { + "file": "silu_config_M110592_N1792.json", + "M": 110592, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1062.9634999999998 + }, + "M=110592,N=1920": { + "file": "silu_config_M110592_N1920.json", + "M": 110592, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1051.5632499999997 + }, + "M=110592,N=2048": { + "file": "silu_config_M110592_N2048.json", + "M": 110592, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 824.6025 + }, + "M=110592,N=2080": { + "file": "silu_config_M110592_N2080.json", + "M": 110592, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.3237499999996 + }, + 
"M=110592,N=2240": { + "file": "silu_config_M110592_N2240.json", + "M": 110592, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.9240000000004 + }, + "M=110592,N=2400": { + "file": "silu_config_M110592_N2400.json", + "M": 110592, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.1642500000003 + }, + "M=110592,N=2560": { + "file": "silu_config_M110592_N2560.json", + "M": 110592, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.6844999999998 + }, + "M=111616,N=128": { + "file": "silu_config_M111616_N128.json", + "M": 111616, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.27925000000002 + }, + "M=111616,N=160": { + "file": "silu_config_M111616_N160.json", + "M": 111616, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.51975 + }, + "M=111616,N=192": { + "file": "silu_config_M111616_N192.json", + "M": 111616, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.43949999999998 + }, + "M=111616,N=256": { + "file": "silu_config_M111616_N256.json", + "M": 111616, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.7595 + }, + "M=111616,N=320": { + "file": "silu_config_M111616_N320.json", + "M": 111616, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 277.1602499999999 + }, + "M=111616,N=384": { + "file": "silu_config_M111616_N384.json", + "M": 111616, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.91999999999985 + }, + "M=111616,N=480": { + "file": "silu_config_M111616_N480.json", + "M": 111616, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.19999999999993 + }, + "M=111616,N=512": { + "file": "silu_config_M111616_N512.json", + "M": 111616, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 223.43999999999983 + }, + "M=111616,N=576": { + "file": "silu_config_M111616_N576.json", + "M": 111616, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
541.0012499999998 + }, + "M=111616,N=640": { + "file": "silu_config_M111616_N640.json", + "M": 111616, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.0412499999998 + }, + "M=111616,N=768": { + "file": "silu_config_M111616_N768.json", + "M": 111616, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.1612500000001 + }, + "M=111616,N=800": { + "file": "silu_config_M111616_N800.json", + "M": 111616, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.8812499999999 + }, + "M=111616,N=896": { + "file": "silu_config_M111616_N896.json", + "M": 111616, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 540.2412499999998 + }, + "M=111616,N=960": { + "file": "silu_config_M111616_N960.json", + "M": 111616, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 545.8812500000001 + }, + "M=111616,N=1024": { + "file": "silu_config_M111616_N1024.json", + "M": 111616, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 430.84075000000007 + }, + "M=111616,N=1120": { + "file": "silu_config_M111616_N1120.json", + "M": 111616, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1057.12325 + }, + "M=111616,N=1152": { + "file": "silu_config_M111616_N1152.json", + "M": 111616, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1049.1232499999996 + }, + "M=111616,N=1280": { + "file": "silu_config_M111616_N1280.json", + "M": 111616, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1060.8035 + }, + "M=111616,N=1344": { + "file": "silu_config_M111616_N1344.json", + "M": 111616, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.6032499999997 + }, + "M=111616,N=1408": { + "file": "silu_config_M111616_N1408.json", + "M": 111616, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.9234999999999 + }, + "M=111616,N=1440": { + "file": "silu_config_M111616_N1440.json", + "M": 111616, + "N": 1440, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1055.32325 + }, + "M=111616,N=1536": { + "file": "silu_config_M111616_N1536.json", + "M": 111616, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1054.80325 + }, + "M=111616,N=1600": { + "file": "silu_config_M111616_N1600.json", + "M": 111616, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.6435000000006 + }, + "M=111616,N=1664": { + "file": "silu_config_M111616_N1664.json", + "M": 111616, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1047.4832499999998 + }, + "M=111616,N=1728": { + "file": "silu_config_M111616_N1728.json", + "M": 111616, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.2435000000005 + }, + "M=111616,N=1760": { + "file": "silu_config_M111616_N1760.json", + "M": 111616, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1055.2432499999995 + }, + "M=111616,N=1792": { + "file": "silu_config_M111616_N1792.json", + "M": 111616, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1061.8035000000004 + }, + "M=111616,N=1920": { + "file": "silu_config_M111616_N1920.json", + "M": 111616, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.8432500000004 + }, + "M=111616,N=2048": { + "file": "silu_config_M111616_N2048.json", + "M": 111616, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 852.0425 + }, + "M=111616,N=2080": { + "file": "silu_config_M111616_N2080.json", + "M": 111616, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.72375 + }, + "M=111616,N=2240": { + "file": "silu_config_M111616_N2240.json", + "M": 111616, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1220.8839999999996 + }, + "M=111616,N=2400": { + "file": "silu_config_M111616_N2400.json", + "M": 111616, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1289.6842500000002 + }, + "M=111616,N=2560": { + "file": 
"silu_config_M111616_N2560.json", + "M": 111616, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1353.0045 + }, + "M=112640,N=128": { + "file": "silu_config_M112640_N128.json", + "M": 112640, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 78.87924999999996 + }, + "M=112640,N=160": { + "file": "silu_config_M112640_N160.json", + "M": 112640, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.03975 + }, + "M=112640,N=192": { + "file": "silu_config_M112640_N192.json", + "M": 112640, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.1595 + }, + "M=112640,N=256": { + "file": "silu_config_M112640_N256.json", + "M": 112640, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.39974999999995 + }, + "M=112640,N=320": { + "file": "silu_config_M112640_N320.json", + "M": 112640, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 277.5602499999999 + }, + "M=112640,N=384": { + "file": "silu_config_M112640_N384.json", + "M": 112640, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.4002500000001 + }, + "M=112640,N=480": { + "file": "silu_config_M112640_N480.json", + "M": 112640, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.8002500000001 + }, + "M=112640,N=512": { + "file": "silu_config_M112640_N512.json", + "M": 112640, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 225.32000000000005 + }, + "M=112640,N=576": { + "file": "silu_config_M112640_N576.json", + "M": 112640, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.8412500000001 + }, + "M=112640,N=640": { + "file": "silu_config_M112640_N640.json", + "M": 112640, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.48125 + }, + "M=112640,N=768": { + "file": "silu_config_M112640_N768.json", + "M": 112640, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 548.6012500000004 + }, + "M=112640,N=800": { + "file": 
"silu_config_M112640_N800.json", + "M": 112640, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 555.1212499999999 + }, + "M=112640,N=896": { + "file": "silu_config_M112640_N896.json", + "M": 112640, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 540.0012499999998 + }, + "M=112640,N=960": { + "file": "silu_config_M112640_N960.json", + "M": 112640, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.48125 + }, + "M=112640,N=1024": { + "file": "silu_config_M112640_N1024.json", + "M": 112640, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 434.28075000000035 + }, + "M=112640,N=1120": { + "file": "silu_config_M112640_N1120.json", + "M": 112640, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1069.3635 + }, + "M=112640,N=1152": { + "file": "silu_config_M112640_N1152.json", + "M": 112640, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.2832499999995 + }, + "M=112640,N=1280": { + "file": "silu_config_M112640_N1280.json", + "M": 112640, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1053.5632500000006 + }, + "M=112640,N=1344": { + "file": "silu_config_M112640_N1344.json", + "M": 112640, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1052.3632500000008 + }, + "M=112640,N=1408": { + "file": "silu_config_M112640_N1408.json", + "M": 112640, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1034.80325 + }, + "M=112640,N=1440": { + "file": "silu_config_M112640_N1440.json", + "M": 112640, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.6032500000006 + }, + "M=112640,N=1536": { + "file": "silu_config_M112640_N1536.json", + "M": 112640, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.6835 + }, + "M=112640,N=1600": { + "file": "silu_config_M112640_N1600.json", + "M": 112640, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.3234999999995 + }, + 
"M=112640,N=1664": { + "file": "silu_config_M112640_N1664.json", + "M": 112640, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.6432499999996 + }, + "M=112640,N=1728": { + "file": "silu_config_M112640_N1728.json", + "M": 112640, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1075.4035 + }, + "M=112640,N=1760": { + "file": "silu_config_M112640_N1760.json", + "M": 112640, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1057.12325 + }, + "M=112640,N=1792": { + "file": "silu_config_M112640_N1792.json", + "M": 112640, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1039.5232499999997 + }, + "M=112640,N=1920": { + "file": "silu_config_M112640_N1920.json", + "M": 112640, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1053.2032500000005 + }, + "M=112640,N=2048": { + "file": "silu_config_M112640_N2048.json", + "M": 112640, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 847.6024999999995 + }, + "M=112640,N=2080": { + "file": "silu_config_M112640_N2080.json", + "M": 112640, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.32375 + }, + "M=112640,N=2240": { + "file": "silu_config_M112640_N2240.json", + "M": 112640, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1223.3639999999996 + }, + "M=112640,N=2400": { + "file": "silu_config_M112640_N2400.json", + "M": 112640, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.0442499999995 + }, + "M=112640,N=2560": { + "file": "silu_config_M112640_N2560.json", + "M": 112640, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1357.7247500000008 + }, + "M=113664,N=128": { + "file": "silu_config_M113664_N128.json", + "M": 113664, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.27924999999999 + }, + "M=113664,N=160": { + "file": "silu_config_M113664_N160.json", + "M": 113664, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 141.19949999999994 + }, + "M=113664,N=192": { + "file": "silu_config_M113664_N192.json", + "M": 113664, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.39975 + }, + "M=113664,N=256": { + "file": "silu_config_M113664_N256.json", + "M": 113664, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.03950000000003 + }, + "M=113664,N=320": { + "file": "silu_config_M113664_N320.json", + "M": 113664, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 282.20025000000004 + }, + "M=113664,N=384": { + "file": "silu_config_M113664_N384.json", + "M": 113664, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 279.8402500000001 + }, + "M=113664,N=480": { + "file": "silu_config_M113664_N480.json", + "M": 113664, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.20000000000005 + }, + "M=113664,N=512": { + "file": "silu_config_M113664_N512.json", + "M": 113664, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 227.43999999999983 + }, + "M=113664,N=576": { + "file": "silu_config_M113664_N576.json", + "M": 113664, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.5612500000002 + }, + "M=113664,N=640": { + "file": "silu_config_M113664_N640.json", + "M": 113664, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 543.24125 + }, + "M=113664,N=768": { + "file": "silu_config_M113664_N768.json", + "M": 113664, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 544.6812499999999 + }, + "M=113664,N=800": { + "file": "silu_config_M113664_N800.json", + "M": 113664, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.2812500000002 + }, + "M=113664,N=896": { + "file": "silu_config_M113664_N896.json", + "M": 113664, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 556.1212499999999 + }, + "M=113664,N=960": { + "file": "silu_config_M113664_N960.json", + "M": 113664, + "N": 960, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 550.0812500000002 + }, + "M=113664,N=1024": { + "file": "silu_config_M113664_N1024.json", + "M": 113664, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 438.3207500000001 + }, + "M=113664,N=1120": { + "file": "silu_config_M113664_N1120.json", + "M": 113664, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1081.9234999999999 + }, + "M=113664,N=1152": { + "file": "silu_config_M113664_N1152.json", + "M": 113664, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1068.0434999999998 + }, + "M=113664,N=1280": { + "file": "silu_config_M113664_N1280.json", + "M": 113664, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.2035 + }, + "M=113664,N=1344": { + "file": "silu_config_M113664_N1344.json", + "M": 113664, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1072.1234999999997 + }, + "M=113664,N=1408": { + "file": "silu_config_M113664_N1408.json", + "M": 113664, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1064.2435 + }, + "M=113664,N=1440": { + "file": "silu_config_M113664_N1440.json", + "M": 113664, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1082.9634999999998 + }, + "M=113664,N=1536": { + "file": "silu_config_M113664_N1536.json", + "M": 113664, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1084.2035 + }, + "M=113664,N=1600": { + "file": "silu_config_M113664_N1600.json", + "M": 113664, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1083.6835000000005 + }, + "M=113664,N=1664": { + "file": "silu_config_M113664_N1664.json", + "M": 113664, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.1634999999997 + }, + "M=113664,N=1728": { + "file": "silu_config_M113664_N1728.json", + "M": 113664, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1086.5235000000002 + }, + "M=113664,N=1760": { + "file": "silu_config_M113664_N1760.json", + "M": 
113664, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.7635000000005 + }, + "M=113664,N=1792": { + "file": "silu_config_M113664_N1792.json", + "M": 113664, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.8035 + }, + "M=113664,N=1920": { + "file": "silu_config_M113664_N1920.json", + "M": 113664, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1094.1634999999997 + }, + "M=113664,N=2048": { + "file": "silu_config_M113664_N2048.json", + "M": 113664, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 867.2424999999998 + }, + "M=113664,N=2080": { + "file": "silu_config_M113664_N2080.json", + "M": 113664, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1182.3639999999996 + }, + "M=113664,N=2240": { + "file": "silu_config_M113664_N2240.json", + "M": 113664, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1241.0042500000004 + }, + "M=113664,N=2400": { + "file": "silu_config_M113664_N2400.json", + "M": 113664, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.4445 + }, + "M=113664,N=2560": { + "file": "silu_config_M113664_N2560.json", + "M": 113664, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1367.444750000001 + }, + "M=114688,N=128": { + "file": "silu_config_M114688_N128.json", + "M": 114688, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.87925000000001 + }, + "M=114688,N=160": { + "file": "silu_config_M114688_N160.json", + "M": 114688, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.47950000000003 + }, + "M=114688,N=192": { + "file": "silu_config_M114688_N192.json", + "M": 114688, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.7595 + }, + "M=114688,N=256": { + "file": "silu_config_M114688_N256.json", + "M": 114688, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.55949999999996 + }, + "M=114688,N=320": { + "file": 
"silu_config_M114688_N320.json", + "M": 114688, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 284.36024999999995 + }, + "M=114688,N=384": { + "file": "silu_config_M114688_N384.json", + "M": 114688, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 282.5600000000001 + }, + "M=114688,N=480": { + "file": "silu_config_M114688_N480.json", + "M": 114688, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.00025000000016 + }, + "M=114688,N=512": { + "file": "silu_config_M114688_N512.json", + "M": 114688, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 219.11974999999995 + }, + "M=114688,N=576": { + "file": "silu_config_M114688_N576.json", + "M": 114688, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 538.0010000000001 + }, + "M=114688,N=640": { + "file": "silu_config_M114688_N640.json", + "M": 114688, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.76125 + }, + "M=114688,N=768": { + "file": "silu_config_M114688_N768.json", + "M": 114688, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 535.8812499999997 + }, + "M=114688,N=800": { + "file": "silu_config_M114688_N800.json", + "M": 114688, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 540.6012499999997 + }, + "M=114688,N=896": { + "file": "silu_config_M114688_N896.json", + "M": 114688, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 538.56125 + }, + "M=114688,N=960": { + "file": "silu_config_M114688_N960.json", + "M": 114688, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 554.9212499999999 + }, + "M=114688,N=1024": { + "file": "silu_config_M114688_N1024.json", + "M": 114688, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 441.8810000000001 + }, + "M=114688,N=1120": { + "file": "silu_config_M114688_N1120.json", + "M": 114688, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1080.3235000000004 + }, + 
"M=114688,N=1152": { + "file": "silu_config_M114688_N1152.json", + "M": 114688, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1070.283500000001 + }, + "M=114688,N=1280": { + "file": "silu_config_M114688_N1280.json", + "M": 114688, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1072.3235 + }, + "M=114688,N=1344": { + "file": "silu_config_M114688_N1344.json", + "M": 114688, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1092.5234999999998 + }, + "M=114688,N=1408": { + "file": "silu_config_M114688_N1408.json", + "M": 114688, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1080.8034999999995 + }, + "M=114688,N=1440": { + "file": "silu_config_M114688_N1440.json", + "M": 114688, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1084.0434999999998 + }, + "M=114688,N=1536": { + "file": "silu_config_M114688_N1536.json", + "M": 114688, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1075.7635000000005 + }, + "M=114688,N=1600": { + "file": "silu_config_M114688_N1600.json", + "M": 114688, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1082.4434999999999 + }, + "M=114688,N=1664": { + "file": "silu_config_M114688_N1664.json", + "M": 114688, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1093.4034999999994 + }, + "M=114688,N=1728": { + "file": "silu_config_M114688_N1728.json", + "M": 114688, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1084.9235000000003 + }, + "M=114688,N=1760": { + "file": "silu_config_M114688_N1760.json", + "M": 114688, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1086.2435 + }, + "M=114688,N=1792": { + "file": "silu_config_M114688_N1792.json", + "M": 114688, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1079.4434999999999 + }, + "M=114688,N=1920": { + "file": "silu_config_M114688_N1920.json", + "M": 114688, + "N": 1920, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1081.6835 + }, + "M=114688,N=2048": { + "file": "silu_config_M114688_N2048.json", + "M": 114688, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 855.4024999999997 + }, + "M=114688,N=2080": { + "file": "silu_config_M114688_N2080.json", + "M": 114688, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1182.0839999999998 + }, + "M=114688,N=2240": { + "file": "silu_config_M114688_N2240.json", + "M": 114688, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1268.1642500000003 + }, + "M=114688,N=2400": { + "file": "silu_config_M114688_N2400.json", + "M": 114688, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1324.8845000000001 + }, + "M=114688,N=2560": { + "file": "silu_config_M114688_N2560.json", + "M": 114688, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1392.5647500000005 + }, + "M=115712,N=128": { + "file": "silu_config_M115712_N128.json", + "M": 115712, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.23924999999998 + }, + "M=115712,N=160": { + "file": "silu_config_M115712_N160.json", + "M": 115712, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.63950000000003 + }, + "M=115712,N=192": { + "file": "silu_config_M115712_N192.json", + "M": 115712, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 153.59950000000003 + }, + "M=115712,N=256": { + "file": "silu_config_M115712_N256.json", + "M": 115712, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.51950000000002 + }, + "M=115712,N=320": { + "file": "silu_config_M115712_N320.json", + "M": 115712, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.12025 + }, + "M=115712,N=384": { + "file": "silu_config_M115712_N384.json", + "M": 115712, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 285.24025 + }, + "M=115712,N=480": { + "file": "silu_config_M115712_N480.json", + "M": 115712, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 290.92025 + }, + "M=115712,N=512": { + "file": "silu_config_M115712_N512.json", + "M": 115712, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 231.39999999999986 + }, + "M=115712,N=576": { + "file": "silu_config_M115712_N576.json", + "M": 115712, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 558.7212499999999 + }, + "M=115712,N=640": { + "file": "silu_config_M115712_N640.json", + "M": 115712, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 556.72125 + }, + "M=115712,N=768": { + "file": "silu_config_M115712_N768.json", + "M": 115712, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 551.6812500000001 + }, + "M=115712,N=800": { + "file": "silu_config_M115712_N800.json", + "M": 115712, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.9212500000001 + }, + "M=115712,N=896": { + "file": "silu_config_M115712_N896.json", + "M": 115712, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 565.6812500000001 + }, + "M=115712,N=960": { + "file": "silu_config_M115712_N960.json", + "M": 115712, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.2012499999998 + }, + "M=115712,N=1024": { + "file": "silu_config_M115712_N1024.json", + "M": 115712, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 445.48075000000017 + }, + "M=115712,N=1120": { + "file": "silu_config_M115712_N1120.json", + "M": 115712, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1087.4835000000007 + }, + "M=115712,N=1152": { + "file": "silu_config_M115712_N1152.json", + "M": 115712, + "N": 1152, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 1150.4837499999999 + }, + "M=115712,N=1280": { + "file": "silu_config_M115712_N1280.json", + "M": 115712, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1155.5637500000003 + }, + "M=115712,N=1344": { + "file": "silu_config_M115712_N1344.json", + 
"M": 115712, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1102.1234999999997 + }, + "M=115712,N=1408": { + "file": "silu_config_M115712_N1408.json", + "M": 115712, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1083.2835000000005 + }, + "M=115712,N=1440": { + "file": "silu_config_M115712_N1440.json", + "M": 115712, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.8435 + }, + "M=115712,N=1536": { + "file": "silu_config_M115712_N1536.json", + "M": 115712, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1092.7234999999996 + }, + "M=115712,N=1600": { + "file": "silu_config_M115712_N1600.json", + "M": 115712, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.4834999999998 + }, + "M=115712,N=1664": { + "file": "silu_config_M115712_N1664.json", + "M": 115712, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1082.3635 + }, + "M=115712,N=1728": { + "file": "silu_config_M115712_N1728.json", + "M": 115712, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.6035000000002 + }, + "M=115712,N=1760": { + "file": "silu_config_M115712_N1760.json", + "M": 115712, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1117.8435 + }, + "M=115712,N=1792": { + "file": "silu_config_M115712_N1792.json", + "M": 115712, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.1234999999997 + }, + "M=115712,N=1920": { + "file": "silu_config_M115712_N1920.json", + "M": 115712, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.2035 + }, + "M=115712,N=2048": { + "file": "silu_config_M115712_N2048.json", + "M": 115712, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 883.4027499999997 + }, + "M=115712,N=2080": { + "file": "silu_config_M115712_N2080.json", + "M": 115712, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.3639999999996 + }, + "M=115712,N=2240": { + "file": 
"silu_config_M115712_N2240.json", + "M": 115712, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.7242500000002 + }, + "M=115712,N=2400": { + "file": "silu_config_M115712_N2400.json", + "M": 115712, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.4844999999996 + }, + "M=115712,N=2560": { + "file": "silu_config_M115712_N2560.json", + "M": 115712, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.1247499999995 + }, + "M=116736,N=128": { + "file": "silu_config_M116736_N128.json", + "M": 116736, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.71924999999999 + }, + "M=116736,N=160": { + "file": "silu_config_M116736_N160.json", + "M": 116736, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.7195 + }, + "M=116736,N=192": { + "file": "silu_config_M116736_N192.json", + "M": 116736, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.11949999999996 + }, + "M=116736,N=256": { + "file": "silu_config_M116736_N256.json", + "M": 116736, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.3195 + }, + "M=116736,N=320": { + "file": "silu_config_M116736_N320.json", + "M": 116736, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.3200000000001 + }, + "M=116736,N=384": { + "file": "silu_config_M116736_N384.json", + "M": 116736, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.3202499999998 + }, + "M=116736,N=480": { + "file": "silu_config_M116736_N480.json", + "M": 116736, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.52025000000003 + }, + "M=116736,N=512": { + "file": "silu_config_M116736_N512.json", + "M": 116736, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 233.08000000000004 + }, + "M=116736,N=576": { + "file": "silu_config_M116736_N576.json", + "M": 116736, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 542.4012500000001 + }, + 
"M=116736,N=640": { + "file": "silu_config_M116736_N640.json", + "M": 116736, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 557.9212500000001 + }, + "M=116736,N=768": { + "file": "silu_config_M116736_N768.json", + "M": 116736, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 550.6812500000001 + }, + "M=116736,N=800": { + "file": "silu_config_M116736_N800.json", + "M": 116736, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 550.3612500000002 + }, + "M=116736,N=896": { + "file": "silu_config_M116736_N896.json", + "M": 116736, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 561.1612500000003 + }, + "M=116736,N=960": { + "file": "silu_config_M116736_N960.json", + "M": 116736, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 553.28125 + }, + "M=116736,N=1024": { + "file": "silu_config_M116736_N1024.json", + "M": 116736, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 449.5609999999999 + }, + "M=116736,N=1120": { + "file": "silu_config_M116736_N1120.json", + "M": 116736, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1117.4035 + }, + "M=116736,N=1152": { + "file": "silu_config_M116736_N1152.json", + "M": 116736, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1089.5235000000007 + }, + "M=116736,N=1280": { + "file": "silu_config_M116736_N1280.json", + "M": 116736, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1091.2035 + }, + "M=116736,N=1344": { + "file": "silu_config_M116736_N1344.json", + "M": 116736, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1108.1235000000006 + }, + "M=116736,N=1408": { + "file": "silu_config_M116736_N1408.json", + "M": 116736, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.7635 + }, + "M=116736,N=1440": { + "file": "silu_config_M116736_N1440.json", + "M": 116736, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1103.3635 + 
}, + "M=116736,N=1536": { + "file": "silu_config_M116736_N1536.json", + "M": 116736, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.7635000000005 + }, + "M=116736,N=1600": { + "file": "silu_config_M116736_N1600.json", + "M": 116736, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.5235000000002 + }, + "M=116736,N=1664": { + "file": "silu_config_M116736_N1664.json", + "M": 116736, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.8035 + }, + "M=116736,N=1728": { + "file": "silu_config_M116736_N1728.json", + "M": 116736, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1115.2035 + }, + "M=116736,N=1760": { + "file": "silu_config_M116736_N1760.json", + "M": 116736, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.6034999999997 + }, + "M=116736,N=1792": { + "file": "silu_config_M116736_N1792.json", + "M": 116736, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1094.5634999999997 + }, + "M=116736,N=1920": { + "file": "silu_config_M116736_N1920.json", + "M": 116736, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.0434999999998 + }, + "M=116736,N=2048": { + "file": "silu_config_M116736_N2048.json", + "M": 116736, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 880.6827500000004 + }, + "M=116736,N=2080": { + "file": "silu_config_M116736_N2080.json", + "M": 116736, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1214.324 + }, + "M=116736,N=2240": { + "file": "silu_config_M116736_N2240.json", + "M": 116736, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.4842500000004 + }, + "M=116736,N=2400": { + "file": "silu_config_M116736_N2400.json", + "M": 116736, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1348.5245000000004 + }, + "M=116736,N=2560": { + "file": "silu_config_M116736_N2560.json", + "M": 116736, + "N": 2560, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1414.56475 + }, + "M=117760,N=128": { + "file": "silu_config_M117760_N128.json", + "M": 117760, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.43924999999999 + }, + "M=117760,N=160": { + "file": "silu_config_M117760_N160.json", + "M": 117760, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 159.23950000000005 + }, + "M=117760,N=192": { + "file": "silu_config_M117760_N192.json", + "M": 117760, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 144.27975 + }, + "M=117760,N=256": { + "file": "silu_config_M117760_N256.json", + "M": 117760, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.67975 + }, + "M=117760,N=320": { + "file": "silu_config_M117760_N320.json", + "M": 117760, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 290.24024999999995 + }, + "M=117760,N=384": { + "file": "silu_config_M117760_N384.json", + "M": 117760, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.8002500000001 + }, + "M=117760,N=480": { + "file": "silu_config_M117760_N480.json", + "M": 117760, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.2802499999999 + }, + "M=117760,N=512": { + "file": "silu_config_M117760_N512.json", + "M": 117760, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 235.0797500000001 + }, + "M=117760,N=576": { + "file": "silu_config_M117760_N576.json", + "M": 117760, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 552.5212499999999 + }, + "M=117760,N=640": { + "file": "silu_config_M117760_N640.json", + "M": 117760, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.4012500000003 + }, + "M=117760,N=768": { + "file": "silu_config_M117760_N768.json", + "M": 117760, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 568.04125 + }, + "M=117760,N=800": { + "file": "silu_config_M117760_N800.json", + "M": 117760, + "N": 800, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 577.2012500000003 + }, + "M=117760,N=896": { + "file": "silu_config_M117760_N896.json", + "M": 117760, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.2812500000005 + }, + "M=117760,N=960": { + "file": "silu_config_M117760_N960.json", + "M": 117760, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.2812500000002 + }, + "M=117760,N=1024": { + "file": "silu_config_M117760_N1024.json", + "M": 117760, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 453.4409999999998 + }, + "M=117760,N=1120": { + "file": "silu_config_M117760_N1120.json", + "M": 117760, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1118.8835 + }, + "M=117760,N=1152": { + "file": "silu_config_M117760_N1152.json", + "M": 117760, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1078.0834999999997 + }, + "M=117760,N=1280": { + "file": "silu_config_M117760_N1280.json", + "M": 117760, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1100.0034999999998 + }, + "M=117760,N=1344": { + "file": "silu_config_M117760_N1344.json", + "M": 117760, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.0034999999998 + }, + "M=117760,N=1408": { + "file": "silu_config_M117760_N1408.json", + "M": 117760, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1081.6835 + }, + "M=117760,N=1440": { + "file": "silu_config_M117760_N1440.json", + "M": 117760, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.1235000000001 + }, + "M=117760,N=1536": { + "file": "silu_config_M117760_N1536.json", + "M": 117760, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1102.6835 + }, + "M=117760,N=1600": { + "file": "silu_config_M117760_N1600.json", + "M": 117760, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.5235000000002 + }, + "M=117760,N=1664": { + "file": "silu_config_M117760_N1664.json", + "M": 117760, 
+ "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.6835 + }, + "M=117760,N=1728": { + "file": "silu_config_M117760_N1728.json", + "M": 117760, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1112.4434999999999 + }, + "M=117760,N=1760": { + "file": "silu_config_M117760_N1760.json", + "M": 117760, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.8837500000004 + }, + "M=117760,N=1792": { + "file": "silu_config_M117760_N1792.json", + "M": 117760, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1086.4434999999999 + }, + "M=117760,N=1920": { + "file": "silu_config_M117760_N1920.json", + "M": 117760, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1121.0037499999999 + }, + "M=117760,N=2048": { + "file": "silu_config_M117760_N2048.json", + "M": 117760, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 893.64275 + }, + "M=117760,N=2080": { + "file": "silu_config_M117760_N2080.json", + "M": 117760, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1224.9640000000009 + }, + "M=117760,N=2240": { + "file": "silu_config_M117760_N2240.json", + "M": 117760, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.44425 + }, + "M=117760,N=2400": { + "file": "silu_config_M117760_N2400.json", + "M": 117760, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.2444999999998 + }, + "M=117760,N=2560": { + "file": "silu_config_M117760_N2560.json", + "M": 117760, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1425.605 + }, + "M=118784,N=128": { + "file": "silu_config_M118784_N128.json", + "M": 118784, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.99925000000002 + }, + "M=118784,N=160": { + "file": "silu_config_M118784_N160.json", + "M": 118784, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.71975 + }, + "M=118784,N=192": { + "file": 
"silu_config_M118784_N192.json", + "M": 118784, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 147.43949999999998 + }, + "M=118784,N=256": { + "file": "silu_config_M118784_N256.json", + "M": 118784, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 134.0795 + }, + "M=118784,N=320": { + "file": "silu_config_M118784_N320.json", + "M": 118784, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.96025000000003 + }, + "M=118784,N=384": { + "file": "silu_config_M118784_N384.json", + "M": 118784, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 291.8402499999999 + }, + "M=118784,N=480": { + "file": "silu_config_M118784_N480.json", + "M": 118784, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.6802500000001 + }, + "M=118784,N=512": { + "file": "silu_config_M118784_N512.json", + "M": 118784, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 237.16000000000008 + }, + "M=118784,N=576": { + "file": "silu_config_M118784_N576.json", + "M": 118784, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 579.3612499999999 + }, + "M=118784,N=640": { + "file": "silu_config_M118784_N640.json", + "M": 118784, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 571.4012499999999 + }, + "M=118784,N=768": { + "file": "silu_config_M118784_N768.json", + "M": 118784, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.28125 + }, + "M=118784,N=800": { + "file": "silu_config_M118784_N800.json", + "M": 118784, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.1212499999999 + }, + "M=118784,N=896": { + "file": "silu_config_M118784_N896.json", + "M": 118784, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 568.8812499999997 + }, + "M=118784,N=960": { + "file": "silu_config_M118784_N960.json", + "M": 118784, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 574.1212499999999 + }, + "M=118784,N=1024": { + 
"file": "silu_config_M118784_N1024.json", + "M": 118784, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 456.76099999999997 + }, + "M=118784,N=1120": { + "file": "silu_config_M118784_N1120.json", + "M": 118784, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.40375 + }, + "M=118784,N=1152": { + "file": "silu_config_M118784_N1152.json", + "M": 118784, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.9234999999999 + }, + "M=118784,N=1280": { + "file": "silu_config_M118784_N1280.json", + "M": 118784, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1121.3237499999996 + }, + "M=118784,N=1344": { + "file": "silu_config_M118784_N1344.json", + "M": 118784, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1127.0037499999999 + }, + "M=118784,N=1408": { + "file": "silu_config_M118784_N1408.json", + "M": 118784, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1111.8435 + }, + "M=118784,N=1440": { + "file": "silu_config_M118784_N1440.json", + "M": 118784, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1122.9237499999995 + }, + "M=118784,N=1536": { + "file": "silu_config_M118784_N1536.json", + "M": 118784, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.8435000000004 + }, + "M=118784,N=1600": { + "file": "silu_config_M118784_N1600.json", + "M": 118784, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1120.9637499999994 + }, + "M=118784,N=1664": { + "file": "silu_config_M118784_N1664.json", + "M": 118784, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.3235 + }, + "M=118784,N=1728": { + "file": "silu_config_M118784_N1728.json", + "M": 118784, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1134.4037499999995 + }, + "M=118784,N=1760": { + "file": "silu_config_M118784_N1760.json", + "M": 118784, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1124.84375 + }, + "M=118784,N=1792": { + "file": "silu_config_M118784_N1792.json", + "M": 118784, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1103.3235 + }, + "M=118784,N=1920": { + "file": "silu_config_M118784_N1920.json", + "M": 118784, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1131.1637500000006 + }, + "M=118784,N=2048": { + "file": "silu_config_M118784_N2048.json", + "M": 118784, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 905.5227500000005 + }, + "M=118784,N=2080": { + "file": "silu_config_M118784_N2080.json", + "M": 118784, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.8042499999997 + }, + "M=118784,N=2240": { + "file": "silu_config_M118784_N2240.json", + "M": 118784, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1295.0042499999995 + }, + "M=118784,N=2400": { + "file": "silu_config_M118784_N2400.json", + "M": 118784, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1357.1644999999999 + }, + "M=118784,N=2560": { + "file": "silu_config_M118784_N2560.json", + "M": 118784, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1437.6849999999995 + }, + "M=119808,N=128": { + "file": "silu_config_M119808_N128.json", + "M": 119808, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 71.51925 + }, + "M=119808,N=160": { + "file": "silu_config_M119808_N160.json", + "M": 119808, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.9595 + }, + "M=119808,N=192": { + "file": "silu_config_M119808_N192.json", + "M": 119808, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.6395 + }, + "M=119808,N=256": { + "file": "silu_config_M119808_N256.json", + "M": 119808, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.1195 + }, + "M=119808,N=320": { + "file": "silu_config_M119808_N320.json", + "M": 119808, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
296.9202499999999 + }, + "M=119808,N=384": { + "file": "silu_config_M119808_N384.json", + "M": 119808, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.56025 + }, + "M=119808,N=480": { + "file": "silu_config_M119808_N480.json", + "M": 119808, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 301.24024999999995 + }, + "M=119808,N=512": { + "file": "silu_config_M119808_N512.json", + "M": 119808, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 239.0000000000001 + }, + "M=119808,N=576": { + "file": "silu_config_M119808_N576.json", + "M": 119808, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.9612499999998 + }, + "M=119808,N=640": { + "file": "silu_config_M119808_N640.json", + "M": 119808, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 569.2012500000001 + }, + "M=119808,N=768": { + "file": "silu_config_M119808_N768.json", + "M": 119808, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 585.7615000000003 + }, + "M=119808,N=800": { + "file": "silu_config_M119808_N800.json", + "M": 119808, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 578.2012500000001 + }, + "M=119808,N=896": { + "file": "silu_config_M119808_N896.json", + "M": 119808, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.3212499999997 + }, + "M=119808,N=960": { + "file": "silu_config_M119808_N960.json", + "M": 119808, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 581.1212500000001 + }, + "M=119808,N=1024": { + "file": "silu_config_M119808_N1024.json", + "M": 119808, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 460.84075000000007 + }, + "M=119808,N=1120": { + "file": "silu_config_M119808_N1120.json", + "M": 119808, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1145.84375 + }, + "M=119808,N=1152": { + "file": "silu_config_M119808_N1152.json", + "M": 119808, + "N": 1152, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1113.8835 + }, + "M=119808,N=1280": { + "file": "silu_config_M119808_N1280.json", + "M": 119808, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1130.7637499999996 + }, + "M=119808,N=1344": { + "file": "silu_config_M119808_N1344.json", + "M": 119808, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.4837499999999 + }, + "M=119808,N=1408": { + "file": "silu_config_M119808_N1408.json", + "M": 119808, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1138.2037500000001 + }, + "M=119808,N=1440": { + "file": "silu_config_M119808_N1440.json", + "M": 119808, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.88375 + }, + "M=119808,N=1536": { + "file": "silu_config_M119808_N1536.json", + "M": 119808, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.36375 + }, + "M=119808,N=1600": { + "file": "silu_config_M119808_N1600.json", + "M": 119808, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1141.2037499999997 + }, + "M=119808,N=1664": { + "file": "silu_config_M119808_N1664.json", + "M": 119808, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1124.0837499999998 + }, + "M=119808,N=1728": { + "file": "silu_config_M119808_N1728.json", + "M": 119808, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.2037500000001 + }, + "M=119808,N=1760": { + "file": "silu_config_M119808_N1760.json", + "M": 119808, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1142.0037500000003 + }, + "M=119808,N=1792": { + "file": "silu_config_M119808_N1792.json", + "M": 119808, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1123.2037500000001 + }, + "M=119808,N=1920": { + "file": "silu_config_M119808_N1920.json", + "M": 119808, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.0037500000003 + }, + "M=119808,N=2048": { + "file": "silu_config_M119808_N2048.json", + 
"M": 119808, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 913.0027499999997 + }, + "M=119808,N=2080": { + "file": "silu_config_M119808_N2080.json", + "M": 119808, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.56425 + }, + "M=119808,N=2240": { + "file": "silu_config_M119808_N2240.json", + "M": 119808, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1307.5244999999995 + }, + "M=119808,N=2400": { + "file": "silu_config_M119808_N2400.json", + "M": 119808, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.00475 + }, + "M=119808,N=2560": { + "file": "silu_config_M119808_N2560.json", + "M": 119808, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.3650000000007 + }, + "M=120832,N=128": { + "file": "silu_config_M120832_N128.json", + "M": 120832, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.47925000000001 + }, + "M=120832,N=160": { + "file": "silu_config_M120832_N160.json", + "M": 120832, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 159.1595 + }, + "M=120832,N=192": { + "file": "silu_config_M120832_N192.json", + "M": 120832, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.95975000000004 + }, + "M=120832,N=256": { + "file": "silu_config_M120832_N256.json", + "M": 120832, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 136.19974999999988 + }, + "M=120832,N=320": { + "file": "silu_config_M120832_N320.json", + "M": 120832, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.4802500000001 + }, + "M=120832,N=384": { + "file": "silu_config_M120832_N384.json", + "M": 120832, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 296.4402500000001 + }, + "M=120832,N=480": { + "file": "silu_config_M120832_N480.json", + "M": 120832, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 303.80025 + }, + "M=120832,N=512": { + "file": 
"silu_config_M120832_N512.json", + "M": 120832, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 241.0 + }, + "M=120832,N=576": { + "file": "silu_config_M120832_N576.json", + "M": 120832, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 578.00125 + }, + "M=120832,N=640": { + "file": "silu_config_M120832_N640.json", + "M": 120832, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.4012499999999 + }, + "M=120832,N=768": { + "file": "silu_config_M120832_N768.json", + "M": 120832, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.7212499999998 + }, + "M=120832,N=800": { + "file": "silu_config_M120832_N800.json", + "M": 120832, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.28125 + }, + "M=120832,N=896": { + "file": "silu_config_M120832_N896.json", + "M": 120832, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 589.4414999999997 + }, + "M=120832,N=960": { + "file": "silu_config_M120832_N960.json", + "M": 120832, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.4014999999999 + }, + "M=120832,N=1024": { + "file": "silu_config_M120832_N1024.json", + "M": 120832, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 464.88099999999986 + }, + "M=120832,N=1120": { + "file": "silu_config_M120832_N1120.json", + "M": 120832, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.5637499999998 + }, + "M=120832,N=1152": { + "file": "silu_config_M120832_N1152.json", + "M": 120832, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1123.2037500000001 + }, + "M=120832,N=1280": { + "file": "silu_config_M120832_N1280.json", + "M": 120832, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1145.4037500000004 + }, + "M=120832,N=1344": { + "file": "silu_config_M120832_N1344.json", + "M": 120832, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.0837499999998 + }, + 
"M=120832,N=1408": { + "file": "silu_config_M120832_N1408.json", + "M": 120832, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1127.0837499999998 + }, + "M=120832,N=1440": { + "file": "silu_config_M120832_N1440.json", + "M": 120832, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1160.1237499999997 + }, + "M=120832,N=1536": { + "file": "silu_config_M120832_N1536.json", + "M": 120832, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.7237499999997 + }, + "M=120832,N=1600": { + "file": "silu_config_M120832_N1600.json", + "M": 120832, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1157.4037499999995 + }, + "M=120832,N=1664": { + "file": "silu_config_M120832_N1664.json", + "M": 120832, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.0037499999999 + }, + "M=120832,N=1728": { + "file": "silu_config_M120832_N1728.json", + "M": 120832, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.8437500000005 + }, + "M=120832,N=1760": { + "file": "silu_config_M120832_N1760.json", + "M": 120832, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.763750000001 + }, + "M=120832,N=1792": { + "file": "silu_config_M120832_N1792.json", + "M": 120832, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.5637500000007 + }, + "M=120832,N=1920": { + "file": "silu_config_M120832_N1920.json", + "M": 120832, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.1637499999997 + }, + "M=120832,N=2048": { + "file": "silu_config_M120832_N2048.json", + "M": 120832, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 909.64275 + }, + "M=120832,N=2080": { + "file": "silu_config_M120832_N2080.json", + "M": 120832, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1255.0842499999994 + }, + "M=120832,N=2240": { + "file": "silu_config_M120832_N2240.json", + "M": 120832, + "N": 2240, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.7644999999993 + }, + "M=120832,N=2400": { + "file": "silu_config_M120832_N2400.json", + "M": 120832, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1404.9247499999997 + }, + "M=120832,N=2560": { + "file": "silu_config_M120832_N2560.json", + "M": 120832, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1465.6049999999996 + }, + "M=121856,N=128": { + "file": "silu_config_M121856_N128.json", + "M": 121856, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.75925 + }, + "M=121856,N=160": { + "file": "silu_config_M121856_N160.json", + "M": 121856, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 160.2395 + }, + "M=121856,N=192": { + "file": "silu_config_M121856_N192.json", + "M": 121856, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 162.07975000000002 + }, + "M=121856,N=256": { + "file": "silu_config_M121856_N256.json", + "M": 121856, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.59975000000003 + }, + "M=121856,N=320": { + "file": "silu_config_M121856_N320.json", + "M": 121856, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 302.0005 + }, + "M=121856,N=384": { + "file": "silu_config_M121856_N384.json", + "M": 121856, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.3202500000001 + }, + "M=121856,N=480": { + "file": "silu_config_M121856_N480.json", + "M": 121856, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 305.9602500000001 + }, + "M=121856,N=512": { + "file": "silu_config_M121856_N512.json", + "M": 121856, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 243.03999999999996 + }, + "M=121856,N=576": { + "file": "silu_config_M121856_N576.json", + "M": 121856, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.6412500000004 + }, + "M=121856,N=640": { + "file": "silu_config_M121856_N640.json", + "M": 121856, + "N": 
640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.6012499999999 + }, + "M=121856,N=768": { + "file": "silu_config_M121856_N768.json", + "M": 121856, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.24125 + }, + "M=121856,N=800": { + "file": "silu_config_M121856_N800.json", + "M": 121856, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.6012499999999 + }, + "M=121856,N=896": { + "file": "silu_config_M121856_N896.json", + "M": 121856, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.2015000000001 + }, + "M=121856,N=960": { + "file": "silu_config_M121856_N960.json", + "M": 121856, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.3215000000002 + }, + "M=121856,N=1024": { + "file": "silu_config_M121856_N1024.json", + "M": 121856, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 468.3209999999999 + }, + "M=121856,N=1120": { + "file": "silu_config_M121856_N1120.json", + "M": 121856, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1152.8037500000003 + }, + "M=121856,N=1152": { + "file": "silu_config_M121856_N1152.json", + "M": 121856, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1142.0437499999998 + }, + "M=121856,N=1280": { + "file": "silu_config_M121856_N1280.json", + "M": 121856, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1142.76375 + }, + "M=121856,N=1344": { + "file": "silu_config_M121856_N1344.json", + "M": 121856, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1172.4037499999997 + }, + "M=121856,N=1408": { + "file": "silu_config_M121856_N1408.json", + "M": 121856, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1149.96375 + }, + "M=121856,N=1440": { + "file": "silu_config_M121856_N1440.json", + "M": 121856, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1159.6037500000002 + }, + "M=121856,N=1536": { + "file": 
"silu_config_M121856_N1536.json", + "M": 121856, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.12375 + }, + "M=121856,N=1600": { + "file": "silu_config_M121856_N1600.json", + "M": 121856, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1144.56375 + }, + "M=121856,N=1664": { + "file": "silu_config_M121856_N1664.json", + "M": 121856, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1153.1637500000002 + }, + "M=121856,N=1728": { + "file": "silu_config_M121856_N1728.json", + "M": 121856, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.6437500000002 + }, + "M=121856,N=1760": { + "file": "silu_config_M121856_N1760.json", + "M": 121856, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.8837499999995 + }, + "M=121856,N=1792": { + "file": "silu_config_M121856_N1792.json", + "M": 121856, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.8437499999998 + }, + "M=121856,N=1920": { + "file": "silu_config_M121856_N1920.json", + "M": 121856, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1164.4837500000003 + }, + "M=121856,N=2048": { + "file": "silu_config_M121856_N2048.json", + "M": 121856, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 923.9229999999998 + }, + "M=121856,N=2080": { + "file": "silu_config_M121856_N2080.json", + "M": 121856, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.0042500000002 + }, + "M=121856,N=2240": { + "file": "silu_config_M121856_N2240.json", + "M": 121856, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1326.9244999999999 + }, + "M=121856,N=2400": { + "file": "silu_config_M121856_N2400.json", + "M": 121856, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1409.8047499999998 + }, + "M=121856,N=2560": { + "file": "silu_config_M121856_N2560.json", + "M": 121856, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1492.5652500000006 + }, + "M=122880,N=128": { + "file": "silu_config_M122880_N128.json", + "M": 122880, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.11950000000002 + }, + "M=122880,N=160": { + "file": "silu_config_M122880_N160.json", + "M": 122880, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.95949999999993 + }, + "M=122880,N=192": { + "file": "silu_config_M122880_N192.json", + "M": 122880, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.4395 + }, + "M=122880,N=256": { + "file": "silu_config_M122880_N256.json", + "M": 122880, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.07950000000005 + }, + "M=122880,N=320": { + "file": "silu_config_M122880_N320.json", + "M": 122880, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 302.0002500000001 + }, + "M=122880,N=384": { + "file": "silu_config_M122880_N384.json", + "M": 122880, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.5602499999999 + }, + "M=122880,N=480": { + "file": "silu_config_M122880_N480.json", + "M": 122880, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.24024999999995 + }, + "M=122880,N=512": { + "file": "silu_config_M122880_N512.json", + "M": 122880, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 245.0402499999999 + }, + "M=122880,N=576": { + "file": "silu_config_M122880_N576.json", + "M": 122880, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.32125 + }, + "M=122880,N=640": { + "file": "silu_config_M122880_N640.json", + "M": 122880, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.1212500000001 + }, + "M=122880,N=768": { + "file": "silu_config_M122880_N768.json", + "M": 122880, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.7612499999998 + }, + "M=122880,N=800": { + "file": "silu_config_M122880_N800.json", + "M": 122880, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 569.4812499999998 + }, + "M=122880,N=896": { + "file": "silu_config_M122880_N896.json", + "M": 122880, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.4012500000006 + }, + "M=122880,N=960": { + "file": "silu_config_M122880_N960.json", + "M": 122880, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.9614999999999 + }, + "M=122880,N=1024": { + "file": "silu_config_M122880_N1024.json", + "M": 122880, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 471.9209999999998 + }, + "M=122880,N=1120": { + "file": "silu_config_M122880_N1120.json", + "M": 122880, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1132.2437499999996 + }, + "M=122880,N=1152": { + "file": "silu_config_M122880_N1152.json", + "M": 122880, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1133.9237500000004 + }, + "M=122880,N=1280": { + "file": "silu_config_M122880_N1280.json", + "M": 122880, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.0037500000003 + }, + "M=122880,N=1344": { + "file": "silu_config_M122880_N1344.json", + "M": 122880, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1135.0037500000003 + }, + "M=122880,N=1408": { + "file": "silu_config_M122880_N1408.json", + "M": 122880, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.5637500000003 + }, + "M=122880,N=1440": { + "file": "silu_config_M122880_N1440.json", + "M": 122880, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1147.6437499999997 + }, + "M=122880,N=1536": { + "file": "silu_config_M122880_N1536.json", + "M": 122880, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.6037499999998 + }, + "M=122880,N=1600": { + "file": "silu_config_M122880_N1600.json", + "M": 122880, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.6837500000001 + }, + "M=122880,N=1664": { + "file": "silu_config_M122880_N1664.json", + 
"M": 122880, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.76375 + }, + "M=122880,N=1728": { + "file": "silu_config_M122880_N1728.json", + "M": 122880, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.1237499999997 + }, + "M=122880,N=1760": { + "file": "silu_config_M122880_N1760.json", + "M": 122880, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1141.7237499999997 + }, + "M=122880,N=1792": { + "file": "silu_config_M122880_N1792.json", + "M": 122880, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.0437499999998 + }, + "M=122880,N=1920": { + "file": "silu_config_M122880_N1920.json", + "M": 122880, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.32375 + }, + "M=122880,N=2048": { + "file": "silu_config_M122880_N2048.json", + "M": 122880, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 917.2027500000008 + }, + "M=122880,N=2080": { + "file": "silu_config_M122880_N2080.json", + "M": 122880, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1258.56425 + }, + "M=122880,N=2240": { + "file": "silu_config_M122880_N2240.json", + "M": 122880, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.2044999999994 + }, + "M=122880,N=2400": { + "file": "silu_config_M122880_N2400.json", + "M": 122880, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1409.52475 + }, + "M=122880,N=2560": { + "file": "silu_config_M122880_N2560.json", + "M": 122880, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1455.165 + }, + "M=123904,N=128": { + "file": "silu_config_M123904_N128.json", + "M": 123904, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.55924999999999 + }, + "M=123904,N=160": { + "file": "silu_config_M123904_N160.json", + "M": 123904, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.03975000000003 + }, + "M=123904,N=192": { + "file": 
"silu_config_M123904_N192.json", + "M": 123904, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 167.19975000000002 + }, + "M=123904,N=256": { + "file": "silu_config_M123904_N256.json", + "M": 123904, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.51950000000005 + }, + "M=123904,N=320": { + "file": "silu_config_M123904_N320.json", + "M": 123904, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.72024999999996 + }, + "M=123904,N=384": { + "file": "silu_config_M123904_N384.json", + "M": 123904, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 304.64025000000004 + }, + "M=123904,N=480": { + "file": "silu_config_M123904_N480.json", + "M": 123904, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 310.68025 + }, + "M=123904,N=512": { + "file": "silu_config_M123904_N512.json", + "M": 123904, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 247.19999999999993 + }, + "M=123904,N=576": { + "file": "silu_config_M123904_N576.json", + "M": 123904, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 580.7212499999998 + }, + "M=123904,N=640": { + "file": "silu_config_M123904_N640.json", + "M": 123904, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.3615 + }, + "M=123904,N=768": { + "file": "silu_config_M123904_N768.json", + "M": 123904, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 597.6014999999998 + }, + "M=123904,N=800": { + "file": "silu_config_M123904_N800.json", + "M": 123904, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.9214999999999 + }, + "M=123904,N=896": { + "file": "silu_config_M123904_N896.json", + "M": 123904, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 581.7612500000002 + }, + "M=123904,N=960": { + "file": "silu_config_M123904_N960.json", + "M": 123904, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.9614999999999 + }, + "M=123904,N=1024": { 
+ "file": "silu_config_M123904_N1024.json", + "M": 123904, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 475.9209999999998 + }, + "M=123904,N=1120": { + "file": "silu_config_M123904_N1120.json", + "M": 123904, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1191.324 + }, + "M=123904,N=1152": { + "file": "silu_config_M123904_N1152.json", + "M": 123904, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1160.5637499999998 + }, + "M=123904,N=1280": { + "file": "silu_config_M123904_N1280.json", + "M": 123904, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.6437500000006 + }, + "M=123904,N=1344": { + "file": "silu_config_M123904_N1344.json", + "M": 123904, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1173.2037499999997 + }, + "M=123904,N=1408": { + "file": "silu_config_M123904_N1408.json", + "M": 123904, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1180.8839999999996 + }, + "M=123904,N=1440": { + "file": "silu_config_M123904_N1440.json", + "M": 123904, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.6839999999997 + }, + "M=123904,N=1536": { + "file": "silu_config_M123904_N1536.json", + "M": 123904, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.9237500000008 + }, + "M=123904,N=1600": { + "file": "silu_config_M123904_N1600.json", + "M": 123904, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1190.964 + }, + "M=123904,N=1664": { + "file": "silu_config_M123904_N1664.json", + "M": 123904, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.8037499999996 + }, + "M=123904,N=1728": { + "file": "silu_config_M123904_N1728.json", + "M": 123904, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.88375 + }, + "M=123904,N=1760": { + "file": "silu_config_M123904_N1760.json", + "M": 123904, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1184.6040000000003 + }, + "M=123904,N=1792": { + "file": "silu_config_M123904_N1792.json", + "M": 123904, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.6437499999997 + }, + "M=123904,N=1920": { + "file": "silu_config_M123904_N1920.json", + "M": 123904, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.0437499999998 + }, + "M=123904,N=2048": { + "file": "silu_config_M123904_N2048.json", + "M": 123904, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 941.2429999999999 + }, + "M=123904,N=2080": { + "file": "silu_config_M123904_N2080.json", + "M": 123904, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1300.4042499999996 + }, + "M=123904,N=2240": { + "file": "silu_config_M123904_N2240.json", + "M": 123904, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1371.5647499999995 + }, + "M=123904,N=2400": { + "file": "silu_config_M123904_N2400.json", + "M": 123904, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.6450000000004 + }, + "M=123904,N=2560": { + "file": "silu_config_M123904_N2560.json", + "M": 123904, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.3652500000012 + }, + "M=124928,N=128": { + "file": "silu_config_M124928_N128.json", + "M": 124928, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.31925000000001 + }, + "M=124928,N=160": { + "file": "silu_config_M124928_N160.json", + "M": 124928, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 163.87974999999994 + }, + "M=124928,N=192": { + "file": "silu_config_M124928_N192.json", + "M": 124928, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.51974999999996 + }, + "M=124928,N=256": { + "file": "silu_config_M124928_N256.json", + "M": 124928, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.5995 + }, + "M=124928,N=320": { + "file": "silu_config_M124928_N320.json", + "M": 124928, + "N": 320, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 308.88025000000005 + }, + "M=124928,N=384": { + "file": "silu_config_M124928_N384.json", + "M": 124928, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.24024999999995 + }, + "M=124928,N=480": { + "file": "silu_config_M124928_N480.json", + "M": 124928, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.92025000000024 + }, + "M=124928,N=512": { + "file": "silu_config_M124928_N512.json", + "M": 124928, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 248.9999999999999 + }, + "M=124928,N=576": { + "file": "silu_config_M124928_N576.json", + "M": 124928, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 585.5215000000003 + }, + "M=124928,N=640": { + "file": "silu_config_M124928_N640.json", + "M": 124928, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 600.6815000000001 + }, + "M=124928,N=768": { + "file": "silu_config_M124928_N768.json", + "M": 124928, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 605.7615000000003 + }, + "M=124928,N=800": { + "file": "silu_config_M124928_N800.json", + "M": 124928, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.7215000000003 + }, + "M=124928,N=896": { + "file": "silu_config_M124928_N896.json", + "M": 124928, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.7615000000001 + }, + "M=124928,N=960": { + "file": "silu_config_M124928_N960.json", + "M": 124928, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 596.5615 + }, + "M=124928,N=1024": { + "file": "silu_config_M124928_N1024.json", + "M": 124928, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 479.3212500000002 + }, + "M=124928,N=1120": { + "file": "silu_config_M124928_N1120.json", + "M": 124928, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1159.2837499999996 + }, + "M=124928,N=1152": { + "file": "silu_config_M124928_N1152.json", + 
"M": 124928, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.7637499999996 + }, + "M=124928,N=1280": { + "file": "silu_config_M124928_N1280.json", + "M": 124928, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1161.8837499999995 + }, + "M=124928,N=1344": { + "file": "silu_config_M124928_N1344.json", + "M": 124928, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1172.1237500000002 + }, + "M=124928,N=1408": { + "file": "silu_config_M124928_N1408.json", + "M": 124928, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.1240000000003 + }, + "M=124928,N=1440": { + "file": "silu_config_M124928_N1440.json", + "M": 124928, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.8839999999996 + }, + "M=124928,N=1536": { + "file": "silu_config_M124928_N1536.json", + "M": 124928, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.92375 + }, + "M=124928,N=1600": { + "file": "silu_config_M124928_N1600.json", + "M": 124928, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1195.0439999999999 + }, + "M=124928,N=1664": { + "file": "silu_config_M124928_N1664.json", + "M": 124928, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1171.84375 + }, + "M=124928,N=1728": { + "file": "silu_config_M124928_N1728.json", + "M": 124928, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.0439999999994 + }, + "M=124928,N=1760": { + "file": "silu_config_M124928_N1760.json", + "M": 124928, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1200.8439999999996 + }, + "M=124928,N=1792": { + "file": "silu_config_M124928_N1792.json", + "M": 124928, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1181.1237499999997 + }, + "M=124928,N=1920": { + "file": "silu_config_M124928_N1920.json", + "M": 124928, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1178.2837499999996 + }, + 
"M=124928,N=2048": { + "file": "silu_config_M124928_N2048.json", + "M": 124928, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 952.2429999999999 + }, + "M=124928,N=2080": { + "file": "silu_config_M124928_N2080.json", + "M": 124928, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.6444999999999 + }, + "M=124928,N=2240": { + "file": "silu_config_M124928_N2240.json", + "M": 124928, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1364.9247500000001 + }, + "M=124928,N=2400": { + "file": "silu_config_M124928_N2400.json", + "M": 124928, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1446.4849999999997 + }, + "M=124928,N=2560": { + "file": "silu_config_M124928_N2560.json", + "M": 124928, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.8852499999994 + }, + "M=125952,N=128": { + "file": "silu_config_M125952_N128.json", + "M": 125952, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.27950000000003 + }, + "M=125952,N=160": { + "file": "silu_config_M125952_N160.json", + "M": 125952, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.83975000000004 + }, + "M=125952,N=192": { + "file": "silu_config_M125952_N192.json", + "M": 125952, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.87950000000004 + }, + "M=125952,N=256": { + "file": "silu_config_M125952_N256.json", + "M": 125952, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.11950000000002 + }, + "M=125952,N=320": { + "file": "silu_config_M125952_N320.json", + "M": 125952, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.56024999999994 + }, + "M=125952,N=384": { + "file": "silu_config_M125952_N384.json", + "M": 125952, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.72024999999996 + }, + "M=125952,N=480": { + "file": "silu_config_M125952_N480.json", + "M": 125952, + "N": 480, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 315.88024999999993 + }, + "M=125952,N=512": { + "file": "silu_config_M125952_N512.json", + "M": 125952, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 251.00000000000023 + }, + "M=125952,N=576": { + "file": "silu_config_M125952_N576.json", + "M": 125952, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 590.2414999999999 + }, + "M=125952,N=640": { + "file": "silu_config_M125952_N640.json", + "M": 125952, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.6415000000002 + }, + "M=125952,N=768": { + "file": "silu_config_M125952_N768.json", + "M": 125952, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.1215 + }, + "M=125952,N=800": { + "file": "silu_config_M125952_N800.json", + "M": 125952, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2014999999999 + }, + "M=125952,N=896": { + "file": "silu_config_M125952_N896.json", + "M": 125952, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 602.1614999999999 + }, + "M=125952,N=960": { + "file": "silu_config_M125952_N960.json", + "M": 125952, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.0015000000003 + }, + "M=125952,N=1024": { + "file": "silu_config_M125952_N1024.json", + "M": 125952, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.20100000000025 + }, + "M=125952,N=1120": { + "file": "silu_config_M125952_N1120.json", + "M": 125952, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.364 + }, + "M=125952,N=1152": { + "file": "silu_config_M125952_N1152.json", + "M": 125952, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.0037500000003 + }, + "M=125952,N=1280": { + "file": "silu_config_M125952_N1280.json", + "M": 125952, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.9637500000003 + }, + "M=125952,N=1344": { + "file": "silu_config_M125952_N1344.json", + "M": 125952, + "N": 1344, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1208.0839999999998 + }, + "M=125952,N=1408": { + "file": "silu_config_M125952_N1408.json", + "M": 125952, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.6039999999998 + }, + "M=125952,N=1440": { + "file": "silu_config_M125952_N1440.json", + "M": 125952, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1193.2440000000001 + }, + "M=125952,N=1536": { + "file": "silu_config_M125952_N1536.json", + "M": 125952, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.7640000000001 + }, + "M=125952,N=1600": { + "file": "silu_config_M125952_N1600.json", + "M": 125952, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.964 + }, + "M=125952,N=1664": { + "file": "silu_config_M125952_N1664.json", + "M": 125952, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1181.6037499999998 + }, + "M=125952,N=1728": { + "file": "silu_config_M125952_N1728.json", + "M": 125952, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.4839999999995 + }, + "M=125952,N=1760": { + "file": "silu_config_M125952_N1760.json", + "M": 125952, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.1240000000003 + }, + "M=125952,N=1792": { + "file": "silu_config_M125952_N1792.json", + "M": 125952, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.0039999999995 + }, + "M=125952,N=1920": { + "file": "silu_config_M125952_N1920.json", + "M": 125952, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.0040000000004 + }, + "M=125952,N=2048": { + "file": "silu_config_M125952_N2048.json", + "M": 125952, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 950.203 + }, + "M=125952,N=2080": { + "file": "silu_config_M125952_N2080.json", + "M": 125952, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.8844999999997 + }, + "M=125952,N=2240": { + "file": 
"silu_config_M125952_N2240.json", + "M": 125952, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.8847499999997 + }, + "M=125952,N=2400": { + "file": "silu_config_M125952_N2400.json", + "M": 125952, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.7249999999995 + }, + "M=125952,N=2560": { + "file": "silu_config_M125952_N2560.json", + "M": 125952, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1513.80525 + }, + "M=126976,N=128": { + "file": "silu_config_M126976_N128.json", + "M": 126976, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.35925 + }, + "M=126976,N=160": { + "file": "silu_config_M126976_N160.json", + "M": 126976, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 171.15949999999998 + }, + "M=126976,N=192": { + "file": "silu_config_M126976_N192.json", + "M": 126976, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.11949999999996 + }, + "M=126976,N=256": { + "file": "silu_config_M126976_N256.json", + "M": 126976, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.7595 + }, + "M=126976,N=320": { + "file": "silu_config_M126976_N320.json", + "M": 126976, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.9602499999999 + }, + "M=126976,N=384": { + "file": "silu_config_M126976_N384.json", + "M": 126976, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.16049999999996 + }, + "M=126976,N=480": { + "file": "silu_config_M126976_N480.json", + "M": 126976, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.0402500000001 + }, + "M=126976,N=512": { + "file": "silu_config_M126976_N512.json", + "M": 126976, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 240.9202499999999 + }, + "M=126976,N=576": { + "file": "silu_config_M126976_N576.json", + "M": 126976, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.9614999999999 + }, + "M=126976,N=640": 
{ + "file": "silu_config_M126976_N640.json", + "M": 126976, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 602.4815000000001 + }, + "M=126976,N=768": { + "file": "silu_config_M126976_N768.json", + "M": 126976, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 604.0815 + }, + "M=126976,N=800": { + "file": "silu_config_M126976_N800.json", + "M": 126976, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.8814999999997 + }, + "M=126976,N=896": { + "file": "silu_config_M126976_N896.json", + "M": 126976, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 595.7615000000003 + }, + "M=126976,N=960": { + "file": "silu_config_M126976_N960.json", + "M": 126976, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 623.1615000000002 + }, + "M=126976,N=1024": { + "file": "silu_config_M126976_N1024.json", + "M": 126976, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 486.88099999999986 + }, + "M=126976,N=1120": { + "file": "silu_config_M126976_N1120.json", + "M": 126976, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.2037499999997 + }, + "M=126976,N=1152": { + "file": "silu_config_M126976_N1152.json", + "M": 126976, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1183.7240000000002 + }, + "M=126976,N=1280": { + "file": "silu_config_M126976_N1280.json", + "M": 126976, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1180.2037500000001 + }, + "M=126976,N=1344": { + "file": "silu_config_M126976_N1344.json", + "M": 126976, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.2440000000001 + }, + "M=126976,N=1408": { + "file": "silu_config_M126976_N1408.json", + "M": 126976, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1197.884000000001 + }, + "M=126976,N=1440": { + "file": "silu_config_M126976_N1440.json", + "M": 126976, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1207.844 + }, + "M=126976,N=1536": { + "file": "silu_config_M126976_N1536.json", + "M": 126976, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1173.32375 + }, + "M=126976,N=1600": { + "file": "silu_config_M126976_N1600.json", + "M": 126976, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.1639999999998 + }, + "M=126976,N=1664": { + "file": "silu_config_M126976_N1664.json", + "M": 126976, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1213.324 + }, + "M=126976,N=1728": { + "file": "silu_config_M126976_N1728.json", + "M": 126976, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1212.444 + }, + "M=126976,N=1760": { + "file": "silu_config_M126976_N1760.json", + "M": 126976, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.6040000000003 + }, + "M=126976,N=1792": { + "file": "silu_config_M126976_N1792.json", + "M": 126976, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.5639999999994 + }, + "M=126976,N=1920": { + "file": "silu_config_M126976_N1920.json", + "M": 126976, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1203.804 + }, + "M=126976,N=2048": { + "file": "silu_config_M126976_N2048.json", + "M": 126976, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 966.2829999999994 + }, + "M=126976,N=2080": { + "file": "silu_config_M126976_N2080.json", + "M": 126976, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1321.4445 + }, + "M=126976,N=2240": { + "file": "silu_config_M126976_N2240.json", + "M": 126976, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.08475 + }, + "M=126976,N=2400": { + "file": "silu_config_M126976_N2400.json", + "M": 126976, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.3649999999998 + }, + "M=126976,N=2560": { + "file": "silu_config_M126976_N2560.json", + "M": 126976, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1533.8452500000003 + }, + "M=128000,N=128": { + "file": "silu_config_M128000_N128.json", + "M": 128000, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.7995 + }, + "M=128000,N=160": { + "file": "silu_config_M128000_N160.json", + "M": 128000, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.11975000000004 + }, + "M=128000,N=192": { + "file": "silu_config_M128000_N192.json", + "M": 128000, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.1995 + }, + "M=128000,N=256": { + "file": "silu_config_M128000_N256.json", + "M": 128000, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.19974999999994 + }, + "M=128000,N=320": { + "file": "silu_config_M128000_N320.json", + "M": 128000, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.92025000000007 + }, + "M=128000,N=384": { + "file": "silu_config_M128000_N384.json", + "M": 128000, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.76025000000004 + }, + "M=128000,N=480": { + "file": "silu_config_M128000_N480.json", + "M": 128000, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.84024999999986 + }, + "M=128000,N=512": { + "file": "silu_config_M128000_N512.json", + "M": 128000, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 255.19999999999993 + }, + "M=128000,N=576": { + "file": "silu_config_M128000_N576.json", + "M": 128000, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.0415 + }, + "M=128000,N=640": { + "file": "silu_config_M128000_N640.json", + "M": 128000, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.4014999999999 + }, + "M=128000,N=768": { + "file": "silu_config_M128000_N768.json", + "M": 128000, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.9214999999997 + }, + "M=128000,N=800": { + "file": "silu_config_M128000_N800.json", + "M": 128000, + "N": 800, + "rows_per_block": 5, + "vec_size": 2, 
+ "time_us": 630.5615000000003 + }, + "M=128000,N=896": { + "file": "silu_config_M128000_N896.json", + "M": 128000, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.9215000000002 + }, + "M=128000,N=960": { + "file": "silu_config_M128000_N960.json", + "M": 128000, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 606.9214999999999 + }, + "M=128000,N=1024": { + "file": "silu_config_M128000_N1024.json", + "M": 128000, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 490.8409999999999 + }, + "M=128000,N=1120": { + "file": "silu_config_M128000_N1120.json", + "M": 128000, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.324 + }, + "M=128000,N=1152": { + "file": "silu_config_M128000_N1152.json", + "M": 128000, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.0437499999998 + }, + "M=128000,N=1280": { + "file": "silu_config_M128000_N1280.json", + "M": 128000, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1195.7239999999997 + }, + "M=128000,N=1344": { + "file": "silu_config_M128000_N1344.json", + "M": 128000, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.7239999999997 + }, + "M=128000,N=1408": { + "file": "silu_config_M128000_N1408.json", + "M": 128000, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1193.924 + }, + "M=128000,N=1440": { + "file": "silu_config_M128000_N1440.json", + "M": 128000, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1203.524 + }, + "M=128000,N=1536": { + "file": "silu_config_M128000_N1536.json", + "M": 128000, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1185.40375 + }, + "M=128000,N=1600": { + "file": "silu_config_M128000_N1600.json", + "M": 128000, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1215.0439999999999 + }, + "M=128000,N=1664": { + "file": "silu_config_M128000_N1664.json", + "M": 128000, + "N": 1664, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1198.2440000000001 + }, + "M=128000,N=1728": { + "file": "silu_config_M128000_N1728.json", + "M": 128000, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1186.7239999999997 + }, + "M=128000,N=1760": { + "file": "silu_config_M128000_N1760.json", + "M": 128000, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1212.5239999999994 + }, + "M=128000,N=1792": { + "file": "silu_config_M128000_N1792.json", + "M": 128000, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.1639999999993 + }, + "M=128000,N=1920": { + "file": "silu_config_M128000_N1920.json", + "M": 128000, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1194.8040000000005 + }, + "M=128000,N=2048": { + "file": "silu_config_M128000_N2048.json", + "M": 128000, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 964.4030000000007 + }, + "M=128000,N=2080": { + "file": "silu_config_M128000_N2080.json", + "M": 128000, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1332.4444999999996 + }, + "M=128000,N=2240": { + "file": "silu_config_M128000_N2240.json", + "M": 128000, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1398.3647499999997 + }, + "M=128000,N=2400": { + "file": "silu_config_M128000_N2400.json", + "M": 128000, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.6850000000004 + }, + "M=128000,N=2560": { + "file": "silu_config_M128000_N2560.json", + "M": 128000, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.5652500000006 + }, + "M=129024,N=128": { + "file": "silu_config_M129024_N128.json", + "M": 129024, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.15925 + }, + "M=129024,N=160": { + "file": "silu_config_M129024_N160.json", + "M": 129024, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.23975000000004 + }, + "M=129024,N=192": { + "file": 
"silu_config_M129024_N192.json", + "M": 129024, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.43974999999998 + }, + "M=129024,N=256": { + "file": "silu_config_M129024_N256.json", + "M": 129024, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.43975 + }, + "M=129024,N=320": { + "file": "silu_config_M129024_N320.json", + "M": 129024, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.3204999999998 + }, + "M=129024,N=384": { + "file": "silu_config_M129024_N384.json", + "M": 129024, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 316.6802500000001 + }, + "M=129024,N=480": { + "file": "silu_config_M129024_N480.json", + "M": 129024, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.8002500000001 + }, + "M=129024,N=512": { + "file": "silu_config_M129024_N512.json", + "M": 129024, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 257.0799999999998 + }, + "M=129024,N=576": { + "file": "silu_config_M129024_N576.json", + "M": 129024, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.0815 + }, + "M=129024,N=640": { + "file": "silu_config_M129024_N640.json", + "M": 129024, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.1614999999999 + }, + "M=129024,N=768": { + "file": "silu_config_M129024_N768.json", + "M": 129024, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 617.4814999999999 + }, + "M=129024,N=800": { + "file": "silu_config_M129024_N800.json", + "M": 129024, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.8415 + }, + "M=129024,N=896": { + "file": "silu_config_M129024_N896.json", + "M": 129024, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.1614999999999 + }, + "M=129024,N=960": { + "file": "silu_config_M129024_N960.json", + "M": 129024, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.5617499999998 + }, + "M=129024,N=1024": { + "file": 
"silu_config_M129024_N1024.json", + "M": 129024, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 495.1610000000003 + }, + "M=129024,N=1120": { + "file": "silu_config_M129024_N1120.json", + "M": 129024, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1234.4840000000004 + }, + "M=129024,N=1152": { + "file": "silu_config_M129024_N1152.json", + "M": 129024, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4039999999995 + }, + "M=129024,N=1280": { + "file": "silu_config_M129024_N1280.json", + "M": 129024, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1199.1239999999998 + }, + "M=129024,N=1344": { + "file": "silu_config_M129024_N1344.json", + "M": 129024, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1225.6839999999997 + }, + "M=129024,N=1408": { + "file": "silu_config_M129024_N1408.json", + "M": 129024, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1210.6439999999998 + }, + "M=129024,N=1440": { + "file": "silu_config_M129024_N1440.json", + "M": 129024, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1238.1239999999998 + }, + "M=129024,N=1536": { + "file": "silu_config_M129024_N1536.json", + "M": 129024, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1213.0040000000004 + }, + "M=129024,N=1600": { + "file": "silu_config_M129024_N1600.json", + "M": 129024, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1228.0839999999998 + }, + "M=129024,N=1664": { + "file": "silu_config_M129024_N1664.json", + "M": 129024, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1194.8040000000005 + }, + "M=129024,N=1728": { + "file": "silu_config_M129024_N1728.json", + "M": 129024, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1231.7239999999997 + }, + "M=129024,N=1760": { + "file": "silu_config_M129024_N1760.json", + "M": 129024, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 1208.0040000000008 + }, + "M=129024,N=1792": { + "file": "silu_config_M129024_N1792.json", + "M": 129024, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1214.524 + }, + "M=129024,N=1920": { + "file": "silu_config_M129024_N1920.json", + "M": 129024, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.4439999999995 + }, + "M=129024,N=2048": { + "file": "silu_config_M129024_N2048.json", + "M": 129024, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 981.723 + }, + "M=129024,N=2080": { + "file": "silu_config_M129024_N2080.json", + "M": 129024, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.4844999999996 + }, + "M=129024,N=2240": { + "file": "silu_config_M129024_N2240.json", + "M": 129024, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1418.8047499999998 + }, + "M=129024,N=2400": { + "file": "silu_config_M129024_N2400.json", + "M": 129024, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1494.3652499999998 + }, + "M=129024,N=2560": { + "file": "silu_config_M129024_N2560.json", + "M": 129024, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1547.9255000000003 + }, + "M=130048,N=128": { + "file": "silu_config_M130048_N128.json", + "M": 130048, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 76.99924999999999 + }, + "M=130048,N=160": { + "file": "silu_config_M130048_N160.json", + "M": 130048, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.47974999999994 + }, + "M=130048,N=192": { + "file": "silu_config_M130048_N192.json", + "M": 130048, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.4794999999999 + }, + "M=130048,N=256": { + "file": "silu_config_M130048_N256.json", + "M": 130048, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.35975 + }, + "M=130048,N=320": { + "file": "silu_config_M130048_N320.json", + "M": 130048, + "N": 320, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 321.5602499999999 + }, + "M=130048,N=384": { + "file": "silu_config_M130048_N384.json", + "M": 130048, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.24025000000006 + }, + "M=130048,N=480": { + "file": "silu_config_M130048_N480.json", + "M": 130048, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 325.76025000000004 + }, + "M=130048,N=512": { + "file": "silu_config_M130048_N512.json", + "M": 130048, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 258.9202499999999 + }, + "M=130048,N=576": { + "file": "silu_config_M130048_N576.json", + "M": 130048, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 631.4414999999999 + }, + "M=130048,N=640": { + "file": "silu_config_M130048_N640.json", + "M": 130048, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.6015 + }, + "M=130048,N=768": { + "file": "silu_config_M130048_N768.json", + "M": 130048, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 607.1614999999999 + }, + "M=130048,N=800": { + "file": "silu_config_M130048_N800.json", + "M": 130048, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 634.8415 + }, + "M=130048,N=896": { + "file": "silu_config_M130048_N896.json", + "M": 130048, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.1614999999999 + }, + "M=130048,N=960": { + "file": "silu_config_M130048_N960.json", + "M": 130048, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 626.2014999999999 + }, + "M=130048,N=1024": { + "file": "silu_config_M130048_N1024.json", + "M": 130048, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 499.4409999999998 + }, + "M=130048,N=1120": { + "file": "silu_config_M130048_N1120.json", + "M": 130048, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.6440000000002 + }, + "M=130048,N=1152": { + "file": "silu_config_M130048_N1152.json", + "M": 130048, 
+ "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.1239999999998 + }, + "M=130048,N=1280": { + "file": "silu_config_M130048_N1280.json", + "M": 130048, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1214.484 + }, + "M=130048,N=1344": { + "file": "silu_config_M130048_N1344.json", + "M": 130048, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.0040000000004 + }, + "M=130048,N=1408": { + "file": "silu_config_M130048_N1408.json", + "M": 130048, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1226.284 + }, + "M=130048,N=1440": { + "file": "silu_config_M130048_N1440.json", + "M": 130048, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1221.0839999999998 + }, + "M=130048,N=1536": { + "file": "silu_config_M130048_N1536.json", + "M": 130048, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1228.5240000000003 + }, + "M=130048,N=1600": { + "file": "silu_config_M130048_N1600.json", + "M": 130048, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.1240000000003 + }, + "M=130048,N=1664": { + "file": "silu_config_M130048_N1664.json", + "M": 130048, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1224.924 + }, + "M=130048,N=1728": { + "file": "silu_config_M130048_N1728.json", + "M": 130048, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1230.2839999999997 + }, + "M=130048,N=1760": { + "file": "silu_config_M130048_N1760.json", + "M": 130048, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1227.6440000000002 + }, + "M=130048,N=1792": { + "file": "silu_config_M130048_N1792.json", + "M": 130048, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.6440000000002 + }, + "M=130048,N=1920": { + "file": "silu_config_M130048_N1920.json", + "M": 130048, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.3242499999997 + }, + "M=130048,N=2048": { + "file": 
"silu_config_M130048_N2048.json", + "M": 130048, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 997.8029999999999 + }, + "M=130048,N=2080": { + "file": "silu_config_M130048_N2080.json", + "M": 130048, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1350.9244999999996 + }, + "M=130048,N=2240": { + "file": "silu_config_M130048_N2240.json", + "M": 130048, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.7649999999994 + }, + "M=130048,N=2400": { + "file": "silu_config_M130048_N2400.json", + "M": 130048, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1505.9252499999998 + }, + "M=130048,N=2560": { + "file": "silu_config_M130048_N2560.json", + "M": 130048, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.6854999999996 + }, + "M=131072,N=128": { + "file": "silu_config_M131072_N128.json", + "M": 131072, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.63949999999998 + }, + "M=131072,N=160": { + "file": "silu_config_M131072_N160.json", + "M": 131072, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.51975000000004 + }, + "M=131072,N=192": { + "file": "silu_config_M131072_N192.json", + "M": 131072, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.95950000000002 + }, + "M=131072,N=256": { + "file": "silu_config_M131072_N256.json", + "M": 131072, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 147.23949999999996 + }, + "M=131072,N=320": { + "file": "silu_config_M131072_N320.json", + "M": 131072, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 323.92024999999995 + }, + "M=131072,N=384": { + "file": "silu_config_M131072_N384.json", + "M": 131072, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.0802500000001 + }, + "M=131072,N=480": { + "file": "silu_config_M131072_N480.json", + "M": 131072, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 327.96025 
+ }, + "M=131072,N=512": { + "file": "silu_config_M131072_N512.json", + "M": 131072, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 248.64 + }, + "M=131072,N=576": { + "file": "silu_config_M131072_N576.json", + "M": 131072, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 625.0414999999998 + }, + "M=131072,N=640": { + "file": "silu_config_M131072_N640.json", + "M": 131072, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 610.2415000000001 + }, + "M=131072,N=768": { + "file": "silu_config_M131072_N768.json", + "M": 131072, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 611.9214999999999 + }, + "M=131072,N=800": { + "file": "silu_config_M131072_N800.json", + "M": 131072, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.4414999999999 + }, + "M=131072,N=896": { + "file": "silu_config_M131072_N896.json", + "M": 131072, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 614.6414999999997 + }, + "M=131072,N=960": { + "file": "silu_config_M131072_N960.json", + "M": 131072, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 620.1214999999997 + }, + "M=131072,N=1024": { + "file": "silu_config_M131072_N1024.json", + "M": 131072, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 502.64100000000013 + }, + "M=131072,N=1120": { + "file": "silu_config_M131072_N1120.json", + "M": 131072, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1246.8842499999996 + }, + "M=131072,N=1152": { + "file": "silu_config_M131072_N1152.json", + "M": 131072, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1233.0040000000004 + }, + "M=131072,N=1280": { + "file": "silu_config_M131072_N1280.json", + "M": 131072, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1223.8039999999996 + }, + "M=131072,N=1344": { + "file": "silu_config_M131072_N1344.json", + "M": 131072, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1228.804 + }, + "M=131072,N=1408": { + "file": "silu_config_M131072_N1408.json", + "M": 131072, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1225.004 + }, + "M=131072,N=1440": { + "file": "silu_config_M131072_N1440.json", + "M": 131072, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1235.2839999999997 + }, + "M=131072,N=1536": { + "file": "silu_config_M131072_N1536.json", + "M": 131072, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1227.1239999999998 + }, + "M=131072,N=1600": { + "file": "silu_config_M131072_N1600.json", + "M": 131072, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1236.964 + }, + "M=131072,N=1664": { + "file": "silu_config_M131072_N1664.json", + "M": 131072, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1229.4439999999995 + }, + "M=131072,N=1728": { + "file": "silu_config_M131072_N1728.json", + "M": 131072, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1250.7642499999997 + }, + "M=131072,N=1760": { + "file": "silu_config_M131072_N1760.json", + "M": 131072, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1241.5239999999994 + }, + "M=131072,N=1792": { + "file": "silu_config_M131072_N1792.json", + "M": 131072, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1227.364 + }, + "M=131072,N=1920": { + "file": "silu_config_M131072_N1920.json", + "M": 131072, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1235.7640000000001 + }, + "M=131072,N=2048": { + "file": "silu_config_M131072_N2048.json", + "M": 131072, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 967.5630000000001 + }, + "M=131072,N=2080": { + "file": "silu_config_M131072_N2080.json", + "M": 131072, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1361.4044999999996 + }, + "M=131072,N=2240": { + "file": "silu_config_M131072_N2240.json", + "M": 131072, + "N": 2240, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1431.0050000000006 + }, + "M=131072,N=2400": { + "file": "silu_config_M131072_N2400.json", + "M": 131072, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1523.80525 + }, + "M=131072,N=2560": { + "file": "silu_config_M131072_N2560.json", + "M": 131072, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1578.325499999999 + }, + "M=132096,N=128": { + "file": "silu_config_M132096_N128.json", + "M": 132096, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.4795 + }, + "M=132096,N=160": { + "file": "silu_config_M132096_N160.json", + "M": 132096, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.67950000000002 + }, + "M=132096,N=192": { + "file": "silu_config_M132096_N192.json", + "M": 132096, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.95974999999999 + }, + "M=132096,N=256": { + "file": "silu_config_M132096_N256.json", + "M": 132096, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.15975000000003 + }, + "M=132096,N=320": { + "file": "silu_config_M132096_N320.json", + "M": 132096, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 326.0802500000002 + }, + "M=132096,N=384": { + "file": "silu_config_M132096_N384.json", + "M": 132096, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 323.88025000000016 + }, + "M=132096,N=480": { + "file": "silu_config_M132096_N480.json", + "M": 132096, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.40025 + }, + "M=132096,N=512": { + "file": "silu_config_M132096_N512.json", + "M": 132096, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 262.96000000000015 + }, + "M=132096,N=576": { + "file": "silu_config_M132096_N576.json", + "M": 132096, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.6814999999999 + }, + "M=132096,N=640": { + "file": "silu_config_M132096_N640.json", + "M": 132096, + 
"N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.8815000000002 + }, + "M=132096,N=768": { + "file": "silu_config_M132096_N768.json", + "M": 132096, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 627.6415 + }, + "M=132096,N=800": { + "file": "silu_config_M132096_N800.json", + "M": 132096, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.4815000000001 + }, + "M=132096,N=896": { + "file": "silu_config_M132096_N896.json", + "M": 132096, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.9614999999999 + }, + "M=132096,N=960": { + "file": "silu_config_M132096_N960.json", + "M": 132096, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 639.8814999999997 + }, + "M=132096,N=1024": { + "file": "silu_config_M132096_N1024.json", + "M": 132096, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 506.6009999999999 + }, + "M=132096,N=1120": { + "file": "silu_config_M132096_N1120.json", + "M": 132096, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1245.88425 + }, + "M=132096,N=1152": { + "file": "silu_config_M132096_N1152.json", + "M": 132096, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1224.8439999999996 + }, + "M=132096,N=1280": { + "file": "silu_config_M132096_N1280.json", + "M": 132096, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1247.6842499999998 + }, + "M=132096,N=1344": { + "file": "silu_config_M132096_N1344.json", + "M": 132096, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1238.6040000000003 + }, + "M=132096,N=1408": { + "file": "silu_config_M132096_N1408.json", + "M": 132096, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1249.3242500000001 + }, + "M=132096,N=1440": { + "file": "silu_config_M132096_N1440.json", + "M": 132096, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1250.5242500000004 + }, + "M=132096,N=1536": { + "file": 
"silu_config_M132096_N1536.json", + "M": 132096, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1251.52425 + }, + "M=132096,N=1600": { + "file": "silu_config_M132096_N1600.json", + "M": 132096, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1241.5640000000003 + }, + "M=132096,N=1664": { + "file": "silu_config_M132096_N1664.json", + "M": 132096, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1253.96425 + }, + "M=132096,N=1728": { + "file": "silu_config_M132096_N1728.json", + "M": 132096, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1264.7242500000007 + }, + "M=132096,N=1760": { + "file": "silu_config_M132096_N1760.json", + "M": 132096, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1267.404250000001 + }, + "M=132096,N=1792": { + "file": "silu_config_M132096_N1792.json", + "M": 132096, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.8439999999996 + }, + "M=132096,N=1920": { + "file": "silu_config_M132096_N1920.json", + "M": 132096, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1245.524 + }, + "M=132096,N=2048": { + "file": "silu_config_M132096_N2048.json", + "M": 132096, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 985.2829999999999 + }, + "M=132096,N=2080": { + "file": "silu_config_M132096_N2080.json", + "M": 132096, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1371.6047500000004 + }, + "M=132096,N=2240": { + "file": "silu_config_M132096_N2240.json", + "M": 132096, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1460.7650000000003 + }, + "M=132096,N=2400": { + "file": "silu_config_M132096_N2400.json", + "M": 132096, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.165249999999 + }, + "M=132096,N=2560": { + "file": "silu_config_M132096_N2560.json", + "M": 132096, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.2055 + 
}, + "M=133120,N=128": { + "file": "silu_config_M133120_N128.json", + "M": 133120, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 79.03924999999998 + }, + "M=133120,N=160": { + "file": "silu_config_M133120_N160.json", + "M": 133120, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.59949999999998 + }, + "M=133120,N=192": { + "file": "silu_config_M133120_N192.json", + "M": 133120, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.31975 + }, + "M=133120,N=256": { + "file": "silu_config_M133120_N256.json", + "M": 133120, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.5195 + }, + "M=133120,N=320": { + "file": "silu_config_M133120_N320.json", + "M": 133120, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 326.60024999999996 + }, + "M=133120,N=384": { + "file": "silu_config_M133120_N384.json", + "M": 133120, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 324.20025 + }, + "M=133120,N=480": { + "file": "silu_config_M133120_N480.json", + "M": 133120, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.6405000000001 + }, + "M=133120,N=512": { + "file": "silu_config_M133120_N512.json", + "M": 133120, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 264.9200000000001 + }, + "M=133120,N=576": { + "file": "silu_config_M133120_N576.json", + "M": 133120, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 623.6014999999998 + }, + "M=133120,N=640": { + "file": "silu_config_M133120_N640.json", + "M": 133120, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.6415000000002 + }, + "M=133120,N=768": { + "file": "silu_config_M133120_N768.json", + "M": 133120, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 632.6415 + }, + "M=133120,N=800": { + "file": "silu_config_M133120_N800.json", + "M": 133120, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2414999999999 + }, + 
"M=133120,N=896": { + "file": "silu_config_M133120_N896.json", + "M": 133120, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.1215000000004 + }, + "M=133120,N=960": { + "file": "silu_config_M133120_N960.json", + "M": 133120, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 620.0415000000003 + }, + "M=133120,N=1024": { + "file": "silu_config_M133120_N1024.json", + "M": 133120, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 510.04125 + }, + "M=133120,N=1120": { + "file": "silu_config_M133120_N1120.json", + "M": 133120, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.4039999999995 + }, + "M=133120,N=1152": { + "file": "silu_config_M133120_N1152.json", + "M": 133120, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1237.1239999999998 + }, + "M=133120,N=1280": { + "file": "silu_config_M133120_N1280.json", + "M": 133120, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1218.844 + }, + "M=133120,N=1344": { + "file": "silu_config_M133120_N1344.json", + "M": 133120, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.56425 + }, + "M=133120,N=1408": { + "file": "silu_config_M133120_N1408.json", + "M": 133120, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.2439999999997 + }, + "M=133120,N=1440": { + "file": "silu_config_M133120_N1440.json", + "M": 133120, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.1640000000002 + }, + "M=133120,N=1536": { + "file": "silu_config_M133120_N1536.json", + "M": 133120, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.7239999999997 + }, + "M=133120,N=1600": { + "file": "silu_config_M133120_N1600.json", + "M": 133120, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1231.804 + }, + "M=133120,N=1664": { + "file": "silu_config_M133120_N1664.json", + "M": 133120, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1234.7240000000006 + }, + "M=133120,N=1728": { + "file": "silu_config_M133120_N1728.json", + "M": 133120, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1244.2040000000006 + }, + "M=133120,N=1760": { + "file": "silu_config_M133120_N1760.json", + "M": 133120, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.0442500000004 + }, + "M=133120,N=1792": { + "file": "silu_config_M133120_N1792.json", + "M": 133120, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.92425 + }, + "M=133120,N=1920": { + "file": "silu_config_M133120_N1920.json", + "M": 133120, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1252.2442500000006 + }, + "M=133120,N=2048": { + "file": "silu_config_M133120_N2048.json", + "M": 133120, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1002.0832500000001 + }, + "M=133120,N=2080": { + "file": "silu_config_M133120_N2080.json", + "M": 133120, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.2047499999994 + }, + "M=133120,N=2240": { + "file": "silu_config_M133120_N2240.json", + "M": 133120, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1472.4449999999997 + }, + "M=133120,N=2400": { + "file": "silu_config_M133120_N2400.json", + "M": 133120, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1544.0855000000001 + }, + "M=133120,N=2560": { + "file": "silu_config_M133120_N2560.json", + "M": 133120, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1595.0455000000002 + }, + "M=134144,N=128": { + "file": "silu_config_M134144_N128.json", + "M": 134144, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.23925000000001 + }, + "M=134144,N=160": { + "file": "silu_config_M134144_N160.json", + "M": 134144, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 176.23975000000002 + }, + "M=134144,N=192": { + "file": "silu_config_M134144_N192.json", + "M": 134144, 
+ "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.51950000000002 + }, + "M=134144,N=256": { + "file": "silu_config_M134144_N256.json", + "M": 134144, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.79975000000002 + }, + "M=134144,N=320": { + "file": "silu_config_M134144_N320.json", + "M": 134144, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 331.6402499999999 + }, + "M=134144,N=384": { + "file": "silu_config_M134144_N384.json", + "M": 134144, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.6802500000001 + }, + "M=134144,N=480": { + "file": "silu_config_M134144_N480.json", + "M": 134144, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.88024999999993 + }, + "M=134144,N=512": { + "file": "silu_config_M134144_N512.json", + "M": 134144, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 266.80025 + }, + "M=134144,N=576": { + "file": "silu_config_M134144_N576.json", + "M": 134144, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 632.4815000000001 + }, + "M=134144,N=640": { + "file": "silu_config_M134144_N640.json", + "M": 134144, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 644.6814999999999 + }, + "M=134144,N=768": { + "file": "silu_config_M134144_N768.json", + "M": 134144, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 640.8415 + }, + "M=134144,N=800": { + "file": "silu_config_M134144_N800.json", + "M": 134144, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 656.9617499999997 + }, + "M=134144,N=896": { + "file": "silu_config_M134144_N896.json", + "M": 134144, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.8814999999997 + }, + "M=134144,N=960": { + "file": "silu_config_M134144_N960.json", + "M": 134144, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 656.5217499999999 + }, + "M=134144,N=1024": { + "file": "silu_config_M134144_N1024.json", + 
"M": 134144, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 514.3612499999999 + }, + "M=134144,N=1120": { + "file": "silu_config_M134144_N1120.json", + "M": 134144, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1260.36425 + }, + "M=134144,N=1152": { + "file": "silu_config_M134144_N1152.json", + "M": 134144, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.1642500000003 + }, + "M=134144,N=1280": { + "file": "silu_config_M134144_N1280.json", + "M": 134144, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1256.3242500000001 + }, + "M=134144,N=1344": { + "file": "silu_config_M134144_N1344.json", + "M": 134144, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1261.5242500000004 + }, + "M=134144,N=1408": { + "file": "silu_config_M134144_N1408.json", + "M": 134144, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.8042499999997 + }, + "M=134144,N=1440": { + "file": "silu_config_M134144_N1440.json", + "M": 134144, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1264.2442500000002 + }, + "M=134144,N=1536": { + "file": "silu_config_M134144_N1536.json", + "M": 134144, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1266.48425 + }, + "M=134144,N=1600": { + "file": "silu_config_M134144_N1600.json", + "M": 134144, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.2842500000006 + }, + "M=134144,N=1664": { + "file": "silu_config_M134144_N1664.json", + "M": 134144, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.6442500000007 + }, + "M=134144,N=1728": { + "file": "silu_config_M134144_N1728.json", + "M": 134144, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.2842500000002 + }, + "M=134144,N=1760": { + "file": "silu_config_M134144_N1760.json", + "M": 134144, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.36425 + }, + "M=134144,N=1792": { 
+ "file": "silu_config_M134144_N1792.json", + "M": 134144, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1284.04425 + }, + "M=134144,N=1920": { + "file": "silu_config_M134144_N1920.json", + "M": 134144, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1286.6842499999998 + }, + "M=134144,N=2048": { + "file": "silu_config_M134144_N2048.json", + "M": 134144, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1028.5232499999997 + }, + "M=134144,N=2080": { + "file": "silu_config_M134144_N2080.json", + "M": 134144, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.3247499999998 + }, + "M=134144,N=2240": { + "file": "silu_config_M134144_N2240.json", + "M": 134144, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.965000000001 + }, + "M=134144,N=2400": { + "file": "silu_config_M134144_N2400.json", + "M": 134144, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1540.4052499999998 + }, + "M=134144,N=2560": { + "file": "silu_config_M134144_N2560.json", + "M": 134144, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1619.20575 + }, + "M=135168,N=128": { + "file": "silu_config_M135168_N128.json", + "M": 135168, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.63950000000003 + }, + "M=135168,N=160": { + "file": "silu_config_M135168_N160.json", + "M": 135168, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.43949999999992 + }, + "M=135168,N=192": { + "file": "silu_config_M135168_N192.json", + "M": 135168, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.71975000000003 + }, + "M=135168,N=256": { + "file": "silu_config_M135168_N256.json", + "M": 135168, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.15974999999997 + }, + "M=135168,N=320": { + "file": "silu_config_M135168_N320.json", + "M": 135168, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
334.20024999999987 + }, + "M=135168,N=384": { + "file": "silu_config_M135168_N384.json", + "M": 135168, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.84024999999997 + }, + "M=135168,N=480": { + "file": "silu_config_M135168_N480.json", + "M": 135168, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.36024999999995 + }, + "M=135168,N=512": { + "file": "silu_config_M135168_N512.json", + "M": 135168, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 260.84000000000003 + }, + "M=135168,N=576": { + "file": "silu_config_M135168_N576.json", + "M": 135168, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 655.1617500000002 + }, + "M=135168,N=640": { + "file": "silu_config_M135168_N640.json", + "M": 135168, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 643.2414999999999 + }, + "M=135168,N=768": { + "file": "silu_config_M135168_N768.json", + "M": 135168, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.8015000000003 + }, + "M=135168,N=800": { + "file": "silu_config_M135168_N800.json", + "M": 135168, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 652.2417499999999 + }, + "M=135168,N=896": { + "file": "silu_config_M135168_N896.json", + "M": 135168, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 648.7617499999999 + }, + "M=135168,N=960": { + "file": "silu_config_M135168_N960.json", + "M": 135168, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.6417499999998 + }, + "M=135168,N=1024": { + "file": "silu_config_M135168_N1024.json", + "M": 135168, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 517.8412499999997 + }, + "M=135168,N=1120": { + "file": "silu_config_M135168_N1120.json", + "M": 135168, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.6442500000007 + }, + "M=135168,N=1152": { + "file": "silu_config_M135168_N1152.json", + "M": 135168, + "N": 1152, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1281.56425 + }, + "M=135168,N=1280": { + "file": "silu_config_M135168_N1280.json", + "M": 135168, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1265.5642500000004 + }, + "M=135168,N=1344": { + "file": "silu_config_M135168_N1344.json", + "M": 135168, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1275.6042500000003 + }, + "M=135168,N=1408": { + "file": "silu_config_M135168_N1408.json", + "M": 135168, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.9642499999995 + }, + "M=135168,N=1440": { + "file": "silu_config_M135168_N1440.json", + "M": 135168, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1279.0442500000004 + }, + "M=135168,N=1536": { + "file": "silu_config_M135168_N1536.json", + "M": 135168, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.9242499999996 + }, + "M=135168,N=1600": { + "file": "silu_config_M135168_N1600.json", + "M": 135168, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.5642499999994 + }, + "M=135168,N=1664": { + "file": "silu_config_M135168_N1664.json", + "M": 135168, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1267.52425 + }, + "M=135168,N=1728": { + "file": "silu_config_M135168_N1728.json", + "M": 135168, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.7642499999993 + }, + "M=135168,N=1760": { + "file": "silu_config_M135168_N1760.json", + "M": 135168, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1291.2042499999998 + }, + "M=135168,N=1792": { + "file": "silu_config_M135168_N1792.json", + "M": 135168, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1282.7642499999997 + }, + "M=135168,N=1920": { + "file": "silu_config_M135168_N1920.json", + "M": 135168, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1279.84425 + }, + "M=135168,N=2048": { + "file": 
"silu_config_M135168_N2048.json", + "M": 135168, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1008.0432499999996 + }, + "M=135168,N=2080": { + "file": "silu_config_M135168_N2080.json", + "M": 135168, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1416.8047500000007 + }, + "M=135168,N=2240": { + "file": "silu_config_M135168_N2240.json", + "M": 135168, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.125000000001 + }, + "M=135168,N=2400": { + "file": "silu_config_M135168_N2400.json", + "M": 135168, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1561.6854999999996 + }, + "M=135168,N=2560": { + "file": "silu_config_M135168_N2560.json", + "M": 135168, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1621.0457500000002 + }, + "M=136192,N=128": { + "file": "silu_config_M136192_N128.json", + "M": 136192, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.63950000000001 + }, + "M=136192,N=160": { + "file": "silu_config_M136192_N160.json", + "M": 136192, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.39974999999995 + }, + "M=136192,N=192": { + "file": "silu_config_M136192_N192.json", + "M": 136192, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.75949999999997 + }, + "M=136192,N=256": { + "file": "silu_config_M136192_N256.json", + "M": 136192, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.39950000000005 + }, + "M=136192,N=320": { + "file": "silu_config_M136192_N320.json", + "M": 136192, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.8802499999998 + }, + "M=136192,N=384": { + "file": "silu_config_M136192_N384.json", + "M": 136192, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 333.2802499999999 + }, + "M=136192,N=480": { + "file": "silu_config_M136192_N480.json", + "M": 136192, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 340.96025 
+ }, + "M=136192,N=512": { + "file": "silu_config_M136192_N512.json", + "M": 136192, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 270.88024999999993 + }, + "M=136192,N=576": { + "file": "silu_config_M136192_N576.json", + "M": 136192, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 662.92175 + }, + "M=136192,N=640": { + "file": "silu_config_M136192_N640.json", + "M": 136192, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.8014999999998 + }, + "M=136192,N=768": { + "file": "silu_config_M136192_N768.json", + "M": 136192, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.9215000000002 + }, + "M=136192,N=800": { + "file": "silu_config_M136192_N800.json", + "M": 136192, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.9217499999997 + }, + "M=136192,N=896": { + "file": "silu_config_M136192_N896.json", + "M": 136192, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.4415000000001 + }, + "M=136192,N=960": { + "file": "silu_config_M136192_N960.json", + "M": 136192, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 664.6017499999996 + }, + "M=136192,N=1024": { + "file": "silu_config_M136192_N1024.json", + "M": 136192, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 522.1210000000001 + }, + "M=136192,N=1120": { + "file": "silu_config_M136192_N1120.json", + "M": 136192, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.2842500000002 + }, + "M=136192,N=1152": { + "file": "silu_config_M136192_N1152.json", + "M": 136192, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.0042500000004 + }, + "M=136192,N=1280": { + "file": "silu_config_M136192_N1280.json", + "M": 136192, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.2842499999997 + }, + "M=136192,N=1344": { + "file": "silu_config_M136192_N1344.json", + "M": 136192, + "N": 1344, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1285.1642499999998 + }, + "M=136192,N=1408": { + "file": "silu_config_M136192_N1408.json", + "M": 136192, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.7642499999997 + }, + "M=136192,N=1440": { + "file": "silu_config_M136192_N1440.json", + "M": 136192, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.4042500000005 + }, + "M=136192,N=1536": { + "file": "silu_config_M136192_N1536.json", + "M": 136192, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1289.7642499999997 + }, + "M=136192,N=1600": { + "file": "silu_config_M136192_N1600.json", + "M": 136192, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1296.0842500000003 + }, + "M=136192,N=1664": { + "file": "silu_config_M136192_N1664.json", + "M": 136192, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.48425 + }, + "M=136192,N=1728": { + "file": "silu_config_M136192_N1728.json", + "M": 136192, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1302.8442500000006 + }, + "M=136192,N=1760": { + "file": "silu_config_M136192_N1760.json", + "M": 136192, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1305.6842500000002 + }, + "M=136192,N=1792": { + "file": "silu_config_M136192_N1792.json", + "M": 136192, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.2842500000002 + }, + "M=136192,N=1920": { + "file": "silu_config_M136192_N1920.json", + "M": 136192, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1310.1245 + }, + "M=136192,N=2048": { + "file": "silu_config_M136192_N2048.json", + "M": 136192, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1033.6034999999997 + }, + "M=136192,N=2080": { + "file": "silu_config_M136192_N2080.json", + "M": 136192, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1403.084750000001 + }, + "M=136192,N=2240": { + "file": 
"silu_config_M136192_N2240.json", + "M": 136192, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1486.4450000000002 + }, + "M=136192,N=2400": { + "file": "silu_config_M136192_N2400.json", + "M": 136192, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.4855000000016 + }, + "M=136192,N=2560": { + "file": "silu_config_M136192_N2560.json", + "M": 136192, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.5257499999998 + }, + "M=137216,N=128": { + "file": "silu_config_M137216_N128.json", + "M": 137216, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.15950000000001 + }, + "M=137216,N=160": { + "file": "silu_config_M137216_N160.json", + "M": 137216, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.79975000000002 + }, + "M=137216,N=192": { + "file": "silu_config_M137216_N192.json", + "M": 137216, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.11975000000004 + }, + "M=137216,N=256": { + "file": "silu_config_M137216_N256.json", + "M": 137216, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.6395 + }, + "M=137216,N=320": { + "file": "silu_config_M137216_N320.json", + "M": 137216, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.60024999999985 + }, + "M=137216,N=384": { + "file": "silu_config_M137216_N384.json", + "M": 137216, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 336.08025 + }, + "M=137216,N=480": { + "file": "silu_config_M137216_N480.json", + "M": 137216, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 342.9202499999999 + }, + "M=137216,N=512": { + "file": "silu_config_M137216_N512.json", + "M": 137216, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 272.84024999999997 + }, + "M=137216,N=576": { + "file": "silu_config_M137216_N576.json", + "M": 137216, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 663.5217500000001 + }, + 
"M=137216,N=640": { + "file": "silu_config_M137216_N640.json", + "M": 137216, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.64175 + }, + "M=137216,N=768": { + "file": "silu_config_M137216_N768.json", + "M": 137216, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.2414999999999 + }, + "M=137216,N=800": { + "file": "silu_config_M137216_N800.json", + "M": 137216, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.4017499999998 + }, + "M=137216,N=896": { + "file": "silu_config_M137216_N896.json", + "M": 137216, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.4015000000002 + }, + "M=137216,N=960": { + "file": "silu_config_M137216_N960.json", + "M": 137216, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.9217500000002 + }, + "M=137216,N=1024": { + "file": "silu_config_M137216_N1024.json", + "M": 137216, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 526.4812500000003 + }, + "M=137216,N=1120": { + "file": "silu_config_M137216_N1120.json", + "M": 137216, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1288.92425 + }, + "M=137216,N=1152": { + "file": "silu_config_M137216_N1152.json", + "M": 137216, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.84425 + }, + "M=137216,N=1280": { + "file": "silu_config_M137216_N1280.json", + "M": 137216, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1281.2442499999997 + }, + "M=137216,N=1344": { + "file": "silu_config_M137216_N1344.json", + "M": 137216, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.4042500000005 + }, + "M=137216,N=1408": { + "file": "silu_config_M137216_N1408.json", + "M": 137216, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1304.1242499999998 + }, + "M=137216,N=1440": { + "file": "silu_config_M137216_N1440.json", + "M": 137216, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1287.7242499999998 + }, + "M=137216,N=1536": { + "file": "silu_config_M137216_N1536.json", + "M": 137216, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.3245000000002 + }, + "M=137216,N=1600": { + "file": "silu_config_M137216_N1600.json", + "M": 137216, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.2442499999997 + }, + "M=137216,N=1664": { + "file": "silu_config_M137216_N1664.json", + "M": 137216, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.92425 + }, + "M=137216,N=1728": { + "file": "silu_config_M137216_N1728.json", + "M": 137216, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1301.7642500000006 + }, + "M=137216,N=1760": { + "file": "silu_config_M137216_N1760.json", + "M": 137216, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1299.4042499999996 + }, + "M=137216,N=1792": { + "file": "silu_config_M137216_N1792.json", + "M": 137216, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.2442500000002 + }, + "M=137216,N=1920": { + "file": "silu_config_M137216_N1920.json", + "M": 137216, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1298.8442499999996 + }, + "M=137216,N=2048": { + "file": "silu_config_M137216_N2048.json", + "M": 137216, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1041.0434999999998 + }, + "M=137216,N=2080": { + "file": "silu_config_M137216_N2080.json", + "M": 137216, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.2047500000003 + }, + "M=137216,N=2240": { + "file": "silu_config_M137216_N2240.json", + "M": 137216, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1507.3252499999999 + }, + "M=137216,N=2400": { + "file": "silu_config_M137216_N2400.json", + "M": 137216, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.8054999999995 + }, + "M=137216,N=2560": { + "file": "silu_config_M137216_N2560.json", + "M": 137216, 
+ "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.4057500000008 + }, + "M=138240,N=128": { + "file": "silu_config_M138240_N128.json", + "M": 138240, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.4795 + }, + "M=138240,N=160": { + "file": "silu_config_M138240_N160.json", + "M": 138240, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.99974999999998 + }, + "M=138240,N=192": { + "file": "silu_config_M138240_N192.json", + "M": 138240, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.47975000000005 + }, + "M=138240,N=256": { + "file": "silu_config_M138240_N256.json", + "M": 138240, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.6397499999999 + }, + "M=138240,N=320": { + "file": "silu_config_M138240_N320.json", + "M": 138240, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 339.00025000000016 + }, + "M=138240,N=384": { + "file": "silu_config_M138240_N384.json", + "M": 138240, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 336.40025000000026 + }, + "M=138240,N=480": { + "file": "silu_config_M138240_N480.json", + "M": 138240, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.0802500000002 + }, + "M=138240,N=512": { + "file": "silu_config_M138240_N512.json", + "M": 138240, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 266.5999999999999 + }, + "M=138240,N=576": { + "file": "silu_config_M138240_N576.json", + "M": 138240, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 636.9614999999999 + }, + "M=138240,N=640": { + "file": "silu_config_M138240_N640.json", + "M": 138240, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 653.8817500000002 + }, + "M=138240,N=768": { + "file": "silu_config_M138240_N768.json", + "M": 138240, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.84175 + }, + "M=138240,N=800": { + "file": "silu_config_M138240_N800.json", + 
"M": 138240, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.3217499999996 + }, + "M=138240,N=896": { + "file": "silu_config_M138240_N896.json", + "M": 138240, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.2817500000003 + }, + "M=138240,N=960": { + "file": "silu_config_M138240_N960.json", + "M": 138240, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.92175 + }, + "M=138240,N=1024": { + "file": "silu_config_M138240_N1024.json", + "M": 138240, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 529.4412499999999 + }, + "M=138240,N=1120": { + "file": "silu_config_M138240_N1120.json", + "M": 138240, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.5642499999994 + }, + "M=138240,N=1152": { + "file": "silu_config_M138240_N1152.json", + "M": 138240, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1294.6842499999998 + }, + "M=138240,N=1280": { + "file": "silu_config_M138240_N1280.json", + "M": 138240, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1275.3642499999996 + }, + "M=138240,N=1344": { + "file": "silu_config_M138240_N1344.json", + "M": 138240, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1305.2842499999997 + }, + "M=138240,N=1408": { + "file": "silu_config_M138240_N1408.json", + "M": 138240, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.2442499999997 + }, + "M=138240,N=1440": { + "file": "silu_config_M138240_N1440.json", + "M": 138240, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.7242500000002 + }, + "M=138240,N=1536": { + "file": "silu_config_M138240_N1536.json", + "M": 138240, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1289.9242500000005 + }, + "M=138240,N=1600": { + "file": "silu_config_M138240_N1600.json", + "M": 138240, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.6845000000003 + }, + 
"M=138240,N=1664": { + "file": "silu_config_M138240_N1664.json", + "M": 138240, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.48425 + }, + "M=138240,N=1728": { + "file": "silu_config_M138240_N1728.json", + "M": 138240, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1307.40425 + }, + "M=138240,N=1760": { + "file": "silu_config_M138240_N1760.json", + "M": 138240, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.96425 + }, + "M=138240,N=1792": { + "file": "silu_config_M138240_N1792.json", + "M": 138240, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1300.56425 + }, + "M=138240,N=1920": { + "file": "silu_config_M138240_N1920.json", + "M": 138240, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1288.7242499999998 + }, + "M=138240,N=2048": { + "file": "silu_config_M138240_N2048.json", + "M": 138240, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1041.5232500000002 + }, + "M=138240,N=2080": { + "file": "silu_config_M138240_N2080.json", + "M": 138240, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.4449999999997 + }, + "M=138240,N=2240": { + "file": "silu_config_M138240_N2240.json", + "M": 138240, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.2852500000008 + }, + "M=138240,N=2400": { + "file": "silu_config_M138240_N2400.json", + "M": 138240, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1594.4054999999998 + }, + "M=138240,N=2560": { + "file": "silu_config_M138240_N2560.json", + "M": 138240, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1655.4857499999998 + }, + "M=139264,N=128": { + "file": "silu_config_M139264_N128.json", + "M": 139264, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.71950000000002 + }, + "M=139264,N=160": { + "file": "silu_config_M139264_N160.json", + "M": 139264, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 182.31975 + }, + "M=139264,N=192": { + "file": "silu_config_M139264_N192.json", + "M": 139264, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 187.03975000000003 + }, + "M=139264,N=256": { + "file": "silu_config_M139264_N256.json", + "M": 139264, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + "M=139264,N=320": { + "file": "silu_config_M139264_N320.json", + "M": 139264, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.76049999999987 + }, + "M=139264,N=384": { + "file": "silu_config_M139264_N384.json", + "M": 139264, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 340.48024999999984 + }, + "M=139264,N=480": { + "file": "silu_config_M139264_N480.json", + "M": 139264, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 348.2004999999999 + }, + "M=139264,N=512": { + "file": "silu_config_M139264_N512.json", + "M": 139264, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 269.55999999999995 + }, + "M=139264,N=576": { + "file": "silu_config_M139264_N576.json", + "M": 139264, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 673.44175 + }, + "M=139264,N=640": { + "file": "silu_config_M139264_N640.json", + "M": 139264, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.1215 + }, + "M=139264,N=768": { + "file": "silu_config_M139264_N768.json", + "M": 139264, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.7617499999997 + }, + "M=139264,N=800": { + "file": "silu_config_M139264_N800.json", + "M": 139264, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 666.6017500000003 + }, + "M=139264,N=896": { + "file": "silu_config_M139264_N896.json", + "M": 139264, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.88175 + }, + "M=139264,N=960": { + "file": "silu_config_M139264_N960.json", + "M": 139264, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
679.5617500000001 + }, + "M=139264,N=1024": { + "file": "silu_config_M139264_N1024.json", + "M": 139264, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 533.2812500000002 + }, + "M=139264,N=1120": { + "file": "silu_config_M139264_N1120.json", + "M": 139264, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1319.2444999999998 + }, + "M=139264,N=1152": { + "file": "silu_config_M139264_N1152.json", + "M": 139264, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1297.92425 + }, + "M=139264,N=1280": { + "file": "silu_config_M139264_N1280.json", + "M": 139264, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.84425 + }, + "M=139264,N=1344": { + "file": "silu_config_M139264_N1344.json", + "M": 139264, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.3242500000001 + }, + "M=139264,N=1408": { + "file": "silu_config_M139264_N1408.json", + "M": 139264, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.4045000000006 + }, + "M=139264,N=1440": { + "file": "silu_config_M139264_N1440.json", + "M": 139264, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1327.3645000000001 + }, + "M=139264,N=1536": { + "file": "silu_config_M139264_N1536.json", + "M": 139264, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1318.0045000000005 + }, + "M=139264,N=1600": { + "file": "silu_config_M139264_N1600.json", + "M": 139264, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1307.8442500000006 + }, + "M=139264,N=1664": { + "file": "silu_config_M139264_N1664.json", + "M": 139264, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1317.1244999999994 + }, + "M=139264,N=1728": { + "file": "silu_config_M139264_N1728.json", + "M": 139264, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.004500000001 + }, + "M=139264,N=1760": { + "file": "silu_config_M139264_N1760.json", + "M": 139264, + "N": 1760, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.9645 + }, + "M=139264,N=1792": { + "file": "silu_config_M139264_N1792.json", + "M": 139264, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.56425 + }, + "M=139264,N=1920": { + "file": "silu_config_M139264_N1920.json", + "M": 139264, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.6445000000003 + }, + "M=139264,N=2048": { + "file": "silu_config_M139264_N2048.json", + "M": 139264, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1049.1235000000001 + }, + "M=139264,N=2080": { + "file": "silu_config_M139264_N2080.json", + "M": 139264, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.7650000000003 + }, + "M=139264,N=2240": { + "file": "silu_config_M139264_N2240.json", + "M": 139264, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1528.4452500000002 + }, + "M=139264,N=2400": { + "file": "silu_config_M139264_N2400.json", + "M": 139264, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1608.7254999999996 + }, + "M=139264,N=2560": { + "file": "silu_config_M139264_N2560.json", + "M": 139264, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1677.7659999999996 + }, + "M=140288,N=128": { + "file": "silu_config_M140288_N128.json", + "M": 140288, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.3195 + }, + "M=140288,N=160": { + "file": "silu_config_M140288_N160.json", + "M": 140288, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.43975 + }, + "M=140288,N=192": { + "file": "silu_config_M140288_N192.json", + "M": 140288, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 188.31975000000003 + }, + "M=140288,N=256": { + "file": "silu_config_M140288_N256.json", + "M": 140288, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.95975000000004 + }, + "M=140288,N=320": { + "file": "silu_config_M140288_N320.json", + "M": 
140288, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 345.6802500000001 + }, + "M=140288,N=384": { + "file": "silu_config_M140288_N384.json", + "M": 140288, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.32025 + }, + "M=140288,N=480": { + "file": "silu_config_M140288_N480.json", + "M": 140288, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.2805000000001 + }, + "M=140288,N=512": { + "file": "silu_config_M140288_N512.json", + "M": 140288, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 278.48025000000007 + }, + "M=140288,N=576": { + "file": "silu_config_M140288_N576.json", + "M": 140288, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 678.4417500000004 + }, + "M=140288,N=640": { + "file": "silu_config_M140288_N640.json", + "M": 140288, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.9617500000002 + }, + "M=140288,N=768": { + "file": "silu_config_M140288_N768.json", + "M": 140288, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.5217499999999 + }, + "M=140288,N=800": { + "file": "silu_config_M140288_N800.json", + "M": 140288, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 682.7217500000002 + }, + "M=140288,N=896": { + "file": "silu_config_M140288_N896.json", + "M": 140288, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.8017499999999 + }, + "M=140288,N=960": { + "file": "silu_config_M140288_N960.json", + "M": 140288, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.2817500000003 + }, + "M=140288,N=1024": { + "file": "silu_config_M140288_N1024.json", + "M": 140288, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 537.6012500000002 + }, + "M=140288,N=1120": { + "file": "silu_config_M140288_N1120.json", + "M": 140288, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1330.0045 + }, + "M=140288,N=1152": { + "file": 
"silu_config_M140288_N1152.json", + "M": 140288, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1318.6444999999999 + }, + "M=140288,N=1280": { + "file": "silu_config_M140288_N1280.json", + "M": 140288, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1309.3642500000005 + }, + "M=140288,N=1344": { + "file": "silu_config_M140288_N1344.json", + "M": 140288, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6844999999998 + }, + "M=140288,N=1408": { + "file": "silu_config_M140288_N1408.json", + "M": 140288, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1314.2845000000002 + }, + "M=140288,N=1440": { + "file": "silu_config_M140288_N1440.json", + "M": 140288, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1322.2044999999998 + }, + "M=140288,N=1536": { + "file": "silu_config_M140288_N1536.json", + "M": 140288, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.9645000000005 + }, + "M=140288,N=1600": { + "file": "silu_config_M140288_N1600.json", + "M": 140288, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.4044999999996 + }, + "M=140288,N=1664": { + "file": "silu_config_M140288_N1664.json", + "M": 140288, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1298.44425 + }, + "M=140288,N=1728": { + "file": "silu_config_M140288_N1728.json", + "M": 140288, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1337.7644999999998 + }, + "M=140288,N=1760": { + "file": "silu_config_M140288_N1760.json", + "M": 140288, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1343.6844999999994 + }, + "M=140288,N=1792": { + "file": "silu_config_M140288_N1792.json", + "M": 140288, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6445000000008 + }, + "M=140288,N=1920": { + "file": "silu_config_M140288_N1920.json", + "M": 140288, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1336.9244999999996 + }, + "M=140288,N=2048": { + "file": "silu_config_M140288_N2048.json", + "M": 140288, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1070.6035000000002 + }, + "M=140288,N=2080": { + "file": "silu_config_M140288_N2080.json", + "M": 140288, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.9650000000001 + }, + "M=140288,N=2240": { + "file": "silu_config_M140288_N2240.json", + "M": 140288, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.4052499999998 + }, + "M=140288,N=2400": { + "file": "silu_config_M140288_N2400.json", + "M": 140288, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1615.9657499999998 + }, + "M=140288,N=2560": { + "file": "silu_config_M140288_N2560.json", + "M": 140288, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.0859999999993 + }, + "M=141312,N=128": { + "file": "silu_config_M141312_N128.json", + "M": 141312, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.19925 + }, + "M=141312,N=160": { + "file": "silu_config_M141312_N160.json", + "M": 141312, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 183.71975 + }, + "M=141312,N=192": { + "file": "silu_config_M141312_N192.json", + "M": 141312, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.19975 + }, + "M=141312,N=256": { + "file": "silu_config_M141312_N256.json", + "M": 141312, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.39975000000004 + }, + "M=141312,N=320": { + "file": "silu_config_M141312_N320.json", + "M": 141312, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.8802499999998 + }, + "M=141312,N=384": { + "file": "silu_config_M141312_N384.json", + "M": 141312, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.88024999999993 + }, + "M=141312,N=480": { + "file": "silu_config_M141312_N480.json", + "M": 141312, + "N": 480, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 351.0005 + }, + "M=141312,N=512": { + "file": "silu_config_M141312_N512.json", + "M": 141312, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 280.28 + }, + "M=141312,N=576": { + "file": "silu_config_M141312_N576.json", + "M": 141312, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 677.7217500000002 + }, + "M=141312,N=640": { + "file": "silu_config_M141312_N640.json", + "M": 141312, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 660.5217500000003 + }, + "M=141312,N=768": { + "file": "silu_config_M141312_N768.json", + "M": 141312, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.6017500000003 + }, + "M=141312,N=800": { + "file": "silu_config_M141312_N800.json", + "M": 141312, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 688.4817499999997 + }, + "M=141312,N=896": { + "file": "silu_config_M141312_N896.json", + "M": 141312, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.36175 + }, + "M=141312,N=960": { + "file": "silu_config_M141312_N960.json", + "M": 141312, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 677.44175 + }, + "M=141312,N=1024": { + "file": "silu_config_M141312_N1024.json", + "M": 141312, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 538.9612499999998 + }, + "M=141312,N=1120": { + "file": "silu_config_M141312_N1120.json", + "M": 141312, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1325.5645000000004 + }, + "M=141312,N=1152": { + "file": "silu_config_M141312_N1152.json", + "M": 141312, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.48425 + }, + "M=141312,N=1280": { + "file": "silu_config_M141312_N1280.json", + "M": 141312, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.3245000000002 + }, + "M=141312,N=1344": { + "file": "silu_config_M141312_N1344.json", + "M": 141312, + "N": 1344, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 1350.1645000000003 + }, + "M=141312,N=1408": { + "file": "silu_config_M141312_N1408.json", + "M": 141312, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.2845000000002 + }, + "M=141312,N=1440": { + "file": "silu_config_M141312_N1440.json", + "M": 141312, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.3244999999997 + }, + "M=141312,N=1536": { + "file": "silu_config_M141312_N1536.json", + "M": 141312, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1333.8044999999993 + }, + "M=141312,N=1600": { + "file": "silu_config_M141312_N1600.json", + "M": 141312, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6845000000003 + }, + "M=141312,N=1664": { + "file": "silu_config_M141312_N1664.json", + "M": 141312, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.2444999999993 + }, + "M=141312,N=1728": { + "file": "silu_config_M141312_N1728.json", + "M": 141312, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1324.6844999999994 + }, + "M=141312,N=1760": { + "file": "silu_config_M141312_N1760.json", + "M": 141312, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1358.1645000000003 + }, + "M=141312,N=1792": { + "file": "silu_config_M141312_N1792.json", + "M": 141312, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1325.9644999999996 + }, + "M=141312,N=1920": { + "file": "silu_config_M141312_N1920.json", + "M": 141312, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6044999999995 + }, + "M=141312,N=2048": { + "file": "silu_config_M141312_N2048.json", + "M": 141312, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1053.8834999999995 + }, + "M=141312,N=2080": { + "file": "silu_config_M141312_N2080.json", + "M": 141312, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.5649999999996 + }, + "M=141312,N=2240": { + "file": 
"silu_config_M141312_N2240.json", + "M": 141312, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.64525 + }, + "M=141312,N=2400": { + "file": "silu_config_M141312_N2400.json", + "M": 141312, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1621.40575 + }, + "M=141312,N=2560": { + "file": "silu_config_M141312_N2560.json", + "M": 141312, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1699.4859999999999 + }, + "M=142336,N=128": { + "file": "silu_config_M142336_N128.json", + "M": 142336, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.91924999999996 + }, + "M=142336,N=160": { + "file": "silu_config_M142336_N160.json", + "M": 142336, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.95975000000004 + }, + "M=142336,N=192": { + "file": "silu_config_M142336_N192.json", + "M": 142336, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.23974999999993 + }, + "M=142336,N=256": { + "file": "silu_config_M142336_N256.json", + "M": 142336, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 168.95974999999999 + }, + "M=142336,N=320": { + "file": "silu_config_M142336_N320.json", + "M": 142336, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 349.68050000000017 + }, + "M=142336,N=384": { + "file": "silu_config_M142336_N384.json", + "M": 142336, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.2802499999999 + }, + "M=142336,N=480": { + "file": "silu_config_M142336_N480.json", + "M": 142336, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 353.3605000000001 + }, + "M=142336,N=512": { + "file": "silu_config_M142336_N512.json", + "M": 142336, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 282.43999999999994 + }, + "M=142336,N=576": { + "file": "silu_config_M142336_N576.json", + "M": 142336, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.44175 + }, + 
"M=142336,N=640": { + "file": "silu_config_M142336_N640.json", + "M": 142336, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 672.5617499999998 + }, + "M=142336,N=768": { + "file": "silu_config_M142336_N768.json", + "M": 142336, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 685.2017500000004 + }, + "M=142336,N=800": { + "file": "silu_config_M142336_N800.json", + "M": 142336, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 690.92175 + }, + "M=142336,N=896": { + "file": "silu_config_M142336_N896.json", + "M": 142336, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.5217499999999 + }, + "M=142336,N=960": { + "file": "silu_config_M142336_N960.json", + "M": 142336, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.1617499999998 + }, + "M=142336,N=1024": { + "file": "silu_config_M142336_N1024.json", + "M": 142336, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 542.4412500000003 + }, + "M=142336,N=1120": { + "file": "silu_config_M142336_N1120.json", + "M": 142336, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1335.2444999999998 + }, + "M=142336,N=1152": { + "file": "silu_config_M142336_N1152.json", + "M": 142336, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1317.8845000000006 + }, + "M=142336,N=1280": { + "file": "silu_config_M142336_N1280.json", + "M": 142336, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1319.0045000000005 + }, + "M=142336,N=1344": { + "file": "silu_config_M142336_N1344.json", + "M": 142336, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1348.9245 + }, + "M=142336,N=1408": { + "file": "silu_config_M142336_N1408.json", + "M": 142336, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.8844999999997 + }, + "M=142336,N=1440": { + "file": "silu_config_M142336_N1440.json", + "M": 142336, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1348.4044999999996 + }, + "M=142336,N=1536": { + "file": "silu_config_M142336_N1536.json", + "M": 142336, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1329.7244999999998 + }, + "M=142336,N=1600": { + "file": "silu_config_M142336_N1600.json", + "M": 142336, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1341.5645000000013 + }, + "M=142336,N=1664": { + "file": "silu_config_M142336_N1664.json", + "M": 142336, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.9645 + }, + "M=142336,N=1728": { + "file": "silu_config_M142336_N1728.json", + "M": 142336, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1352.4445 + }, + "M=142336,N=1760": { + "file": "silu_config_M142336_N1760.json", + "M": 142336, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.5644999999995 + }, + "M=142336,N=1792": { + "file": "silu_config_M142336_N1792.json", + "M": 142336, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1346.5245 + }, + "M=142336,N=1920": { + "file": "silu_config_M142336_N1920.json", + "M": 142336, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1342.0044999999996 + }, + "M=142336,N=2048": { + "file": "silu_config_M142336_N2048.json", + "M": 142336, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1075.6434999999997 + }, + "M=142336,N=2080": { + "file": "silu_config_M142336_N2080.json", + "M": 142336, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.8050000000007 + }, + "M=142336,N=2240": { + "file": "silu_config_M142336_N2240.json", + "M": 142336, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1557.4454999999998 + }, + "M=142336,N=2400": { + "file": "silu_config_M142336_N2400.json", + "M": 142336, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1622.6857500000006 + }, + "M=142336,N=2560": { + "file": "silu_config_M142336_N2560.json", + "M": 142336, + "N": 
2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1705.0860000000002 + }, + "M=143360,N=128": { + "file": "silu_config_M143360_N128.json", + "M": 143360, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 94.07925 + }, + "M=143360,N=160": { + "file": "silu_config_M143360_N160.json", + "M": 143360, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 172.63975 + }, + "M=143360,N=192": { + "file": "silu_config_M143360_N192.json", + "M": 143360, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 172.7197499999999 + }, + "M=143360,N=256": { + "file": "silu_config_M143360_N256.json", + "M": 143360, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 180.47975000000002 + }, + "M=143360,N=320": { + "file": "silu_config_M143360_N320.json", + "M": 143360, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 349.52049999999986 + }, + "M=143360,N=384": { + "file": "silu_config_M143360_N384.json", + "M": 143360, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.3605000000001 + }, + "M=143360,N=480": { + "file": "silu_config_M143360_N480.json", + "M": 143360, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 353.3605 + }, + "M=143360,N=512": { + "file": "silu_config_M143360_N512.json", + "M": 143360, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 284.28 + }, + "M=143360,N=576": { + "file": "silu_config_M143360_N576.json", + "M": 143360, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.44175 + }, + "M=143360,N=640": { + "file": "silu_config_M143360_N640.json", + "M": 143360, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.1217500000005 + }, + "M=143360,N=768": { + "file": "silu_config_M143360_N768.json", + "M": 143360, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 667.92175 + }, + "M=143360,N=800": { + "file": "silu_config_M143360_N800.json", + "M": 143360, + "N": 800, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 672.0817499999998 + }, + "M=143360,N=896": { + "file": "silu_config_M143360_N896.json", + "M": 143360, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.9617500000004 + }, + "M=143360,N=960": { + "file": "silu_config_M143360_N960.json", + "M": 143360, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.8017499999999 + }, + "M=143360,N=1024": { + "file": "silu_config_M143360_N1024.json", + "M": 143360, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 546.24125 + }, + "M=143360,N=1120": { + "file": "silu_config_M143360_N1120.json", + "M": 143360, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1317.7644999999998 + }, + "M=143360,N=1152": { + "file": "silu_config_M143360_N1152.json", + "M": 143360, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1329.6845000000003 + }, + "M=143360,N=1280": { + "file": "silu_config_M143360_N1280.json", + "M": 143360, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.4845000000005 + }, + "M=143360,N=1344": { + "file": "silu_config_M143360_N1344.json", + "M": 143360, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1328.5245 + }, + "M=143360,N=1408": { + "file": "silu_config_M143360_N1408.json", + "M": 143360, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.4045000000006 + }, + "M=143360,N=1440": { + "file": "silu_config_M143360_N1440.json", + "M": 143360, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.1245000000004 + }, + "M=143360,N=1536": { + "file": "silu_config_M143360_N1536.json", + "M": 143360, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.2844999999993 + }, + "M=143360,N=1600": { + "file": "silu_config_M143360_N1600.json", + "M": 143360, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.8045000000002 + }, + "M=143360,N=1664": { + "file": 
"silu_config_M143360_N1664.json", + "M": 143360, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.6444999999994 + }, + "M=143360,N=1728": { + "file": "silu_config_M143360_N1728.json", + "M": 143360, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1344.3244999999997 + }, + "M=143360,N=1760": { + "file": "silu_config_M143360_N1760.json", + "M": 143360, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.8445000000002 + }, + "M=143360,N=1792": { + "file": "silu_config_M143360_N1792.json", + "M": 143360, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1318.2044999999994 + }, + "M=143360,N=1920": { + "file": "silu_config_M143360_N1920.json", + "M": 143360, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.9245 + }, + "M=143360,N=2048": { + "file": "silu_config_M143360_N2048.json", + "M": 143360, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1050.2435 + }, + "M=143360,N=2080": { + "file": "silu_config_M143360_N2080.json", + "M": 143360, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1457.8849999999998 + }, + "M=143360,N=2240": { + "file": "silu_config_M143360_N2240.json", + "M": 143360, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1557.4854999999998 + }, + "M=143360,N=2400": { + "file": "silu_config_M143360_N2400.json", + "M": 143360, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1641.6457499999997 + }, + "M=143360,N=2560": { + "file": "silu_config_M143360_N2560.json", + "M": 143360, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1706.406 + }, + "M=144384,N=128": { + "file": "silu_config_M144384_N128.json", + "M": 144384, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.87925000000003 + }, + "M=144384,N=160": { + "file": "silu_config_M144384_N160.json", + "M": 144384, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
176.31975000000003 + }, + "M=144384,N=192": { + "file": "silu_config_M144384_N192.json", + "M": 144384, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 193.27974999999992 + }, + "M=144384,N=256": { + "file": "silu_config_M144384_N256.json", + "M": 144384, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.9595 + }, + "M=144384,N=320": { + "file": "silu_config_M144384_N320.json", + "M": 144384, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.8405 + }, + "M=144384,N=384": { + "file": "silu_config_M144384_N384.json", + "M": 144384, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.16050000000007 + }, + "M=144384,N=480": { + "file": "silu_config_M144384_N480.json", + "M": 144384, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.0005000000001 + }, + "M=144384,N=512": { + "file": "silu_config_M144384_N512.json", + "M": 144384, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 286.24 + }, + "M=144384,N=576": { + "file": "silu_config_M144384_N576.json", + "M": 144384, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.12175 + }, + "M=144384,N=640": { + "file": "silu_config_M144384_N640.json", + "M": 144384, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.3217500000001 + }, + "M=144384,N=768": { + "file": "silu_config_M144384_N768.json", + "M": 144384, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 683.7617499999997 + }, + "M=144384,N=800": { + "file": "silu_config_M144384_N800.json", + "M": 144384, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 692.0017499999999 + }, + "M=144384,N=896": { + "file": "silu_config_M144384_N896.json", + "M": 144384, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.44175 + }, + "M=144384,N=960": { + "file": "silu_config_M144384_N960.json", + "M": 144384, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 692.0417500000001 + 
}, + "M=144384,N=1024": { + "file": "silu_config_M144384_N1024.json", + "M": 144384, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 550.3612500000004 + }, + "M=144384,N=1120": { + "file": "silu_config_M144384_N1120.json", + "M": 144384, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1357.3244999999997 + }, + "M=144384,N=1152": { + "file": "silu_config_M144384_N1152.json", + "M": 144384, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1346.9645000000005 + }, + "M=144384,N=1280": { + "file": "silu_config_M144384_N1280.json", + "M": 144384, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1347.8844999999997 + }, + "M=144384,N=1344": { + "file": "silu_config_M144384_N1344.json", + "M": 144384, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1365.7644999999993 + }, + "M=144384,N=1408": { + "file": "silu_config_M144384_N1408.json", + "M": 144384, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1347.3245000000002 + }, + "M=144384,N=1440": { + "file": "silu_config_M144384_N1440.json", + "M": 144384, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1339.2845000000002 + }, + "M=144384,N=1536": { + "file": "silu_config_M144384_N1536.json", + "M": 144384, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1348.9245000000005 + }, + "M=144384,N=1600": { + "file": "silu_config_M144384_N1600.json", + "M": 144384, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.6844999999998 + }, + "M=144384,N=1664": { + "file": "silu_config_M144384_N1664.json", + "M": 144384, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1343.9645 + }, + "M=144384,N=1728": { + "file": "silu_config_M144384_N1728.json", + "M": 144384, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1371.9647499999996 + }, + "M=144384,N=1760": { + "file": "silu_config_M144384_N1760.json", + "M": 144384, + "N": 1760, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1372.524750000001 + }, + "M=144384,N=1792": { + "file": "silu_config_M144384_N1792.json", + "M": 144384, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1365.7645000000002 + }, + "M=144384,N=1920": { + "file": "silu_config_M144384_N1920.json", + "M": 144384, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1360.7644999999998 + }, + "M=144384,N=2048": { + "file": "silu_config_M144384_N2048.json", + "M": 144384, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1090.5234999999998 + }, + "M=144384,N=2080": { + "file": "silu_config_M144384_N2080.json", + "M": 144384, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1478.6049999999996 + }, + "M=144384,N=2240": { + "file": "silu_config_M144384_N2240.json", + "M": 144384, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1569.9654999999993 + }, + "M=144384,N=2400": { + "file": "silu_config_M144384_N2400.json", + "M": 144384, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1676.1659999999993 + }, + "M=144384,N=2560": { + "file": "silu_config_M144384_N2560.json", + "M": 144384, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.0062500000004 + }, + "M=145408,N=128": { + "file": "silu_config_M145408_N128.json", + "M": 145408, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.59950000000002 + }, + "M=145408,N=160": { + "file": "silu_config_M145408_N160.json", + "M": 145408, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.43975000000003 + }, + "M=145408,N=192": { + "file": "silu_config_M145408_N192.json", + "M": 145408, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 186.67975 + }, + "M=145408,N=256": { + "file": "silu_config_M145408_N256.json", + "M": 145408, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.87975000000006 + }, + "M=145408,N=320": { + "file": 
"silu_config_M145408_N320.json", + "M": 145408, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.6004999999999 + }, + "M=145408,N=384": { + "file": "silu_config_M145408_N384.json", + "M": 145408, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.80050000000006 + }, + "M=145408,N=480": { + "file": "silu_config_M145408_N480.json", + "M": 145408, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 360.56050000000016 + }, + "M=145408,N=512": { + "file": "silu_config_M145408_N512.json", + "M": 145408, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 288.15999999999997 + }, + "M=145408,N=576": { + "file": "silu_config_M145408_N576.json", + "M": 145408, + "N": 576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 702.12175 + }, + "M=145408,N=640": { + "file": "silu_config_M145408_N640.json", + "M": 145408, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.5617499999998 + }, + "M=145408,N=768": { + "file": "silu_config_M145408_N768.json", + "M": 145408, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 688.5217499999997 + }, + "M=145408,N=800": { + "file": "silu_config_M145408_N800.json", + "M": 145408, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.2017499999997 + }, + "M=145408,N=896": { + "file": "silu_config_M145408_N896.json", + "M": 145408, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 691.2817500000001 + }, + "M=145408,N=960": { + "file": "silu_config_M145408_N960.json", + "M": 145408, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 696.8417499999996 + }, + "M=145408,N=1024": { + "file": "silu_config_M145408_N1024.json", + "M": 145408, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 554.2012500000003 + }, + "M=145408,N=1120": { + "file": "silu_config_M145408_N1120.json", + "M": 145408, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.7644999999998 + }, + 
"M=145408,N=1152": { + "file": "silu_config_M145408_N1152.json", + "M": 145408, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1354.4045 + }, + "M=145408,N=1280": { + "file": "silu_config_M145408_N1280.json", + "M": 145408, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1347.0045 + }, + "M=145408,N=1344": { + "file": "silu_config_M145408_N1344.json", + "M": 145408, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1364.2845000000002 + }, + "M=145408,N=1408": { + "file": "silu_config_M145408_N1408.json", + "M": 145408, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1348.6045000000008 + }, + "M=145408,N=1440": { + "file": "silu_config_M145408_N1440.json", + "M": 145408, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1369.4445 + }, + "M=145408,N=1536": { + "file": "silu_config_M145408_N1536.json", + "M": 145408, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1340.1245000000008 + }, + "M=145408,N=1600": { + "file": "silu_config_M145408_N1600.json", + "M": 145408, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.1245 + }, + "M=145408,N=1664": { + "file": "silu_config_M145408_N1664.json", + "M": 145408, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1353.2845000000002 + }, + "M=145408,N=1728": { + "file": "silu_config_M145408_N1728.json", + "M": 145408, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1352.1645000000003 + }, + "M=145408,N=1760": { + "file": "silu_config_M145408_N1760.json", + "M": 145408, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.0845 + }, + "M=145408,N=1792": { + "file": "silu_config_M145408_N1792.json", + "M": 145408, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.2844999999998 + }, + "M=145408,N=1920": { + "file": "silu_config_M145408_N1920.json", + "M": 145408, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1378.1647500000004 + }, + "M=145408,N=2048": { + "file": "silu_config_M145408_N2048.json", + "M": 145408, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1088.5634999999997 + }, + "M=145408,N=2080": { + "file": "silu_config_M145408_N2080.json", + "M": 145408, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1505.9252499999998 + }, + "M=145408,N=2240": { + "file": "silu_config_M145408_N2240.json", + "M": 145408, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1580.2855000000009 + }, + "M=145408,N=2400": { + "file": "silu_config_M145408_N2400.json", + "M": 145408, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1668.1657499999992 + }, + "M=145408,N=2560": { + "file": "silu_config_M145408_N2560.json", + "M": 145408, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1751.1662500000002 + }, + "M=146432,N=128": { + "file": "silu_config_M146432_N128.json", + "M": 146432, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 96.35925000000002 + }, + "M=146432,N=160": { + "file": "silu_config_M146432_N160.json", + "M": 146432, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.83975000000004 + }, + "M=146432,N=192": { + "file": "silu_config_M146432_N192.json", + "M": 146432, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.47974999999997 + }, + "M=146432,N=256": { + "file": "silu_config_M146432_N256.json", + "M": 146432, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.91974999999996 + }, + "M=146432,N=320": { + "file": "silu_config_M146432_N320.json", + "M": 146432, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.4404999999998 + }, + "M=146432,N=384": { + "file": "silu_config_M146432_N384.json", + "M": 146432, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.7605 + }, + "M=146432,N=480": { + "file": "silu_config_M146432_N480.json", + "M": 146432, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 363.3204999999999 + }, + "M=146432,N=512": { + "file": "silu_config_M146432_N512.json", + "M": 146432, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 290.24024999999995 + }, + "M=146432,N=576": { + "file": "silu_config_M146432_N576.json", + "M": 146432, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.5617500000003 + }, + "M=146432,N=640": { + "file": "silu_config_M146432_N640.json", + "M": 146432, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 703.12175 + }, + "M=146432,N=768": { + "file": "silu_config_M146432_N768.json", + "M": 146432, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 704.2417500000001 + }, + "M=146432,N=800": { + "file": "silu_config_M146432_N800.json", + "M": 146432, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.1617500000002 + }, + "M=146432,N=896": { + "file": "silu_config_M146432_N896.json", + "M": 146432, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.1217500000009 + }, + "M=146432,N=960": { + "file": "silu_config_M146432_N960.json", + "M": 146432, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.2417499999997 + }, + "M=146432,N=1024": { + "file": "silu_config_M146432_N1024.json", + "M": 146432, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 557.9612500000001 + }, + "M=146432,N=1120": { + "file": "silu_config_M146432_N1120.json", + "M": 146432, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1373.4047500000001 + }, + "M=146432,N=1152": { + "file": "silu_config_M146432_N1152.json", + "M": 146432, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.2845000000002 + }, + "M=146432,N=1280": { + "file": "silu_config_M146432_N1280.json", + "M": 146432, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1356.4845000000005 + }, + "M=146432,N=1344": { + "file": 
"silu_config_M146432_N1344.json", + "M": 146432, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1376.4047499999997 + }, + "M=146432,N=1408": { + "file": "silu_config_M146432_N1408.json", + "M": 146432, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1377.3647499999997 + }, + "M=146432,N=1440": { + "file": "silu_config_M146432_N1440.json", + "M": 146432, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1375.60475 + }, + "M=146432,N=1536": { + "file": "silu_config_M146432_N1536.json", + "M": 146432, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1378.8847499999997 + }, + "M=146432,N=1600": { + "file": "silu_config_M146432_N1600.json", + "M": 146432, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1389.8847499999993 + }, + "M=146432,N=1664": { + "file": "silu_config_M146432_N1664.json", + "M": 146432, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.924750000001 + }, + "M=146432,N=1728": { + "file": "silu_config_M146432_N1728.json", + "M": 146432, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1380.0047499999996 + }, + "M=146432,N=1760": { + "file": "silu_config_M146432_N1760.json", + "M": 146432, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.9247500000001 + }, + "M=146432,N=1792": { + "file": "silu_config_M146432_N1792.json", + "M": 146432, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.7245000000003 + }, + "M=146432,N=1920": { + "file": "silu_config_M146432_N1920.json", + "M": 146432, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.9247499999997 + }, + "M=146432,N=2048": { + "file": "silu_config_M146432_N2048.json", + "M": 146432, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1096.0037499999994 + }, + "M=146432,N=2080": { + "file": "silu_config_M146432_N2080.json", + "M": 146432, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1526.8052500000013 + }, + "M=146432,N=2240": { + "file": "silu_config_M146432_N2240.json", + "M": 146432, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.0055000000002 + }, + "M=146432,N=2400": { + "file": "silu_config_M146432_N2400.json", + "M": 146432, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.0860000000002 + }, + "M=146432,N=2560": { + "file": "silu_config_M146432_N2560.json", + "M": 146432, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1770.52625 + }, + "M=147456,N=128": { + "file": "silu_config_M147456_N128.json", + "M": 147456, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.43924999999999 + }, + "M=147456,N=160": { + "file": "silu_config_M147456_N160.json", + "M": 147456, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.99974999999998 + }, + "M=147456,N=192": { + "file": "silu_config_M147456_N192.json", + "M": 147456, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.79975000000002 + }, + "M=147456,N=256": { + "file": "silu_config_M147456_N256.json", + "M": 147456, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.19950000000006 + }, + "M=147456,N=320": { + "file": "silu_config_M147456_N320.json", + "M": 147456, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 361.52049999999986 + }, + "M=147456,N=384": { + "file": "silu_config_M147456_N384.json", + "M": 147456, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.3605 + }, + "M=147456,N=480": { + "file": "silu_config_M147456_N480.json", + "M": 147456, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 365.5604999999998 + }, + "M=147456,N=512": { + "file": "silu_config_M147456_N512.json", + "M": 147456, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 275.80000000000007 + }, + "M=147456,N=576": { + "file": "silu_config_M147456_N576.json", + "M": 147456, + "N": 576, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 689.2817499999999 + }, + "M=147456,N=640": { + "file": "silu_config_M147456_N640.json", + "M": 147456, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 685.44175 + }, + "M=147456,N=768": { + "file": "silu_config_M147456_N768.json", + "M": 147456, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 697.9617499999999 + }, + "M=147456,N=800": { + "file": "silu_config_M147456_N800.json", + "M": 147456, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 692.5617500000001 + }, + "M=147456,N=896": { + "file": "silu_config_M147456_N896.json", + "M": 147456, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 689.7217500000002 + }, + "M=147456,N=960": { + "file": "silu_config_M147456_N960.json", + "M": 147456, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.16175 + }, + "M=147456,N=1024": { + "file": "silu_config_M147456_N1024.json", + "M": 147456, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 561.8415000000002 + }, + "M=147456,N=1120": { + "file": "silu_config_M147456_N1120.json", + "M": 147456, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.0847499999995 + }, + "M=147456,N=1152": { + "file": "silu_config_M147456_N1152.json", + "M": 147456, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1364.6444999999994 + }, + "M=147456,N=1280": { + "file": "silu_config_M147456_N1280.json", + "M": 147456, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1385.44475 + }, + "M=147456,N=1344": { + "file": "silu_config_M147456_N1344.json", + "M": 147456, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1383.5647500000005 + }, + "M=147456,N=1408": { + "file": "silu_config_M147456_N1408.json", + "M": 147456, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1375.8047499999993 + }, + "M=147456,N=1440": { + "file": "silu_config_M147456_N1440.json", + "M": 
147456, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1384.9247500000001 + }, + "M=147456,N=1536": { + "file": "silu_config_M147456_N1536.json", + "M": 147456, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1369.1644999999994 + }, + "M=147456,N=1600": { + "file": "silu_config_M147456_N1600.json", + "M": 147456, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1386.12475 + }, + "M=147456,N=1664": { + "file": "silu_config_M147456_N1664.json", + "M": 147456, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.1247500000004 + }, + "M=147456,N=1728": { + "file": "silu_config_M147456_N1728.json", + "M": 147456, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.2047499999994 + }, + "M=147456,N=1760": { + "file": "silu_config_M147456_N1760.json", + "M": 147456, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.2047499999994 + }, + "M=147456,N=1792": { + "file": "silu_config_M147456_N1792.json", + "M": 147456, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1383.3647499999997 + }, + "M=147456,N=1920": { + "file": "silu_config_M147456_N1920.json", + "M": 147456, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.04475 + }, + "M=147456,N=2048": { + "file": "silu_config_M147456_N2048.json", + "M": 147456, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1089.1635 + }, + "M=147456,N=2080": { + "file": "silu_config_M147456_N2080.json", + "M": 147456, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1530.4052499999998 + }, + "M=147456,N=2240": { + "file": "silu_config_M147456_N2240.json", + "M": 147456, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1612.6457499999997 + }, + "M=147456,N=2400": { + "file": "silu_config_M147456_N2400.json", + "M": 147456, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1681.0460000000003 + }, + "M=147456,N=2560": { + 
"file": "silu_config_M147456_N2560.json", + "M": 147456, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1754.0862499999994 + }, + "M=148480,N=128": { + "file": "silu_config_M148480_N128.json", + "M": 148480, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.99924999999999 + }, + "M=148480,N=160": { + "file": "silu_config_M148480_N160.json", + "M": 148480, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 188.99974999999995 + }, + "M=148480,N=192": { + "file": "silu_config_M148480_N192.json", + "M": 148480, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.23975000000002 + }, + "M=148480,N=256": { + "file": "silu_config_M148480_N256.json", + "M": 148480, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.1995 + }, + "M=148480,N=320": { + "file": "silu_config_M148480_N320.json", + "M": 148480, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 362.0804999999999 + }, + "M=148480,N=384": { + "file": "silu_config_M148480_N384.json", + "M": 148480, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.6405000000001 + }, + "M=148480,N=480": { + "file": "silu_config_M148480_N480.json", + "M": 148480, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 365.92049999999995 + }, + "M=148480,N=512": { + "file": "silu_config_M148480_N512.json", + "M": 148480, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 294.2002500000001 + }, + "M=148480,N=576": { + "file": "silu_config_M148480_N576.json", + "M": 148480, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 713.922 + }, + "M=148480,N=640": { + "file": "silu_config_M148480_N640.json", + "M": 148480, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 701.12175 + }, + "M=148480,N=768": { + "file": "silu_config_M148480_N768.json", + "M": 148480, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.7617500000006 + }, + "M=148480,N=800": { 
+ "file": "silu_config_M148480_N800.json", + "M": 148480, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.2017500000002 + }, + "M=148480,N=896": { + "file": "silu_config_M148480_N896.json", + "M": 148480, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.36175 + }, + "M=148480,N=960": { + "file": "silu_config_M148480_N960.json", + "M": 148480, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 698.8417499999998 + }, + "M=148480,N=1024": { + "file": "silu_config_M148480_N1024.json", + "M": 148480, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 565.9212500000003 + }, + "M=148480,N=1120": { + "file": "silu_config_M148480_N1120.json", + "M": 148480, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.00475 + }, + "M=148480,N=1152": { + "file": "silu_config_M148480_N1152.json", + "M": 148480, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1393.5247499999996 + }, + "M=148480,N=1280": { + "file": "silu_config_M148480_N1280.json", + "M": 148480, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.8045000000002 + }, + "M=148480,N=1344": { + "file": "silu_config_M148480_N1344.json", + "M": 148480, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1386.0047499999996 + }, + "M=148480,N=1408": { + "file": "silu_config_M148480_N1408.json", + "M": 148480, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1388.6047499999995 + }, + "M=148480,N=1440": { + "file": "silu_config_M148480_N1440.json", + "M": 148480, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.9245 + }, + "M=148480,N=1536": { + "file": "silu_config_M148480_N1536.json", + "M": 148480, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1369.7247500000003 + }, + "M=148480,N=1600": { + "file": "silu_config_M148480_N1600.json", + "M": 148480, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1398.9247500000001 + }, + "M=148480,N=1664": { + "file": "silu_config_M148480_N1664.json", + "M": 148480, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1382.9247499999992 + }, + "M=148480,N=1728": { + "file": "silu_config_M148480_N1728.json", + "M": 148480, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1391.16475 + }, + "M=148480,N=1760": { + "file": "silu_config_M148480_N1760.json", + "M": 148480, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.8447499999993 + }, + "M=148480,N=1792": { + "file": "silu_config_M148480_N1792.json", + "M": 148480, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1385.8447500000002 + }, + "M=148480,N=1920": { + "file": "silu_config_M148480_N1920.json", + "M": 148480, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.7247500000012 + }, + "M=148480,N=2048": { + "file": "silu_config_M148480_N2048.json", + "M": 148480, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1111.9235000000003 + }, + "M=148480,N=2080": { + "file": "silu_config_M148480_N2080.json", + "M": 148480, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.28525 + }, + "M=148480,N=2240": { + "file": "silu_config_M148480_N2240.json", + "M": 148480, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1601.9655000000002 + }, + "M=148480,N=2400": { + "file": "silu_config_M148480_N2400.json", + "M": 148480, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.286 + }, + "M=148480,N=2560": { + "file": "silu_config_M148480_N2560.json", + "M": 148480, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.0462499999994 + }, + "M=149504,N=128": { + "file": "silu_config_M149504_N128.json", + "M": 149504, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.39950000000002 + }, + "M=149504,N=160": { + "file": "silu_config_M149504_N160.json", + "M": 149504, + "N": 160, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 182.31974999999994 + }, + "M=149504,N=192": { + "file": "silu_config_M149504_N192.json", + "M": 149504, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.55975 + }, + "M=149504,N=256": { + "file": "silu_config_M149504_N256.json", + "M": 149504, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.27975000000004 + }, + "M=149504,N=320": { + "file": "silu_config_M149504_N320.json", + "M": 149504, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.8805 + }, + "M=149504,N=384": { + "file": "silu_config_M149504_N384.json", + "M": 149504, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 364.72050000000013 + }, + "M=149504,N=480": { + "file": "silu_config_M149504_N480.json", + "M": 149504, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.8404999999998 + }, + "M=149504,N=512": { + "file": "silu_config_M149504_N512.json", + "M": 149504, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 289.2402500000002 + }, + "M=149504,N=576": { + "file": "silu_config_M149504_N576.json", + "M": 149504, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 720.9219999999998 + }, + "M=149504,N=640": { + "file": "silu_config_M149504_N640.json", + "M": 149504, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.88175 + }, + "M=149504,N=768": { + "file": "silu_config_M149504_N768.json", + "M": 149504, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.3620000000005 + }, + "M=149504,N=800": { + "file": "silu_config_M149504_N800.json", + "M": 149504, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 724.1619999999998 + }, + "M=149504,N=896": { + "file": "silu_config_M149504_N896.json", + "M": 149504, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 710.3620000000003 + }, + "M=149504,N=960": { + "file": "silu_config_M149504_N960.json", + "M": 149504, + "N": 960, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 727.002 + }, + "M=149504,N=1024": { + "file": "silu_config_M149504_N1024.json", + "M": 149504, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 569.5615000000003 + }, + "M=149504,N=1120": { + "file": "silu_config_M149504_N1120.json", + "M": 149504, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1394.0047500000005 + }, + "M=149504,N=1152": { + "file": "silu_config_M149504_N1152.json", + "M": 149504, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.6447499999995 + }, + "M=149504,N=1280": { + "file": "silu_config_M149504_N1280.json", + "M": 149504, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.60475 + }, + "M=149504,N=1344": { + "file": "silu_config_M149504_N1344.json", + "M": 149504, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1413.2047500000003 + }, + "M=149504,N=1408": { + "file": "silu_config_M149504_N1408.json", + "M": 149504, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1385.96475 + }, + "M=149504,N=1440": { + "file": "silu_config_M149504_N1440.json", + "M": 149504, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.68475 + }, + "M=149504,N=1536": { + "file": "silu_config_M149504_N1536.json", + "M": 149504, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1398.12475 + }, + "M=149504,N=1600": { + "file": "silu_config_M149504_N1600.json", + "M": 149504, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.2047499999999 + }, + "M=149504,N=1664": { + "file": "silu_config_M149504_N1664.json", + "M": 149504, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1409.6447500000008 + }, + "M=149504,N=1728": { + "file": "silu_config_M149504_N1728.json", + "M": 149504, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1408.8047499999998 + }, + "M=149504,N=1760": { + "file": "silu_config_M149504_N1760.json", + 
"M": 149504, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1421.16475 + }, + "M=149504,N=1792": { + "file": "silu_config_M149504_N1792.json", + "M": 149504, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.60475 + }, + "M=149504,N=1920": { + "file": "silu_config_M149504_N1920.json", + "M": 149504, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1408.0047500000005 + }, + "M=149504,N=2048": { + "file": "silu_config_M149504_N2048.json", + "M": 149504, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1119.2034999999992 + }, + "M=149504,N=2080": { + "file": "silu_config_M149504_N2080.json", + "M": 149504, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1550.8854999999999 + }, + "M=149504,N=2240": { + "file": "silu_config_M149504_N2240.json", + "M": 149504, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1634.20575 + }, + "M=149504,N=2400": { + "file": "silu_config_M149504_N2400.json", + "M": 149504, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1714.406 + }, + "M=149504,N=2560": { + "file": "silu_config_M149504_N2560.json", + "M": 149504, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1809.6864999999998 + }, + "M=150528,N=128": { + "file": "silu_config_M150528_N128.json", + "M": 150528, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.15925000000003 + }, + "M=150528,N=160": { + "file": "silu_config_M150528_N160.json", + "M": 150528, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.59974999999994 + }, + "M=150528,N=192": { + "file": "silu_config_M150528_N192.json", + "M": 150528, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 201.23999999999995 + }, + "M=150528,N=256": { + "file": "silu_config_M150528_N256.json", + "M": 150528, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.19975 + }, + "M=150528,N=320": { + "file": 
"silu_config_M150528_N320.json", + "M": 150528, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.1204999999999 + }, + "M=150528,N=384": { + "file": "silu_config_M150528_N384.json", + "M": 150528, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.7205 + }, + "M=150528,N=480": { + "file": "silu_config_M150528_N480.json", + "M": 150528, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.1205000000001 + }, + "M=150528,N=512": { + "file": "silu_config_M150528_N512.json", + "M": 150528, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 298.00024999999994 + }, + "M=150528,N=576": { + "file": "silu_config_M150528_N576.json", + "M": 150528, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.7617499999997 + }, + "M=150528,N=640": { + "file": "silu_config_M150528_N640.json", + "M": 150528, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.8020000000004 + }, + "M=150528,N=768": { + "file": "silu_config_M150528_N768.json", + "M": 150528, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 712.1219999999996 + }, + "M=150528,N=800": { + "file": "silu_config_M150528_N800.json", + "M": 150528, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 718.1220000000001 + }, + "M=150528,N=896": { + "file": "silu_config_M150528_N896.json", + "M": 150528, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 703.9617499999999 + }, + "M=150528,N=960": { + "file": "silu_config_M150528_N960.json", + "M": 150528, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.5620000000001 + }, + "M=150528,N=1024": { + "file": "silu_config_M150528_N1024.json", + "M": 150528, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 573.1612500000001 + }, + "M=150528,N=1120": { + "file": "silu_config_M150528_N1120.json", + "M": 150528, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1392.9247500000001 + }, + 
"M=150528,N=1152": { + "file": "silu_config_M150528_N1152.json", + "M": 150528, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1403.0847500000004 + }, + "M=150528,N=1280": { + "file": "silu_config_M150528_N1280.json", + "M": 150528, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.2447500000003 + }, + "M=150528,N=1344": { + "file": "silu_config_M150528_N1344.json", + "M": 150528, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.00475 + }, + "M=150528,N=1408": { + "file": "silu_config_M150528_N1408.json", + "M": 150528, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1405.6447499999995 + }, + "M=150528,N=1440": { + "file": "silu_config_M150528_N1440.json", + "M": 150528, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1416.3647499999997 + }, + "M=150528,N=1536": { + "file": "silu_config_M150528_N1536.json", + "M": 150528, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.9247499999997 + }, + "M=150528,N=1600": { + "file": "silu_config_M150528_N1600.json", + "M": 150528, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1407.9247500000001 + }, + "M=150528,N=1664": { + "file": "silu_config_M150528_N1664.json", + "M": 150528, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.3647499999997 + }, + "M=150528,N=1728": { + "file": "silu_config_M150528_N1728.json", + "M": 150528, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1429.3250000000012 + }, + "M=150528,N=1760": { + "file": "silu_config_M150528_N1760.json", + "M": 150528, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1419.44475 + }, + "M=150528,N=1792": { + "file": "silu_config_M150528_N1792.json", + "M": 150528, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.96475 + }, + "M=150528,N=1920": { + "file": "silu_config_M150528_N1920.json", + "M": 150528, + "N": 1920, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1436.7649999999999 + }, + "M=150528,N=2048": { + "file": "silu_config_M150528_N2048.json", + "M": 150528, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1135.2037500000001 + }, + "M=150528,N=2080": { + "file": "silu_config_M150528_N2080.json", + "M": 150528, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1559.6855 + }, + "M=150528,N=2240": { + "file": "silu_config_M150528_N2240.json", + "M": 150528, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1635.8857499999995 + }, + "M=150528,N=2400": { + "file": "silu_config_M150528_N2400.json", + "M": 150528, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1747.1662500000002 + }, + "M=150528,N=2560": { + "file": "silu_config_M150528_N2560.json", + "M": 150528, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1820.6064999999999 + }, + "M=151552,N=128": { + "file": "silu_config_M151552_N128.json", + "M": 151552, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.71950000000001 + }, + "M=151552,N=160": { + "file": "silu_config_M151552_N160.json", + "M": 151552, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.43975 + }, + "M=151552,N=192": { + "file": "silu_config_M151552_N192.json", + "M": 151552, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.91975000000002 + }, + "M=151552,N=256": { + "file": "silu_config_M151552_N256.json", + "M": 151552, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + "M=151552,N=320": { + "file": "silu_config_M151552_N320.json", + "M": 151552, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.6004999999999 + }, + "M=151552,N=384": { + "file": "silu_config_M151552_N384.json", + "M": 151552, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.08050000000014 + }, + "M=151552,N=480": { + "file": "silu_config_M151552_N480.json", + "M": 151552, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 375.24049999999977 + }, + "M=151552,N=512": { + "file": "silu_config_M151552_N512.json", + "M": 151552, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 299.9602500000001 + }, + "M=151552,N=576": { + "file": "silu_config_M151552_N576.json", + "M": 151552, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 700.6817500000002 + }, + "M=151552,N=640": { + "file": "silu_config_M151552_N640.json", + "M": 151552, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 707.202 + }, + "M=151552,N=768": { + "file": "silu_config_M151552_N768.json", + "M": 151552, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.5620000000008 + }, + "M=151552,N=800": { + "file": "silu_config_M151552_N800.json", + "M": 151552, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 711.4020000000003 + }, + "M=151552,N=896": { + "file": "silu_config_M151552_N896.json", + "M": 151552, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.6819999999998 + }, + "M=151552,N=960": { + "file": "silu_config_M151552_N960.json", + "M": 151552, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 736.5219999999999 + }, + "M=151552,N=1024": { + "file": "silu_config_M151552_N1024.json", + "M": 151552, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 576.9615000000001 + }, + "M=151552,N=1120": { + "file": "silu_config_M151552_N1120.json", + "M": 151552, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1443.2849999999994 + }, + "M=151552,N=1152": { + "file": "silu_config_M151552_N1152.json", + "M": 151552, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1401.924750000001 + }, + "M=151552,N=1280": { + "file": "silu_config_M151552_N1280.json", + "M": 151552, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.64475 + }, + "M=151552,N=1344": { + "file": "silu_config_M151552_N1344.json", + "M": 
151552, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.68475 + }, + "M=151552,N=1408": { + "file": "silu_config_M151552_N1408.json", + "M": 151552, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.7647500000003 + }, + "M=151552,N=1440": { + "file": "silu_config_M151552_N1440.json", + "M": 151552, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1415.5647499999995 + }, + "M=151552,N=1536": { + "file": "silu_config_M151552_N1536.json", + "M": 151552, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1416.9247499999997 + }, + "M=151552,N=1600": { + "file": "silu_config_M151552_N1600.json", + "M": 151552, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.3247500000007 + }, + "M=151552,N=1664": { + "file": "silu_config_M151552_N1664.json", + "M": 151552, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1430.3250000000007 + }, + "M=151552,N=1728": { + "file": "silu_config_M151552_N1728.json", + "M": 151552, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.96475 + }, + "M=151552,N=1760": { + "file": "silu_config_M151552_N1760.json", + "M": 151552, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1429.1649999999995 + }, + "M=151552,N=1792": { + "file": "silu_config_M151552_N1792.json", + "M": 151552, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.3247499999998 + }, + "M=151552,N=1920": { + "file": "silu_config_M151552_N1920.json", + "M": 151552, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.2050000000004 + }, + "M=151552,N=2048": { + "file": "silu_config_M151552_N2048.json", + "M": 151552, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1132.9634999999998 + }, + "M=151552,N=2080": { + "file": "silu_config_M151552_N2080.json", + "M": 151552, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1570.2855 + }, + "M=151552,N=2240": { + 
"file": "silu_config_M151552_N2240.json", + "M": 151552, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.4457499999999 + }, + "M=151552,N=2400": { + "file": "silu_config_M151552_N2400.json", + "M": 151552, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1756.52625 + }, + "M=151552,N=2560": { + "file": "silu_config_M151552_N2560.json", + "M": 151552, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1821.0865000000003 + }, + "M=152576,N=128": { + "file": "silu_config_M152576_N128.json", + "M": 152576, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.11950000000002 + }, + "M=152576,N=160": { + "file": "silu_config_M152576_N160.json", + "M": 152576, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 196.95974999999996 + }, + "M=152576,N=192": { + "file": "silu_config_M152576_N192.json", + "M": 152576, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.15975000000003 + }, + "M=152576,N=256": { + "file": "silu_config_M152576_N256.json", + "M": 152576, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 168.35974999999996 + }, + "M=152576,N=320": { + "file": "silu_config_M152576_N320.json", + "M": 152576, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.7605 + }, + "M=152576,N=384": { + "file": "silu_config_M152576_N384.json", + "M": 152576, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.20050000000003 + }, + "M=152576,N=480": { + "file": "silu_config_M152576_N480.json", + "M": 152576, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.04049999999995 + }, + "M=152576,N=512": { + "file": "silu_config_M152576_N512.json", + "M": 152576, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 302.00025000000005 + }, + "M=152576,N=576": { + "file": "silu_config_M152576_N576.json", + "M": 152576, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 713.1219999999998 + }, + 
"M=152576,N=640": { + "file": "silu_config_M152576_N640.json", + "M": 152576, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.922 + }, + "M=152576,N=768": { + "file": "silu_config_M152576_N768.json", + "M": 152576, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.1219999999998 + }, + "M=152576,N=800": { + "file": "silu_config_M152576_N800.json", + "M": 152576, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.5619999999999 + }, + "M=152576,N=896": { + "file": "silu_config_M152576_N896.json", + "M": 152576, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 724.2820000000002 + }, + "M=152576,N=960": { + "file": "silu_config_M152576_N960.json", + "M": 152576, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 730.242 + }, + "M=152576,N=1024": { + "file": "silu_config_M152576_N1024.json", + "M": 152576, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 581.0814999999998 + }, + "M=152576,N=1120": { + "file": "silu_config_M152576_N1120.json", + "M": 152576, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.045 + }, + "M=152576,N=1152": { + "file": "silu_config_M152576_N1152.json", + "M": 152576, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1420.7247499999999 + }, + "M=152576,N=1280": { + "file": "silu_config_M152576_N1280.json", + "M": 152576, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1432.9250000000006 + }, + "M=152576,N=1344": { + "file": "silu_config_M152576_N1344.json", + "M": 152576, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1422.60475 + }, + "M=152576,N=1408": { + "file": "silu_config_M152576_N1408.json", + "M": 152576, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.00475 + }, + "M=152576,N=1440": { + "file": "silu_config_M152576_N1440.json", + "M": 152576, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1435.2849999999999 + 
}, + "M=152576,N=1536": { + "file": "silu_config_M152576_N1536.json", + "M": 152576, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1436.1650000000004 + }, + "M=152576,N=1600": { + "file": "silu_config_M152576_N1600.json", + "M": 152576, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1434.6850000000009 + }, + "M=152576,N=1664": { + "file": "silu_config_M152576_N1664.json", + "M": 152576, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1429.4049999999997 + }, + "M=152576,N=1728": { + "file": "silu_config_M152576_N1728.json", + "M": 152576, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1439.3249999999998 + }, + "M=152576,N=1760": { + "file": "silu_config_M152576_N1760.json", + "M": 152576, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1462.3650000000002 + }, + "M=152576,N=1792": { + "file": "silu_config_M152576_N1792.json", + "M": 152576, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1442.4850000000001 + }, + "M=152576,N=1920": { + "file": "silu_config_M152576_N1920.json", + "M": 152576, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1436.445000000001 + }, + "M=152576,N=2048": { + "file": "silu_config_M152576_N2048.json", + "M": 152576, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1138.843499999999 + }, + "M=152576,N=2080": { + "file": "silu_config_M152576_N2080.json", + "M": 152576, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.1655 + }, + "M=152576,N=2240": { + "file": "silu_config_M152576_N2240.json", + "M": 152576, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1657.8457500000004 + }, + "M=152576,N=2400": { + "file": "silu_config_M152576_N2400.json", + "M": 152576, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.6862499999997 + }, + "M=152576,N=2560": { + "file": "silu_config_M152576_N2560.json", + "M": 152576, + "N": 2560, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1847.0064999999995 + }, + "M=153600,N=128": { + "file": "silu_config_M153600_N128.json", + "M": 153600, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.71950000000002 + }, + "M=153600,N=160": { + "file": "silu_config_M153600_N160.json", + "M": 153600, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.19975 + }, + "M=153600,N=192": { + "file": "silu_config_M153600_N192.json", + "M": 153600, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.23975000000002 + }, + "M=153600,N=256": { + "file": "silu_config_M153600_N256.json", + "M": 153600, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.23974999999996 + }, + "M=153600,N=320": { + "file": "silu_config_M153600_N320.json", + "M": 153600, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.7605 + }, + "M=153600,N=384": { + "file": "silu_config_M153600_N384.json", + "M": 153600, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.52049999999986 + }, + "M=153600,N=480": { + "file": "silu_config_M153600_N480.json", + "M": 153600, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.6807499999999 + }, + "M=153600,N=512": { + "file": "silu_config_M153600_N512.json", + "M": 153600, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 304.12025000000006 + }, + "M=153600,N=576": { + "file": "silu_config_M153600_N576.json", + "M": 153600, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 728.722 + }, + "M=153600,N=640": { + "file": "silu_config_M153600_N640.json", + "M": 153600, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 733.402 + }, + "M=153600,N=768": { + "file": "silu_config_M153600_N768.json", + "M": 153600, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.402 + }, + "M=153600,N=800": { + "file": "silu_config_M153600_N800.json", + "M": 153600, + "N": 800, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 719.1619999999996 + }, + "M=153600,N=896": { + "file": "silu_config_M153600_N896.json", + "M": 153600, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 735.7220000000004 + }, + "M=153600,N=960": { + "file": "silu_config_M153600_N960.json", + "M": 153600, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 732.6019999999996 + }, + "M=153600,N=1024": { + "file": "silu_config_M153600_N1024.json", + "M": 153600, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 584.24125 + }, + "M=153600,N=1120": { + "file": "silu_config_M153600_N1120.json", + "M": 153600, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1442.525 + }, + "M=153600,N=1152": { + "file": "silu_config_M153600_N1152.json", + "M": 153600, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1401.8847500000002 + }, + "M=153600,N=1280": { + "file": "silu_config_M153600_N1280.json", + "M": 153600, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.045 + }, + "M=153600,N=1344": { + "file": "silu_config_M153600_N1344.json", + "M": 153600, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.5249999999996 + }, + "M=153600,N=1408": { + "file": "silu_config_M153600_N1408.json", + "M": 153600, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1445.7250000000017 + }, + "M=153600,N=1440": { + "file": "silu_config_M153600_N1440.json", + "M": 153600, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.4450000000002 + }, + "M=153600,N=1536": { + "file": "silu_config_M153600_N1536.json", + "M": 153600, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1406.3247499999998 + }, + "M=153600,N=1600": { + "file": "silu_config_M153600_N1600.json", + "M": 153600, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.4449999999997 + }, + "M=153600,N=1664": { + "file": "silu_config_M153600_N1664.json", + "M": 
153600, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1429.445000000001 + }, + "M=153600,N=1728": { + "file": "silu_config_M153600_N1728.json", + "M": 153600, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1428.1650000000004 + }, + "M=153600,N=1760": { + "file": "silu_config_M153600_N1760.json", + "M": 153600, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.3249999999998 + }, + "M=153600,N=1792": { + "file": "silu_config_M153600_N1792.json", + "M": 153600, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.6849999999995 + }, + "M=153600,N=1920": { + "file": "silu_config_M153600_N1920.json", + "M": 153600, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.8049999999994 + }, + "M=153600,N=2048": { + "file": "silu_config_M153600_N2048.json", + "M": 153600, + "N": 2048, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1151.2034999999992 + }, + "M=153600,N=2080": { + "file": "silu_config_M153600_N2080.json", + "M": 153600, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1582.6054999999997 + }, + "M=153600,N=2240": { + "file": "silu_config_M153600_N2240.json", + "M": 153600, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.32575 + }, + "M=153600,N=2400": { + "file": "silu_config_M153600_N2400.json", + "M": 153600, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1746.0462500000003 + }, + "M=153600,N=2560": { + "file": "silu_config_M153600_N2560.json", + "M": 153600, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1834.9264999999996 + }, + "M=154624,N=128": { + "file": "silu_config_M154624_N128.json", + "M": 154624, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.1595 + }, + "M=154624,N=160": { + "file": "silu_config_M154624_N160.json", + "M": 154624, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.24 + }, + "M=154624,N=192": { + "file": 
"silu_config_M154624_N192.json", + "M": 154624, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.60000000000002 + }, + "M=154624,N=256": { + "file": "silu_config_M154624_N256.json", + "M": 154624, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.39974999999998 + }, + "M=154624,N=320": { + "file": "silu_config_M154624_N320.json", + "M": 154624, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.6804999999998 + }, + "M=154624,N=384": { + "file": "silu_config_M154624_N384.json", + "M": 154624, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.2805000000001 + }, + "M=154624,N=480": { + "file": "silu_config_M154624_N480.json", + "M": 154624, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.96050000000014 + }, + "M=154624,N=512": { + "file": "silu_config_M154624_N512.json", + "M": 154624, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 305.88024999999993 + }, + "M=154624,N=576": { + "file": "silu_config_M154624_N576.json", + "M": 154624, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.762 + }, + "M=154624,N=640": { + "file": "silu_config_M154624_N640.json", + "M": 154624, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 729.2820000000002 + }, + "M=154624,N=768": { + "file": "silu_config_M154624_N768.json", + "M": 154624, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.1620000000003 + }, + "M=154624,N=800": { + "file": "silu_config_M154624_N800.json", + "M": 154624, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 737.0420000000001 + }, + "M=154624,N=896": { + "file": "silu_config_M154624_N896.json", + "M": 154624, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.5219999999999 + }, + "M=154624,N=960": { + "file": "silu_config_M154624_N960.json", + "M": 154624, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 740.002 + }, + "M=154624,N=1024": { + 
"file": "silu_config_M154624_N1024.json", + "M": 154624, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 588.4812499999998 + }, + "M=154624,N=1120": { + "file": "silu_config_M154624_N1120.json", + "M": 154624, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1441.0049999999997 + }, + "M=154624,N=1152": { + "file": "silu_config_M154624_N1152.json", + "M": 154624, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.3650000000002 + }, + "M=154624,N=1280": { + "file": "silu_config_M154624_N1280.json", + "M": 154624, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1441.3250000000003 + }, + "M=154624,N=1344": { + "file": "silu_config_M154624_N1344.json", + "M": 154624, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.1250000000014 + }, + "M=154624,N=1408": { + "file": "silu_config_M154624_N1408.json", + "M": 154624, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.685 + }, + "M=154624,N=1440": { + "file": "silu_config_M154624_N1440.json", + "M": 154624, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.205 + }, + "M=154624,N=1536": { + "file": "silu_config_M154624_N1536.json", + "M": 154624, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1455.085 + }, + "M=154624,N=1600": { + "file": "silu_config_M154624_N1600.json", + "M": 154624, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1466.1249999999995 + }, + "M=154624,N=1664": { + "file": "silu_config_M154624_N1664.json", + "M": 154624, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1437.9650000000006 + }, + "M=154624,N=1728": { + "file": "silu_config_M154624_N1728.json", + "M": 154624, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1456.9650000000001 + }, + "M=154624,N=1760": { + "file": "silu_config_M154624_N1760.json", + "M": 154624, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1440.0049999999992 + }, + "M=154624,N=1792": { + "file": "silu_config_M154624_N1792.json", + "M": 154624, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.0850000000014 + }, + "M=154624,N=1920": { + "file": "silu_config_M154624_N1920.json", + "M": 154624, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.0449999999996 + }, + "M=154624,N=2048": { + "file": "silu_config_M154624_N2048.json", + "M": 154624, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1158.1634999999987 + }, + "M=154624,N=2080": { + "file": "silu_config_M154624_N2080.json", + "M": 154624, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1584.3654999999999 + }, + "M=154624,N=2240": { + "file": "silu_config_M154624_N2240.json", + "M": 154624, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1669.246000000001 + }, + "M=154624,N=2400": { + "file": "silu_config_M154624_N2400.json", + "M": 154624, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1768.5662499999999 + }, + "M=154624,N=2560": { + "file": "silu_config_M154624_N2560.json", + "M": 154624, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.2867500000002 + }, + "M=155648,N=128": { + "file": "silu_config_M155648_N128.json", + "M": 155648, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.19925 + }, + "M=155648,N=160": { + "file": "silu_config_M155648_N160.json", + "M": 155648, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.56 + }, + "M=155648,N=192": { + "file": "silu_config_M155648_N192.json", + "M": 155648, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 198.43975 + }, + "M=155648,N=256": { + "file": "silu_config_M155648_N256.json", + "M": 155648, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.91974999999996 + }, + "M=155648,N=320": { + "file": "silu_config_M155648_N320.json", + "M": 155648, + "N": 320, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 381.08050000000003 + }, + "M=155648,N=384": { + "file": "silu_config_M155648_N384.json", + "M": 155648, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.88049999999987 + }, + "M=155648,N=480": { + "file": "silu_config_M155648_N480.json", + "M": 155648, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 385.2805000000003 + }, + "M=155648,N=512": { + "file": "silu_config_M155648_N512.json", + "M": 155648, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 290.8802499999998 + }, + "M=155648,N=576": { + "file": "silu_config_M155648_N576.json", + "M": 155648, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.1220000000003 + }, + "M=155648,N=640": { + "file": "silu_config_M155648_N640.json", + "M": 155648, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 734.0420000000004 + }, + "M=155648,N=768": { + "file": "silu_config_M155648_N768.json", + "M": 155648, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.6820000000002 + }, + "M=155648,N=800": { + "file": "silu_config_M155648_N800.json", + "M": 155648, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 730.6020000000005 + }, + "M=155648,N=896": { + "file": "silu_config_M155648_N896.json", + "M": 155648, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 727.7619999999997 + }, + "M=155648,N=960": { + "file": "silu_config_M155648_N960.json", + "M": 155648, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 733.4419999999998 + }, + "M=155648,N=1024": { + "file": "silu_config_M155648_N1024.json", + "M": 155648, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 591.9615000000001 + }, + "M=155648,N=1120": { + "file": "silu_config_M155648_N1120.json", + "M": 155648, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.4049999999993 + }, + "M=155648,N=1152": { + "file": "silu_config_M155648_N1152.json", + "M": 155648, + 
"N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1449.8450000000003 + }, + "M=155648,N=1280": { + "file": "silu_config_M155648_N1280.json", + "M": 155648, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1450.565 + }, + "M=155648,N=1344": { + "file": "silu_config_M155648_N1344.json", + "M": 155648, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1471.5249999999996 + }, + "M=155648,N=1408": { + "file": "silu_config_M155648_N1408.json", + "M": 155648, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.525 + }, + "M=155648,N=1440": { + "file": "silu_config_M155648_N1440.json", + "M": 155648, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1474.0049999999997 + }, + "M=155648,N=1536": { + "file": "silu_config_M155648_N1536.json", + "M": 155648, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.5649999999996 + }, + "M=155648,N=1600": { + "file": "silu_config_M155648_N1600.json", + "M": 155648, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1474.2849999999994 + }, + "M=155648,N=1664": { + "file": "silu_config_M155648_N1664.json", + "M": 155648, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1467.725 + }, + "M=155648,N=1728": { + "file": "silu_config_M155648_N1728.json", + "M": 155648, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1466.4449999999997 + }, + "M=155648,N=1760": { + "file": "silu_config_M155648_N1760.json", + "M": 155648, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.6452499999996 + }, + "M=155648,N=1792": { + "file": "silu_config_M155648_N1792.json", + "M": 155648, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1471.245 + }, + "M=155648,N=1920": { + "file": "silu_config_M155648_N1920.json", + "M": 155648, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1462.8450000000012 + }, + "M=155648,N=2048": { + "file": 
"silu_config_M155648_N2048.json", + "M": 155648, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1174.9637500000003 + }, + "M=155648,N=2080": { + "file": "silu_config_M155648_N2080.json", + "M": 155648, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1605.045500000001 + }, + "M=155648,N=2240": { + "file": "silu_config_M155648_N2240.json", + "M": 155648, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1690.4860000000017 + }, + "M=155648,N=2400": { + "file": "silu_config_M155648_N2400.json", + "M": 155648, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.3665 + }, + "M=155648,N=2560": { + "file": "silu_config_M155648_N2560.json", + "M": 155648, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.2867499999998 + }, + "M=156672,N=128": { + "file": "silu_config_M156672_N128.json", + "M": 156672, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.39925000000001 + }, + "M=156672,N=160": { + "file": "silu_config_M156672_N160.json", + "M": 156672, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.55975 + }, + "M=156672,N=192": { + "file": "silu_config_M156672_N192.json", + "M": 156672, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.0397499999999 + }, + "M=156672,N=256": { + "file": "silu_config_M156672_N256.json", + "M": 156672, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.71974999999998 + }, + "M=156672,N=320": { + "file": "silu_config_M156672_N320.json", + "M": 156672, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.7205 + }, + "M=156672,N=384": { + "file": "silu_config_M156672_N384.json", + "M": 156672, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.44049999999993 + }, + "M=156672,N=480": { + "file": "silu_config_M156672_N480.json", + "M": 156672, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.2404999999999 + }, + 
"M=156672,N=512": { + "file": "silu_config_M156672_N512.json", + "M": 156672, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 298.9202500000001 + }, + "M=156672,N=576": { + "file": "silu_config_M156672_N576.json", + "M": 156672, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.2819999999997 + }, + "M=156672,N=640": { + "file": "silu_config_M156672_N640.json", + "M": 156672, + "N": 640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 746.8419999999999 + }, + "M=156672,N=768": { + "file": "silu_config_M156672_N768.json", + "M": 156672, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.1219999999998 + }, + "M=156672,N=800": { + "file": "silu_config_M156672_N800.json", + "M": 156672, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.2419999999997 + }, + "M=156672,N=896": { + "file": "silu_config_M156672_N896.json", + "M": 156672, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.1219999999998 + }, + "M=156672,N=960": { + "file": "silu_config_M156672_N960.json", + "M": 156672, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.0020000000006 + }, + "M=156672,N=1024": { + "file": "silu_config_M156672_N1024.json", + "M": 156672, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 596.6012499999999 + }, + "M=156672,N=1120": { + "file": "silu_config_M156672_N1120.json", + "M": 156672, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1450.2849999999999 + }, + "M=156672,N=1152": { + "file": "silu_config_M156672_N1152.json", + "M": 156672, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1458.725 + }, + "M=156672,N=1280": { + "file": "silu_config_M156672_N1280.json", + "M": 156672, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.9650000000001 + }, + "M=156672,N=1344": { + "file": "silu_config_M156672_N1344.json", + "M": 156672, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1470.6449999999995 + }, + "M=156672,N=1408": { + "file": "silu_config_M156672_N1408.json", + "M": 156672, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.2850000000003 + }, + "M=156672,N=1440": { + "file": "silu_config_M156672_N1440.json", + "M": 156672, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.3650000000002 + }, + "M=156672,N=1536": { + "file": "silu_config_M156672_N1536.json", + "M": 156672, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.685 + }, + "M=156672,N=1600": { + "file": "silu_config_M156672_N1600.json", + "M": 156672, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.1650000000004 + }, + "M=156672,N=1664": { + "file": "silu_config_M156672_N1664.json", + "M": 156672, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1466.6850000000009 + }, + "M=156672,N=1728": { + "file": "silu_config_M156672_N1728.json", + "M": 156672, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.2849999999994 + }, + "M=156672,N=1760": { + "file": "silu_config_M156672_N1760.json", + "M": 156672, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1488.76525 + }, + "M=156672,N=1792": { + "file": "silu_config_M156672_N1792.json", + "M": 156672, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.9650000000001 + }, + "M=156672,N=1920": { + "file": "silu_config_M156672_N1920.json", + "M": 156672, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.4450000000002 + }, + "M=156672,N=2048": { + "file": "silu_config_M156672_N2048.json", + "M": 156672, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1170.6037500000004 + }, + "M=156672,N=2080": { + "file": "silu_config_M156672_N2080.json", + "M": 156672, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1620.4857499999994 + }, + "M=156672,N=2240": { + "file": "silu_config_M156672_N2240.json", + "M": 156672, 
+ "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1708.6460000000002 + }, + "M=156672,N=2400": { + "file": "silu_config_M156672_N2400.json", + "M": 156672, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1811.8465 + }, + "M=156672,N=2560": { + "file": "silu_config_M156672_N2560.json", + "M": 156672, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1886.8867500000006 + }, + "M=157696,N=128": { + "file": "silu_config_M157696_N128.json", + "M": 157696, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.19950000000001 + }, + "M=157696,N=160": { + "file": "silu_config_M157696_N160.json", + "M": 157696, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.95975 + }, + "M=157696,N=192": { + "file": "silu_config_M157696_N192.json", + "M": 157696, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.27999999999997 + }, + "M=157696,N=256": { + "file": "silu_config_M157696_N256.json", + "M": 157696, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.95975000000004 + }, + "M=157696,N=320": { + "file": "silu_config_M157696_N320.json", + "M": 157696, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 386.2805000000001 + }, + "M=157696,N=384": { + "file": "silu_config_M157696_N384.json", + "M": 157696, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.80049999999983 + }, + "M=157696,N=480": { + "file": "silu_config_M157696_N480.json", + "M": 157696, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.2805000000001 + }, + "M=157696,N=512": { + "file": "silu_config_M157696_N512.json", + "M": 157696, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 301.9202500000001 + }, + "M=157696,N=576": { + "file": "silu_config_M157696_N576.json", + "M": 157696, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 728.922 + }, + "M=157696,N=640": { + "file": "silu_config_M157696_N640.json", + "M": 
157696, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.5620000000004 + }, + "M=157696,N=768": { + "file": "silu_config_M157696_N768.json", + "M": 157696, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 745.0420000000001 + }, + "M=157696,N=800": { + "file": "silu_config_M157696_N800.json", + "M": 157696, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.3619999999999 + }, + "M=157696,N=896": { + "file": "silu_config_M157696_N896.json", + "M": 157696, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.8019999999997 + }, + "M=157696,N=960": { + "file": "silu_config_M157696_N960.json", + "M": 157696, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.002 + }, + "M=157696,N=1024": { + "file": "silu_config_M157696_N1024.json", + "M": 157696, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 600.8015000000003 + }, + "M=157696,N=1120": { + "file": "silu_config_M157696_N1120.json", + "M": 157696, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.5650000000005 + }, + "M=157696,N=1152": { + "file": "silu_config_M157696_N1152.json", + "M": 157696, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.3249999999994 + }, + "M=157696,N=1280": { + "file": "silu_config_M157696_N1280.json", + "M": 157696, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1469.8849999999998 + }, + "M=157696,N=1344": { + "file": "silu_config_M157696_N1344.json", + "M": 157696, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.7250000000004 + }, + "M=157696,N=1408": { + "file": "silu_config_M157696_N1408.json", + "M": 157696, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1451.2850000000003 + }, + "M=157696,N=1440": { + "file": "silu_config_M157696_N1440.json", + "M": 157696, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.6050000000005 + }, + "M=157696,N=1536": { + 
"file": "silu_config_M157696_N1536.json", + "M": 157696, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.4049999999997 + }, + "M=157696,N=1600": { + "file": "silu_config_M157696_N1600.json", + "M": 157696, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1484.165 + }, + "M=157696,N=1664": { + "file": "silu_config_M157696_N1664.json", + "M": 157696, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.8850000000002 + }, + "M=157696,N=1728": { + "file": "silu_config_M157696_N1728.json", + "M": 157696, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.125 + }, + "M=157696,N=1760": { + "file": "silu_config_M157696_N1760.json", + "M": 157696, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.4850000000001 + }, + "M=157696,N=1792": { + "file": "silu_config_M157696_N1792.json", + "M": 157696, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.6052500000005 + }, + "M=157696,N=1920": { + "file": "silu_config_M157696_N1920.json", + "M": 157696, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.9250000000002 + }, + "M=157696,N=2048": { + "file": "silu_config_M157696_N2048.json", + "M": 157696, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1181.6439999999998 + }, + "M=157696,N=2080": { + "file": "silu_config_M157696_N2080.json", + "M": 157696, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.6857499999996 + }, + "M=157696,N=2240": { + "file": "silu_config_M157696_N2240.json", + "M": 157696, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1723.6459999999997 + }, + "M=157696,N=2400": { + "file": "silu_config_M157696_N2400.json", + "M": 157696, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1817.1265000000003 + }, + "M=157696,N=2560": { + "file": "silu_config_M157696_N2560.json", + "M": 157696, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1891.3667500000001 + }, + "M=158720,N=128": { + "file": "silu_config_M158720_N128.json", + "M": 158720, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.47924999999994 + }, + "M=158720,N=160": { + "file": "silu_config_M158720_N160.json", + "M": 158720, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.87975 + }, + "M=158720,N=192": { + "file": "silu_config_M158720_N192.json", + "M": 158720, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.43975000000006 + }, + "M=158720,N=256": { + "file": "silu_config_M158720_N256.json", + "M": 158720, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.95975000000004 + }, + "M=158720,N=320": { + "file": "silu_config_M158720_N320.json", + "M": 158720, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 385.68050000000017 + }, + "M=158720,N=384": { + "file": "silu_config_M158720_N384.json", + "M": 158720, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.80050000000017 + }, + "M=158720,N=480": { + "file": "silu_config_M158720_N480.json", + "M": 158720, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.9604999999999 + }, + "M=158720,N=512": { + "file": "silu_config_M158720_N512.json", + "M": 158720, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 313.92025 + }, + "M=158720,N=576": { + "file": "silu_config_M158720_N576.json", + "M": 158720, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.6019999999999 + }, + "M=158720,N=640": { + "file": "silu_config_M158720_N640.json", + "M": 158720, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 746.0820000000001 + }, + "M=158720,N=768": { + "file": "silu_config_M158720_N768.json", + "M": 158720, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 761.162 + }, + "M=158720,N=800": { + "file": "silu_config_M158720_N800.json", + "M": 158720, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 763.5220000000004 + }, + "M=158720,N=896": { + "file": "silu_config_M158720_N896.json", + "M": 158720, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 764.1619999999998 + }, + "M=158720,N=960": { + "file": "silu_config_M158720_N960.json", + "M": 158720, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 748.242 + }, + "M=158720,N=1024": { + "file": "silu_config_M158720_N1024.json", + "M": 158720, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 603.3615000000002 + }, + "M=158720,N=1120": { + "file": "silu_config_M158720_N1120.json", + "M": 158720, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1469.165 + }, + "M=158720,N=1152": { + "file": "silu_config_M158720_N1152.json", + "M": 158720, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.5649999999996 + }, + "M=158720,N=1280": { + "file": "silu_config_M158720_N1280.json", + "M": 158720, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.7249999999995 + }, + "M=158720,N=1344": { + "file": "silu_config_M158720_N1344.json", + "M": 158720, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.565 + }, + "M=158720,N=1408": { + "file": "silu_config_M158720_N1408.json", + "M": 158720, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1472.2850000000008 + }, + "M=158720,N=1440": { + "file": "silu_config_M158720_N1440.json", + "M": 158720, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1492.8452500000003 + }, + "M=158720,N=1536": { + "file": "silu_config_M158720_N1536.json", + "M": 158720, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.7649999999994 + }, + "M=158720,N=1600": { + "file": "silu_config_M158720_N1600.json", + "M": 158720, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1493.4852499999997 + }, + "M=158720,N=1664": { + "file": "silu_config_M158720_N1664.json", + "M": 158720, + "N": 1664, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.8849999999998 + }, + "M=158720,N=1728": { + "file": "silu_config_M158720_N1728.json", + "M": 158720, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1496.2052500000004 + }, + "M=158720,N=1760": { + "file": "silu_config_M158720_N1760.json", + "M": 158720, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.72525 + }, + "M=158720,N=1792": { + "file": "silu_config_M158720_N1792.json", + "M": 158720, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.085 + }, + "M=158720,N=1920": { + "file": "silu_config_M158720_N1920.json", + "M": 158720, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.9649999999997 + }, + "M=158720,N=2048": { + "file": "silu_config_M158720_N2048.json", + "M": 158720, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1191.204000000001 + }, + "M=158720,N=2080": { + "file": "silu_config_M158720_N2080.json", + "M": 158720, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1614.4457499999999 + }, + "M=158720,N=2240": { + "file": "silu_config_M158720_N2240.json", + "M": 158720, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1730.6060000000007 + }, + "M=158720,N=2400": { + "file": "silu_config_M158720_N2400.json", + "M": 158720, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1804.2464999999993 + }, + "M=158720,N=2560": { + "file": "silu_config_M158720_N2560.json", + "M": 158720, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1889.6067500000008 + }, + "M=159744,N=128": { + "file": "silu_config_M159744_N128.json", + "M": 159744, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.3195 + }, + "M=159744,N=160": { + "file": "silu_config_M159744_N160.json", + "M": 159744, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 212.95974999999999 + }, + "M=159744,N=192": { + "file": 
"silu_config_M159744_N192.json", + "M": 159744, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 213.11974999999995 + }, + "M=159744,N=256": { + "file": "silu_config_M159744_N256.json", + "M": 159744, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 175.91975000000008 + }, + "M=159744,N=320": { + "file": "silu_config_M159744_N320.json", + "M": 159744, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.92050000000006 + }, + "M=159744,N=384": { + "file": "silu_config_M159744_N384.json", + "M": 159744, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.72050000000013 + }, + "M=159744,N=480": { + "file": "silu_config_M159744_N480.json", + "M": 159744, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.32050000000004 + }, + "M=159744,N=512": { + "file": "silu_config_M159744_N512.json", + "M": 159744, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 304.24024999999995 + }, + "M=159744,N=576": { + "file": "silu_config_M159744_N576.json", + "M": 159744, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 768.3219999999997 + }, + "M=159744,N=640": { + "file": "silu_config_M159744_N640.json", + "M": 159744, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 752.9220000000003 + }, + "M=159744,N=768": { + "file": "silu_config_M159744_N768.json", + "M": 159744, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.5220000000004 + }, + "M=159744,N=800": { + "file": "silu_config_M159744_N800.json", + "M": 159744, + "N": 800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 768.6020000000003 + }, + "M=159744,N=896": { + "file": "silu_config_M159744_N896.json", + "M": 159744, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 765.202 + }, + "M=159744,N=960": { + "file": "silu_config_M159744_N960.json", + "M": 159744, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 755.8019999999999 + }, + 
"M=159744,N=1024": { + "file": "silu_config_M159744_N1024.json", + "M": 159744, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 608.0015000000001 + }, + "M=159744,N=1120": { + "file": "silu_config_M159744_N1120.json", + "M": 159744, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.5252500000001 + }, + "M=159744,N=1152": { + "file": "silu_config_M159744_N1152.json", + "M": 159744, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.3649999999998 + }, + "M=159744,N=1280": { + "file": "silu_config_M159744_N1280.json", + "M": 159744, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1478.4449999999997 + }, + "M=159744,N=1344": { + "file": "silu_config_M159744_N1344.json", + "M": 159744, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1499.08525 + }, + "M=159744,N=1408": { + "file": "silu_config_M159744_N1408.json", + "M": 159744, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.7652499999995 + }, + "M=159744,N=1440": { + "file": "silu_config_M159744_N1440.json", + "M": 159744, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.72525 + }, + "M=159744,N=1536": { + "file": "silu_config_M159744_N1536.json", + "M": 159744, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1503.24525 + }, + "M=159744,N=1600": { + "file": "silu_config_M159744_N1600.json", + "M": 159744, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1492.8852499999998 + }, + "M=159744,N=1664": { + "file": "silu_config_M159744_N1664.json", + "M": 159744, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.2452499999995 + }, + "M=159744,N=1728": { + "file": "silu_config_M159744_N1728.json", + "M": 159744, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.4052500000003 + }, + "M=159744,N=1760": { + "file": "silu_config_M159744_N1760.json", + "M": 159744, + "N": 1760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1508.68525 + }, + "M=159744,N=1792": { + "file": "silu_config_M159744_N1792.json", + "M": 159744, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1498.4452500000002 + }, + "M=159744,N=1920": { + "file": "silu_config_M159744_N1920.json", + "M": 159744, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1501.8852500000003 + }, + "M=159744,N=2048": { + "file": "silu_config_M159744_N2048.json", + "M": 159744, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1196.2439999999997 + }, + "M=159744,N=2080": { + "file": "silu_config_M159744_N2080.json", + "M": 159744, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.8057499999995 + }, + "M=159744,N=2240": { + "file": "silu_config_M159744_N2240.json", + "M": 159744, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.446249999999 + }, + "M=159744,N=2400": { + "file": "silu_config_M159744_N2400.json", + "M": 159744, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.5265 + }, + "M=159744,N=2560": { + "file": "silu_config_M159744_N2560.json", + "M": 159744, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1917.4470000000001 + }, + "M=160768,N=128": { + "file": "silu_config_M160768_N128.json", + "M": 160768, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.51950000000002 + }, + "M=160768,N=160": { + "file": "silu_config_M160768_N160.json", + "M": 160768, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.43975 + }, + "M=160768,N=192": { + "file": "silu_config_M160768_N192.json", + "M": 160768, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.68 + }, + "M=160768,N=256": { + "file": "silu_config_M160768_N256.json", + "M": 160768, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.11975 + }, + "M=160768,N=320": { + "file": "silu_config_M160768_N320.json", + "M": 160768, + "N": 320, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 393.16049999999996 + }, + "M=160768,N=384": { + "file": "silu_config_M160768_N384.json", + "M": 160768, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.52049999999997 + }, + "M=160768,N=480": { + "file": "silu_config_M160768_N480.json", + "M": 160768, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.1205000000001 + }, + "M=160768,N=512": { + "file": "silu_config_M160768_N512.json", + "M": 160768, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 317.72024999999996 + }, + "M=160768,N=576": { + "file": "silu_config_M160768_N576.json", + "M": 160768, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.922 + }, + "M=160768,N=640": { + "file": "silu_config_M160768_N640.json", + "M": 160768, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.922 + }, + "M=160768,N=768": { + "file": "silu_config_M160768_N768.json", + "M": 160768, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.1622499999996 + }, + "M=160768,N=800": { + "file": "silu_config_M160768_N800.json", + "M": 160768, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1220000000001 + }, + "M=160768,N=896": { + "file": "silu_config_M160768_N896.json", + "M": 160768, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 784.9222499999998 + }, + "M=160768,N=960": { + "file": "silu_config_M160768_N960.json", + "M": 160768, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.3622499999999 + }, + "M=160768,N=1024": { + "file": "silu_config_M160768_N1024.json", + "M": 160768, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 611.9615000000001 + }, + "M=160768,N=1120": { + "file": "silu_config_M160768_N1120.json", + "M": 160768, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1518.605250000001 + }, + "M=160768,N=1152": { + "file": "silu_config_M160768_N1152.json", + "M": 160768, + "N": 1152, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1497.0452499999997 + }, + "M=160768,N=1280": { + "file": "silu_config_M160768_N1280.json", + "M": 160768, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.0452500000001 + }, + "M=160768,N=1344": { + "file": "silu_config_M160768_N1344.json", + "M": 160768, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.7249999999995 + }, + "M=160768,N=1408": { + "file": "silu_config_M160768_N1408.json", + "M": 160768, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.2452500000004 + }, + "M=160768,N=1440": { + "file": "silu_config_M160768_N1440.json", + "M": 160768, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1510.8852500000003 + }, + "M=160768,N=1536": { + "file": "silu_config_M160768_N1536.json", + "M": 160768, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1501.9652499999997 + }, + "M=160768,N=1600": { + "file": "silu_config_M160768_N1600.json", + "M": 160768, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.8852500000003 + }, + "M=160768,N=1664": { + "file": "silu_config_M160768_N1664.json", + "M": 160768, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.3252499999999 + }, + "M=160768,N=1728": { + "file": "silu_config_M160768_N1728.json", + "M": 160768, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.8852500000003 + }, + "M=160768,N=1760": { + "file": "silu_config_M160768_N1760.json", + "M": 160768, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.6052499999996 + }, + "M=160768,N=1792": { + "file": "silu_config_M160768_N1792.json", + "M": 160768, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.8852500000007 + }, + "M=160768,N=1920": { + "file": "silu_config_M160768_N1920.json", + "M": 160768, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.12525 + }, + "M=160768,N=2048": { + 
"file": "silu_config_M160768_N2048.json", + "M": 160768, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1194.8039999999996 + }, + "M=160768,N=2080": { + "file": "silu_config_M160768_N2080.json", + "M": 160768, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1647.1657500000001 + }, + "M=160768,N=2240": { + "file": "silu_config_M160768_N2240.json", + "M": 160768, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1765.446249999999 + }, + "M=160768,N=2400": { + "file": "silu_config_M160768_N2400.json", + "M": 160768, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.44675 + }, + "M=160768,N=2560": { + "file": "silu_config_M160768_N2560.json", + "M": 160768, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1934.8069999999998 + }, + "M=161792,N=128": { + "file": "silu_config_M161792_N128.json", + "M": 161792, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.4795 + }, + "M=161792,N=160": { + "file": "silu_config_M161792_N160.json", + "M": 161792, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.75975 + }, + "M=161792,N=192": { + "file": "silu_config_M161792_N192.json", + "M": 161792, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 196.87999999999994 + }, + "M=161792,N=256": { + "file": "silu_config_M161792_N256.json", + "M": 161792, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.19975 + }, + "M=161792,N=320": { + "file": "silu_config_M161792_N320.json", + "M": 161792, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 396.1205 + }, + "M=161792,N=384": { + "file": "silu_config_M161792_N384.json", + "M": 161792, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.68050000000005 + }, + "M=161792,N=480": { + "file": "silu_config_M161792_N480.json", + "M": 161792, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 399.9604999999999 + }, + "M=161792,N=512": 
{ + "file": "silu_config_M161792_N512.json", + "M": 161792, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 319.7605000000001 + }, + "M=161792,N=576": { + "file": "silu_config_M161792_N576.json", + "M": 161792, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 777.6822499999998 + }, + "M=161792,N=640": { + "file": "silu_config_M161792_N640.json", + "M": 161792, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.48225 + }, + "M=161792,N=768": { + "file": "silu_config_M161792_N768.json", + "M": 161792, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2020000000002 + }, + "M=161792,N=800": { + "file": "silu_config_M161792_N800.json", + "M": 161792, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.2422499999998 + }, + "M=161792,N=896": { + "file": "silu_config_M161792_N896.json", + "M": 161792, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 767.402 + }, + "M=161792,N=960": { + "file": "silu_config_M161792_N960.json", + "M": 161792, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.9222500000001 + }, + "M=161792,N=1024": { + "file": "silu_config_M161792_N1024.json", + "M": 161792, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 615.5217500000003 + }, + "M=161792,N=1120": { + "file": "silu_config_M161792_N1120.json", + "M": 161792, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.2052499999995 + }, + "M=161792,N=1152": { + "file": "silu_config_M161792_N1152.json", + "M": 161792, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1526.6452499999996 + }, + "M=161792,N=1280": { + "file": "silu_config_M161792_N1280.json", + "M": 161792, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.64525 + }, + "M=161792,N=1344": { + "file": "silu_config_M161792_N1344.json", + "M": 161792, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.76525 + }, + 
"M=161792,N=1408": { + "file": "silu_config_M161792_N1408.json", + "M": 161792, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.8852500000003 + }, + "M=161792,N=1440": { + "file": "silu_config_M161792_N1440.json", + "M": 161792, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1521.3652499999994 + }, + "M=161792,N=1536": { + "file": "silu_config_M161792_N1536.json", + "M": 161792, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1511.3652499999994 + }, + "M=161792,N=1600": { + "file": "silu_config_M161792_N1600.json", + "M": 161792, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1511.64525 + }, + "M=161792,N=1664": { + "file": "silu_config_M161792_N1664.json", + "M": 161792, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1514.60525 + }, + "M=161792,N=1728": { + "file": "silu_config_M161792_N1728.json", + "M": 161792, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.64525 + }, + "M=161792,N=1760": { + "file": "silu_config_M161792_N1760.json", + "M": 161792, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.5252499999997 + }, + "M=161792,N=1792": { + "file": "silu_config_M161792_N1792.json", + "M": 161792, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.20525 + }, + "M=161792,N=1920": { + "file": "silu_config_M161792_N1920.json", + "M": 161792, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1520.8852499999998 + }, + "M=161792,N=2048": { + "file": "silu_config_M161792_N2048.json", + "M": 161792, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1211.7240000000002 + }, + "M=161792,N=2080": { + "file": "silu_config_M161792_N2080.json", + "M": 161792, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1677.406 + }, + "M=161792,N=2240": { + "file": "silu_config_M161792_N2240.json", + "M": 161792, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1746.44625 + }, + "M=161792,N=2400": { + "file": "silu_config_M161792_N2400.json", + "M": 161792, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.7667500000007 + }, + "M=161792,N=2560": { + "file": "silu_config_M161792_N2560.json", + "M": 161792, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.6070000000009 + }, + "M=162816,N=128": { + "file": "silu_config_M162816_N128.json", + "M": 162816, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.91949999999997 + }, + "M=162816,N=160": { + "file": "silu_config_M162816_N160.json", + "M": 162816, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 206.59974999999997 + }, + "M=162816,N=192": { + "file": "silu_config_M162816_N192.json", + "M": 162816, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 198.03975000000003 + }, + "M=162816,N=256": { + "file": "silu_config_M162816_N256.json", + "M": 162816, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 190.51975000000004 + }, + "M=162816,N=320": { + "file": "silu_config_M162816_N320.json", + "M": 162816, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.12049999999977 + }, + "M=162816,N=384": { + "file": "silu_config_M162816_N384.json", + "M": 162816, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.72050000000024 + }, + "M=162816,N=480": { + "file": "silu_config_M162816_N480.json", + "M": 162816, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 402.8805000000002 + }, + "M=162816,N=512": { + "file": "silu_config_M162816_N512.json", + "M": 162816, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 321.0002499999998 + }, + "M=162816,N=576": { + "file": "silu_config_M162816_N576.json", + "M": 162816, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 773.4422499999996 + }, + "M=162816,N=640": { + "file": "silu_config_M162816_N640.json", + "M": 162816, + "N": 640, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 775.96225 + }, + "M=162816,N=768": { + "file": "silu_config_M162816_N768.json", + "M": 162816, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.0419999999997 + }, + "M=162816,N=800": { + "file": "silu_config_M162816_N800.json", + "M": 162816, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 766.642 + }, + "M=162816,N=896": { + "file": "silu_config_M162816_N896.json", + "M": 162816, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.0422499999997 + }, + "M=162816,N=960": { + "file": "silu_config_M162816_N960.json", + "M": 162816, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 778.6022499999997 + }, + "M=162816,N=1024": { + "file": "silu_config_M162816_N1024.json", + "M": 162816, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 619.3615000000004 + }, + "M=162816,N=1120": { + "file": "silu_config_M162816_N1120.json", + "M": 162816, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.4852499999997 + }, + "M=162816,N=1152": { + "file": "silu_config_M162816_N1152.json", + "M": 162816, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.76525 + }, + "M=162816,N=1280": { + "file": "silu_config_M162816_N1280.json", + "M": 162816, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.0052500000002 + }, + "M=162816,N=1344": { + "file": "silu_config_M162816_N1344.json", + "M": 162816, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.8452499999999 + }, + "M=162816,N=1408": { + "file": "silu_config_M162816_N1408.json", + "M": 162816, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.9652499999993 + }, + "M=162816,N=1440": { + "file": "silu_config_M162816_N1440.json", + "M": 162816, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.24525 + }, + "M=162816,N=1536": { + "file": "silu_config_M162816_N1536.json", + "M": 
162816, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1521.0452500000006 + }, + "M=162816,N=1600": { + "file": "silu_config_M162816_N1600.json", + "M": 162816, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1552.0454999999997 + }, + "M=162816,N=1664": { + "file": "silu_config_M162816_N1664.json", + "M": 162816, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.2452500000009 + }, + "M=162816,N=1728": { + "file": "silu_config_M162816_N1728.json", + "M": 162816, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.4452499999998 + }, + "M=162816,N=1760": { + "file": "silu_config_M162816_N1760.json", + "M": 162816, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1537.5252499999992 + }, + "M=162816,N=1792": { + "file": "silu_config_M162816_N1792.json", + "M": 162816, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.3652500000007 + }, + "M=162816,N=1920": { + "file": "silu_config_M162816_N1920.json", + "M": 162816, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1532.2052499999995 + }, + "M=162816,N=2048": { + "file": "silu_config_M162816_N2048.json", + "M": 162816, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1227.5639999999994 + }, + "M=162816,N=2080": { + "file": "silu_config_M162816_N2080.json", + "M": 162816, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1688.2859999999991 + }, + "M=162816,N=2240": { + "file": "silu_config_M162816_N2240.json", + "M": 162816, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1767.44625 + }, + "M=162816,N=2400": { + "file": "silu_config_M162816_N2400.json", + "M": 162816, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1884.8867499999997 + }, + "M=162816,N=2560": { + "file": "silu_config_M162816_N2560.json", + "M": 162816, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1969.8072499999998 + }, + 
"M=163840,N=128": { + "file": "silu_config_M163840_N128.json", + "M": 163840, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.11949999999995 + }, + "M=163840,N=160": { + "file": "silu_config_M163840_N160.json", + "M": 163840, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.95974999999999 + }, + "M=163840,N=192": { + "file": "silu_config_M163840_N192.json", + "M": 163840, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.39975000000004 + }, + "M=163840,N=256": { + "file": "silu_config_M163840_N256.json", + "M": 163840, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.71999999999997 + }, + "M=163840,N=320": { + "file": "silu_config_M163840_N320.json", + "M": 163840, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.04050000000007 + }, + "M=163840,N=384": { + "file": "silu_config_M163840_N384.json", + "M": 163840, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.6007500000002 + }, + "M=163840,N=480": { + "file": "silu_config_M163840_N480.json", + "M": 163840, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 402.60075000000006 + }, + "M=163840,N=512": { + "file": "silu_config_M163840_N512.json", + "M": 163840, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 305.72024999999996 + }, + "M=163840,N=576": { + "file": "silu_config_M163840_N576.json", + "M": 163840, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.1619999999998 + }, + "M=163840,N=640": { + "file": "silu_config_M163840_N640.json", + "M": 163840, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.3219999999999 + }, + "M=163840,N=768": { + "file": "silu_config_M163840_N768.json", + "M": 163840, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.5619999999999 + }, + "M=163840,N=800": { + "file": "silu_config_M163840_N800.json", + "M": 163840, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 755.4020000000005 + }, + "M=163840,N=896": { + "file": "silu_config_M163840_N896.json", + "M": 163840, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.8819999999998 + }, + "M=163840,N=960": { + "file": "silu_config_M163840_N960.json", + "M": 163840, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.1619999999996 + }, + "M=163840,N=1024": { + "file": "silu_config_M163840_N1024.json", + "M": 163840, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 622.3215000000005 + }, + "M=163840,N=1120": { + "file": "silu_config_M163840_N1120.json", + "M": 163840, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.8852500000003 + }, + "M=163840,N=1152": { + "file": "silu_config_M163840_N1152.json", + "M": 163840, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.0452500000001 + }, + "M=163840,N=1280": { + "file": "silu_config_M163840_N1280.json", + "M": 163840, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1496.4052499999993 + }, + "M=163840,N=1344": { + "file": "silu_config_M163840_N1344.json", + "M": 163840, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.4852499999997 + }, + "M=163840,N=1408": { + "file": "silu_config_M163840_N1408.json", + "M": 163840, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.4852499999997 + }, + "M=163840,N=1440": { + "file": "silu_config_M163840_N1440.json", + "M": 163840, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.4452499999993 + }, + "M=163840,N=1536": { + "file": "silu_config_M163840_N1536.json", + "M": 163840, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.28525 + }, + "M=163840,N=1600": { + "file": "silu_config_M163840_N1600.json", + "M": 163840, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.1252499999996 + }, + "M=163840,N=1664": { + "file": "silu_config_M163840_N1664.json", + "M": 
163840, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.8452500000003 + }, + "M=163840,N=1728": { + "file": "silu_config_M163840_N1728.json", + "M": 163840, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1522.7652500000008 + }, + "M=163840,N=1760": { + "file": "silu_config_M163840_N1760.json", + "M": 163840, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1514.6052499999996 + }, + "M=163840,N=1792": { + "file": "silu_config_M163840_N1792.json", + "M": 163840, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.3252500000003 + }, + "M=163840,N=1920": { + "file": "silu_config_M163840_N1920.json", + "M": 163840, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1520.9252499999993 + }, + "M=163840,N=2048": { + "file": "silu_config_M163840_N2048.json", + "M": 163840, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1201.4440000000004 + }, + "M=163840,N=2080": { + "file": "silu_config_M163840_N2080.json", + "M": 163840, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1676.246 + }, + "M=163840,N=2240": { + "file": "silu_config_M163840_N2240.json", + "M": 163840, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.40625 + }, + "M=163840,N=2400": { + "file": "silu_config_M163840_N2400.json", + "M": 163840, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.7667499999998 + }, + "M=163840,N=2560": { + "file": "silu_config_M163840_N2560.json", + "M": 163840, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.0870000000004 + }, + "M=164864,N=128": { + "file": "silu_config_M164864_N128.json", + "M": 164864, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.67925 + }, + "M=164864,N=160": { + "file": "silu_config_M164864_N160.json", + "M": 164864, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.15974999999997 + }, + "M=164864,N=192": { + "file": 
"silu_config_M164864_N192.json", + "M": 164864, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.40000000000003 + }, + "M=164864,N=256": { + "file": "silu_config_M164864_N256.json", + "M": 164864, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 204.11975000000007 + }, + "M=164864,N=320": { + "file": "silu_config_M164864_N320.json", + "M": 164864, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 403.4805000000001 + }, + "M=164864,N=384": { + "file": "silu_config_M164864_N384.json", + "M": 164864, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.64075000000025 + }, + "M=164864,N=480": { + "file": "silu_config_M164864_N480.json", + "M": 164864, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 408.2805000000001 + }, + "M=164864,N=512": { + "file": "silu_config_M164864_N512.json", + "M": 164864, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 325.80049999999983 + }, + "M=164864,N=576": { + "file": "silu_config_M164864_N576.json", + "M": 164864, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.76225 + }, + "M=164864,N=640": { + "file": "silu_config_M164864_N640.json", + "M": 164864, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.4419999999998 + }, + "M=164864,N=768": { + "file": "silu_config_M164864_N768.json", + "M": 164864, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 789.56225 + }, + "M=164864,N=800": { + "file": "silu_config_M164864_N800.json", + "M": 164864, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 773.6022499999999 + }, + "M=164864,N=896": { + "file": "silu_config_M164864_N896.json", + "M": 164864, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 783.52225 + }, + "M=164864,N=960": { + "file": "silu_config_M164864_N960.json", + "M": 164864, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 788.0022499999995 + }, + "M=164864,N=1024": { + 
"file": "silu_config_M164864_N1024.json", + "M": 164864, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 627.3217500000007 + }, + "M=164864,N=1120": { + "file": "silu_config_M164864_N1120.json", + "M": 164864, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.4852500000002 + }, + "M=164864,N=1152": { + "file": "silu_config_M164864_N1152.json", + "M": 164864, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.4452500000002 + }, + "M=164864,N=1280": { + "file": "silu_config_M164864_N1280.json", + "M": 164864, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.72525 + }, + "M=164864,N=1344": { + "file": "silu_config_M164864_N1344.json", + "M": 164864, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1536.5652499999997 + }, + "M=164864,N=1408": { + "file": "silu_config_M164864_N1408.json", + "M": 164864, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1537.485250000001 + }, + "M=164864,N=1440": { + "file": "silu_config_M164864_N1440.json", + "M": 164864, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.4052500000003 + }, + "M=164864,N=1536": { + "file": "silu_config_M164864_N1536.json", + "M": 164864, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1539.64525 + }, + "M=164864,N=1600": { + "file": "silu_config_M164864_N1600.json", + "M": 164864, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.8855000000003 + }, + "M=164864,N=1664": { + "file": "silu_config_M164864_N1664.json", + "M": 164864, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1542.8852499999998 + }, + "M=164864,N=1728": { + "file": "silu_config_M164864_N1728.json", + "M": 164864, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.8055 + }, + "M=164864,N=1760": { + "file": "silu_config_M164864_N1760.json", + "M": 164864, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1555.0454999999997 + }, + "M=164864,N=1792": { + "file": "silu_config_M164864_N1792.json", + "M": 164864, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.20525 + }, + "M=164864,N=1920": { + "file": "silu_config_M164864_N1920.json", + "M": 164864, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1561.0855000000001 + }, + "M=164864,N=2048": { + "file": "silu_config_M164864_N2048.json", + "M": 164864, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1231.1639999999998 + }, + "M=164864,N=2080": { + "file": "silu_config_M164864_N2080.json", + "M": 164864, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1688.8459999999995 + }, + "M=164864,N=2240": { + "file": "silu_config_M164864_N2240.json", + "M": 164864, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1798.9665000000005 + }, + "M=164864,N=2400": { + "file": "silu_config_M164864_N2400.json", + "M": 164864, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.6467499999999 + }, + "M=164864,N=2560": { + "file": "silu_config_M164864_N2560.json", + "M": 164864, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1993.60725 + }, + "M=165888,N=128": { + "file": "silu_config_M165888_N128.json", + "M": 165888, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.2395 + }, + "M=165888,N=160": { + "file": "silu_config_M165888_N160.json", + "M": 165888, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.36 + }, + "M=165888,N=192": { + "file": "silu_config_M165888_N192.json", + "M": 165888, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.72000000000003 + }, + "M=165888,N=256": { + "file": "silu_config_M165888_N256.json", + "M": 165888, + "N": 256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.11975 + }, + "M=165888,N=320": { + "file": "silu_config_M165888_N320.json", + "M": 165888, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 405.04050000000007 + }, + "M=165888,N=384": { + "file": "silu_config_M165888_N384.json", + "M": 165888, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 402.96074999999996 + }, + "M=165888,N=480": { + "file": "silu_config_M165888_N480.json", + "M": 165888, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.1607499999999 + }, + "M=165888,N=512": { + "file": "silu_config_M165888_N512.json", + "M": 165888, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 326.1602499999999 + }, + "M=165888,N=576": { + "file": "silu_config_M165888_N576.json", + "M": 165888, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.8422499999999 + }, + "M=165888,N=640": { + "file": "silu_config_M165888_N640.json", + "M": 165888, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 781.2822499999997 + }, + "M=165888,N=768": { + "file": "silu_config_M165888_N768.json", + "M": 165888, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 783.24225 + }, + "M=165888,N=800": { + "file": "silu_config_M165888_N800.json", + "M": 165888, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.52225 + }, + "M=165888,N=896": { + "file": "silu_config_M165888_N896.json", + "M": 165888, + "N": 896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 794.4022499999996 + }, + "M=165888,N=960": { + "file": "silu_config_M165888_N960.json", + "M": 165888, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.6822500000001 + }, + "M=165888,N=1024": { + "file": "silu_config_M165888_N1024.json", + "M": 165888, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 630.7217500000002 + }, + "M=165888,N=1120": { + "file": "silu_config_M165888_N1120.json", + "M": 165888, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.8055 + }, + "M=165888,N=1152": { + "file": "silu_config_M165888_N1152.json", + "M": 165888, + "N": 1152, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1533.2052500000004 + }, + "M=165888,N=1280": { + "file": "silu_config_M165888_N1280.json", + "M": 165888, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.5255000000002 + }, + "M=165888,N=1344": { + "file": "silu_config_M165888_N1344.json", + "M": 165888, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.12525 + }, + "M=165888,N=1408": { + "file": "silu_config_M165888_N1408.json", + "M": 165888, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.9652500000002 + }, + "M=165888,N=1440": { + "file": "silu_config_M165888_N1440.json", + "M": 165888, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1549.0052499999993 + }, + "M=165888,N=1536": { + "file": "silu_config_M165888_N1536.json", + "M": 165888, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1548.72525 + }, + "M=165888,N=1600": { + "file": "silu_config_M165888_N1600.json", + "M": 165888, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1559.6054999999997 + }, + "M=165888,N=1664": { + "file": "silu_config_M165888_N1664.json", + "M": 165888, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1552.2455 + }, + "M=165888,N=1728": { + "file": "silu_config_M165888_N1728.json", + "M": 165888, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.0054999999998 + }, + "M=165888,N=1760": { + "file": "silu_config_M165888_N1760.json", + "M": 165888, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.6455000000005 + }, + "M=165888,N=1792": { + "file": "silu_config_M165888_N1792.json", + "M": 165888, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.5654999999997 + }, + "M=165888,N=1920": { + "file": "silu_config_M165888_N1920.json", + "M": 165888, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.6054999999997 + }, + "M=165888,N=2048": { + "file": "silu_config_M165888_N2048.json", + 
"M": 165888, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1231.2839999999997 + }, + "M=165888,N=2080": { + "file": "silu_config_M165888_N2080.json", + "M": 165888, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.366 + }, + "M=165888,N=2240": { + "file": "silu_config_M165888_N2240.json", + "M": 165888, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.2864999999993 + }, + "M=165888,N=2400": { + "file": "silu_config_M165888_N2400.json", + "M": 165888, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.7267499999998 + }, + "M=165888,N=2560": { + "file": "silu_config_M165888_N2560.json", + "M": 165888, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.4472500000002 + }, + "M=166912,N=128": { + "file": "silu_config_M166912_N128.json", + "M": 166912, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.75924999999998 + }, + "M=166912,N=160": { + "file": "silu_config_M166912_N160.json", + "M": 166912, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 202.63975 + }, + "M=166912,N=192": { + "file": "silu_config_M166912_N192.json", + "M": 166912, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.83974999999992 + }, + "M=166912,N=256": { + "file": "silu_config_M166912_N256.json", + "M": 166912, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 183.51975 + }, + "M=166912,N=320": { + "file": "silu_config_M166912_N320.json", + "M": 166912, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 407.7607499999999 + }, + "M=166912,N=384": { + "file": "silu_config_M166912_N384.json", + "M": 166912, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 405.72075000000007 + }, + "M=166912,N=480": { + "file": "silu_config_M166912_N480.json", + "M": 166912, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 412.52075000000025 + }, + "M=166912,N=512": { + "file": 
"silu_config_M166912_N512.json", + "M": 166912, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 329.5602500000001 + }, + "M=166912,N=576": { + "file": "silu_config_M166912_N576.json", + "M": 166912, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.0822499999997 + }, + "M=166912,N=640": { + "file": "silu_config_M166912_N640.json", + "M": 166912, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 797.0422500000002 + }, + "M=166912,N=768": { + "file": "silu_config_M166912_N768.json", + "M": 166912, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.8022499999997 + }, + "M=166912,N=800": { + "file": "silu_config_M166912_N800.json", + "M": 166912, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.2022499999998 + }, + "M=166912,N=896": { + "file": "silu_config_M166912_N896.json", + "M": 166912, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 791.2422499999998 + }, + "M=166912,N=960": { + "file": "silu_config_M166912_N960.json", + "M": 166912, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.96225 + }, + "M=166912,N=1024": { + "file": "silu_config_M166912_N1024.json", + "M": 166912, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 634.8817499999998 + }, + "M=166912,N=1120": { + "file": "silu_config_M166912_N1120.json", + "M": 166912, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.0455000000006 + }, + "M=166912,N=1152": { + "file": "silu_config_M166912_N1152.json", + "M": 166912, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1542.4052499999993 + }, + "M=166912,N=1280": { + "file": "silu_config_M166912_N1280.json", + "M": 166912, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1544.0052499999997 + }, + "M=166912,N=1344": { + "file": "silu_config_M166912_N1344.json", + "M": 166912, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.5654999999997 + 
}, + "M=166912,N=1408": { + "file": "silu_config_M166912_N1408.json", + "M": 166912, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.8852500000003 + }, + "M=166912,N=1440": { + "file": "silu_config_M166912_N1440.json", + "M": 166912, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1568.4854999999993 + }, + "M=166912,N=1536": { + "file": "silu_config_M166912_N1536.json", + "M": 166912, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1558.5254999999997 + }, + "M=166912,N=1600": { + "file": "silu_config_M166912_N1600.json", + "M": 166912, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1559.0055000000002 + }, + "M=166912,N=1664": { + "file": "silu_config_M166912_N1664.json", + "M": 166912, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1551.2055 + }, + "M=166912,N=1728": { + "file": "silu_config_M166912_N1728.json", + "M": 166912, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.1654999999996 + }, + "M=166912,N=1760": { + "file": "silu_config_M166912_N1760.json", + "M": 166912, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.2855000000009 + }, + "M=166912,N=1792": { + "file": "silu_config_M166912_N1792.json", + "M": 166912, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.5255000000002 + }, + "M=166912,N=1920": { + "file": "silu_config_M166912_N1920.json", + "M": 166912, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1580.2054999999991 + }, + "M=166912,N=2048": { + "file": "silu_config_M166912_N2048.json", + "M": 166912, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1231.284 + }, + "M=166912,N=2080": { + "file": "silu_config_M166912_N2080.json", + "M": 166912, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1719.8059999999996 + }, + "M=166912,N=2240": { + "file": "silu_config_M166912_N2240.json", + "M": 166912, + "N": 2240, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1811.6064999999999 + }, + "M=166912,N=2400": { + "file": "silu_config_M166912_N2400.json", + "M": 166912, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1921.487000000001 + }, + "M=166912,N=2560": { + "file": "silu_config_M166912_N2560.json", + "M": 166912, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.2072500000013 + }, + "M=167936,N=128": { + "file": "silu_config_M167936_N128.json", + "M": 167936, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 107.79925 + }, + "M=167936,N=160": { + "file": "silu_config_M167936_N160.json", + "M": 167936, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.71999999999997 + }, + "M=167936,N=192": { + "file": "silu_config_M167936_N192.json", + "M": 167936, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.23975000000002 + }, + "M=167936,N=256": { + "file": "silu_config_M167936_N256.json", + "M": 167936, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.59974999999997 + }, + "M=167936,N=320": { + "file": "silu_config_M167936_N320.json", + "M": 167936, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 411.00075000000004 + }, + "M=167936,N=384": { + "file": "silu_config_M167936_N384.json", + "M": 167936, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 408.12075000000004 + }, + "M=167936,N=480": { + "file": "silu_config_M167936_N480.json", + "M": 167936, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.24074999999993 + }, + "M=167936,N=512": { + "file": "silu_config_M167936_N512.json", + "M": 167936, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 321.3605 + }, + "M=167936,N=576": { + "file": "silu_config_M167936_N576.json", + "M": 167936, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.4822500000002 + }, + "M=167936,N=640": { + "file": "silu_config_M167936_N640.json", + "M": 
167936, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 790.96225 + }, + "M=167936,N=768": { + "file": "silu_config_M167936_N768.json", + "M": 167936, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.5622499999997 + }, + "M=167936,N=800": { + "file": "silu_config_M167936_N800.json", + "M": 167936, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 800.4022499999999 + }, + "M=167936,N=896": { + "file": "silu_config_M167936_N896.json", + "M": 167936, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.1222500000001 + }, + "M=167936,N=960": { + "file": "silu_config_M167936_N960.json", + "M": 167936, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.20225 + }, + "M=167936,N=1024": { + "file": "silu_config_M167936_N1024.json", + "M": 167936, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 638.6415 + }, + "M=167936,N=1120": { + "file": "silu_config_M167936_N1120.json", + "M": 167936, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1564.4854999999998 + }, + "M=167936,N=1152": { + "file": "silu_config_M167936_N1152.json", + "M": 167936, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1551.8855000000003 + }, + "M=167936,N=1280": { + "file": "silu_config_M167936_N1280.json", + "M": 167936, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1553.2855 + }, + "M=167936,N=1344": { + "file": "silu_config_M167936_N1344.json", + "M": 167936, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.2455 + }, + "M=167936,N=1408": { + "file": "silu_config_M167936_N1408.json", + "M": 167936, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1586.3254999999995 + }, + "M=167936,N=1440": { + "file": "silu_config_M167936_N1440.json", + "M": 167936, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1578.2454999999995 + }, + "M=167936,N=1536": { + "file": 
"silu_config_M167936_N1536.json", + "M": 167936, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1567.6854999999996 + }, + "M=167936,N=1600": { + "file": "silu_config_M167936_N1600.json", + "M": 167936, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1579.1254999999996 + }, + "M=167936,N=1664": { + "file": "silu_config_M167936_N1664.json", + "M": 167936, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1581.1255 + }, + "M=167936,N=1728": { + "file": "silu_config_M167936_N1728.json", + "M": 167936, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.5254999999997 + }, + "M=167936,N=1760": { + "file": "silu_config_M167936_N1760.json", + "M": 167936, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1584.1654999999996 + }, + "M=167936,N=1792": { + "file": "silu_config_M167936_N1792.json", + "M": 167936, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.5254999999997 + }, + "M=167936,N=1920": { + "file": "silu_config_M167936_N1920.json", + "M": 167936, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1589.7654999999995 + }, + "M=167936,N=2048": { + "file": "silu_config_M167936_N2048.json", + "M": 167936, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1265.0042500000004 + }, + "M=167936,N=2080": { + "file": "silu_config_M167936_N2080.json", + "M": 167936, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1740.1662500000002 + }, + "M=167936,N=2240": { + "file": "silu_config_M167936_N2240.json", + "M": 167936, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.2065000000011 + }, + "M=167936,N=2400": { + "file": "silu_config_M167936_N2400.json", + "M": 167936, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1923.0069999999996 + }, + "M=167936,N=2560": { + "file": "silu_config_M167936_N2560.json", + "M": 167936, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2019.2872499999992 + }, + "M=168960,N=128": { + "file": "silu_config_M168960_N128.json", + "M": 168960, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 107.63950000000003 + }, + "M=168960,N=160": { + "file": "silu_config_M168960_N160.json", + "M": 168960, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 204.87975 + }, + "M=168960,N=192": { + "file": "silu_config_M168960_N192.json", + "M": 168960, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 205.2 + }, + "M=168960,N=256": { + "file": "silu_config_M168960_N256.json", + "M": 168960, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 185.75974999999988 + }, + "M=168960,N=320": { + "file": "silu_config_M168960_N320.json", + "M": 168960, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.24075000000005 + }, + "M=168960,N=384": { + "file": "silu_config_M168960_N384.json", + "M": 168960, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 407.80049999999994 + }, + "M=168960,N=480": { + "file": "silu_config_M168960_N480.json", + "M": 168960, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 414.7207500000002 + }, + "M=168960,N=512": { + "file": "silu_config_M168960_N512.json", + "M": 168960, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 333.7202500000001 + }, + "M=168960,N=576": { + "file": "silu_config_M168960_N576.json", + "M": 168960, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 796.6822500000001 + }, + "M=168960,N=640": { + "file": "silu_config_M168960_N640.json", + "M": 168960, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.2822500000004 + }, + "M=168960,N=768": { + "file": "silu_config_M168960_N768.json", + "M": 168960, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.6022499999999 + }, + "M=168960,N=800": { + "file": "silu_config_M168960_N800.json", + "M": 168960, + "N": 800, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 803.6822499999996 + }, + "M=168960,N=896": { + "file": "silu_config_M168960_N896.json", + "M": 168960, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.52225 + }, + "M=168960,N=960": { + "file": "silu_config_M168960_N960.json", + "M": 168960, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.8822499999997 + }, + "M=168960,N=1024": { + "file": "silu_config_M168960_N1024.json", + "M": 168960, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 641.6015000000007 + }, + "M=168960,N=1120": { + "file": "silu_config_M168960_N1120.json", + "M": 168960, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.4455000000007 + }, + "M=168960,N=1152": { + "file": "silu_config_M168960_N1152.json", + "M": 168960, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.1655 + }, + "M=168960,N=1280": { + "file": "silu_config_M168960_N1280.json", + "M": 168960, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.5254999999997 + }, + "M=168960,N=1344": { + "file": "silu_config_M168960_N1344.json", + "M": 168960, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1583.9654999999998 + }, + "M=168960,N=1408": { + "file": "silu_config_M168960_N1408.json", + "M": 168960, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.8055 + }, + "M=168960,N=1440": { + "file": "silu_config_M168960_N1440.json", + "M": 168960, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1576.1655000000005 + }, + "M=168960,N=1536": { + "file": "silu_config_M168960_N1536.json", + "M": 168960, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1567.4855000000007 + }, + "M=168960,N=1600": { + "file": "silu_config_M168960_N1600.json", + "M": 168960, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1577.6055000000001 + }, + "M=168960,N=1664": { + "file": "silu_config_M168960_N1664.json", + "M": 168960, 
+ "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1570.0455000000002 + }, + "M=168960,N=1728": { + "file": "silu_config_M168960_N1728.json", + "M": 168960, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1559.4455000000012 + }, + "M=168960,N=1760": { + "file": "silu_config_M168960_N1760.json", + "M": 168960, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1603.1254999999996 + }, + "M=168960,N=1792": { + "file": "silu_config_M168960_N1792.json", + "M": 168960, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1585.0455000000006 + }, + "M=168960,N=1920": { + "file": "silu_config_M168960_N1920.json", + "M": 168960, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1588.6854999999996 + }, + "M=168960,N=2048": { + "file": "silu_config_M168960_N2048.json", + "M": 168960, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1256.3642500000005 + }, + "M=168960,N=2080": { + "file": "silu_config_M168960_N2080.json", + "M": 168960, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1718.2459999999992 + }, + "M=168960,N=2240": { + "file": "silu_config_M168960_N2240.json", + "M": 168960, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.2865000000002 + }, + "M=168960,N=2400": { + "file": "silu_config_M168960_N2400.json", + "M": 168960, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.0070000000005 + }, + "M=168960,N=2560": { + "file": "silu_config_M168960_N2560.json", + "M": 168960, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.3672500000005 + }, + "M=169984,N=128": { + "file": "silu_config_M169984_N128.json", + "M": 169984, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.67949999999999 + }, + "M=169984,N=160": { + "file": "silu_config_M169984_N160.json", + "M": 169984, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.75975 + }, + "M=169984,N=192": { 
+ "file": "silu_config_M169984_N192.json", + "M": 169984, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 226.44 + }, + "M=169984,N=256": { + "file": "silu_config_M169984_N256.json", + "M": 169984, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 186.75974999999994 + }, + "M=169984,N=320": { + "file": "silu_config_M169984_N320.json", + "M": 169984, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.40075000000013 + }, + "M=169984,N=384": { + "file": "silu_config_M169984_N384.json", + "M": 169984, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 412.6007500000002 + }, + "M=169984,N=480": { + "file": "silu_config_M169984_N480.json", + "M": 169984, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 420.0807500000004 + }, + "M=169984,N=512": { + "file": "silu_config_M169984_N512.json", + "M": 169984, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 324.88025000000016 + }, + "M=169984,N=576": { + "file": "silu_config_M169984_N576.json", + "M": 169984, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.5222499999998 + }, + "M=169984,N=640": { + "file": "silu_config_M169984_N640.json", + "M": 169984, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 801.4822500000002 + }, + "M=169984,N=768": { + "file": "silu_config_M169984_N768.json", + "M": 169984, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 802.0422499999997 + }, + "M=169984,N=800": { + "file": "silu_config_M169984_N800.json", + "M": 169984, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.6422499999996 + }, + "M=169984,N=896": { + "file": "silu_config_M169984_N896.json", + "M": 169984, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 816.6022499999999 + }, + "M=169984,N=960": { + "file": "silu_config_M169984_N960.json", + "M": 169984, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.602250000001 + }, + 
"M=169984,N=1024": { + "file": "silu_config_M169984_N1024.json", + "M": 169984, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 645.88175 + }, + "M=169984,N=1120": { + "file": "silu_config_M169984_N1120.json", + "M": 169984, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1603.6055000000001 + }, + "M=169984,N=1152": { + "file": "silu_config_M169984_N1152.json", + "M": 169984, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1570.3654999999994 + }, + "M=169984,N=1280": { + "file": "silu_config_M169984_N1280.json", + "M": 169984, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1572.0855000000001 + }, + "M=169984,N=1344": { + "file": "silu_config_M169984_N1344.json", + "M": 169984, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1593.8454999999994 + }, + "M=169984,N=1408": { + "file": "silu_config_M169984_N1408.json", + "M": 169984, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1573.9654999999993 + }, + "M=169984,N=1440": { + "file": "silu_config_M169984_N1440.json", + "M": 169984, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1597.2055 + }, + "M=169984,N=1536": { + "file": "silu_config_M169984_N1536.json", + "M": 169984, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1586.4854999999993 + }, + "M=169984,N=1600": { + "file": "silu_config_M169984_N1600.json", + "M": 169984, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1598.3654999999999 + }, + "M=169984,N=1664": { + "file": "silu_config_M169984_N1664.json", + "M": 169984, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1599.7255 + }, + "M=169984,N=1728": { + "file": "silu_config_M169984_N1728.json", + "M": 169984, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1590.6454999999996 + }, + "M=169984,N=1760": { + "file": "silu_config_M169984_N1760.json", + "M": 169984, + "N": 1760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1624.4057499999994 + }, + "M=169984,N=1792": { + "file": "silu_config_M169984_N1792.json", + "M": 169984, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1593.4454999999994 + }, + "M=169984,N=1920": { + "file": "silu_config_M169984_N1920.json", + "M": 169984, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1587.7255000000005 + }, + "M=169984,N=2048": { + "file": "silu_config_M169984_N2048.json", + "M": 169984, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1271.3242500000001 + }, + "M=169984,N=2080": { + "file": "silu_config_M169984_N2080.json", + "M": 169984, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1761.0062500000013 + }, + "M=169984,N=2240": { + "file": "silu_config_M169984_N2240.json", + "M": 169984, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1834.3264999999992 + }, + "M=169984,N=2400": { + "file": "silu_config_M169984_N2400.json", + "M": 169984, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1965.687 + }, + "M=169984,N=2560": { + "file": "silu_config_M169984_N2560.json", + "M": 169984, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2060.4875 + }, + "M=171008,N=128": { + "file": "silu_config_M171008_N128.json", + "M": 171008, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.91949999999999 + }, + "M=171008,N=160": { + "file": "silu_config_M171008_N160.json", + "M": 171008, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.43975000000006 + }, + "M=171008,N=192": { + "file": "silu_config_M171008_N192.json", + "M": 171008, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.63975000000005 + }, + "M=171008,N=256": { + "file": "silu_config_M171008_N256.json", + "M": 171008, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.1599999999999 + }, + "M=171008,N=320": { + "file": "silu_config_M171008_N320.json", + "M": 171008, 
+ "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 417.60074999999995 + }, + "M=171008,N=384": { + "file": "silu_config_M171008_N384.json", + "M": 171008, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.12074999999993 + }, + "M=171008,N=480": { + "file": "silu_config_M171008_N480.json", + "M": 171008, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.60074999999995 + }, + "M=171008,N=512": { + "file": "silu_config_M171008_N512.json", + "M": 171008, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 337.4402499999999 + }, + "M=171008,N=576": { + "file": "silu_config_M171008_N576.json", + "M": 171008, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.7622499999998 + }, + "M=171008,N=640": { + "file": "silu_config_M171008_N640.json", + "M": 171008, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 795.48225 + }, + "M=171008,N=768": { + "file": "silu_config_M171008_N768.json", + "M": 171008, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.80225 + }, + "M=171008,N=800": { + "file": "silu_config_M171008_N800.json", + "M": 171008, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 814.52225 + }, + "M=171008,N=896": { + "file": "silu_config_M171008_N896.json", + "M": 171008, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 799.1222500000001 + }, + "M=171008,N=960": { + "file": "silu_config_M171008_N960.json", + "M": 171008, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.7622500000002 + }, + "M=171008,N=1024": { + "file": "silu_config_M171008_N1024.json", + "M": 171008, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 649.9217499999997 + }, + "M=171008,N=1120": { + "file": "silu_config_M171008_N1120.json", + "M": 171008, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.20575 + }, + "M=171008,N=1152": { + "file": "silu_config_M171008_N1152.json", + "M": 
171008, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.4854999999998 + }, + "M=171008,N=1280": { + "file": "silu_config_M171008_N1280.json", + "M": 171008, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.0855000000001 + }, + "M=171008,N=1344": { + "file": "silu_config_M171008_N1344.json", + "M": 171008, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.9254999999998 + }, + "M=171008,N=1408": { + "file": "silu_config_M171008_N1408.json", + "M": 171008, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1573.0054999999998 + }, + "M=171008,N=1440": { + "file": "silu_config_M171008_N1440.json", + "M": 171008, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1616.7657500000005 + }, + "M=171008,N=1536": { + "file": "silu_config_M171008_N1536.json", + "M": 171008, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1595.6055000000006 + }, + "M=171008,N=1600": { + "file": "silu_config_M171008_N1600.json", + "M": 171008, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1608.0454999999997 + }, + "M=171008,N=1664": { + "file": "silu_config_M171008_N1664.json", + "M": 171008, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1598.8854999999999 + }, + "M=171008,N=1728": { + "file": "silu_config_M171008_N1728.json", + "M": 171008, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.1654999999996 + }, + "M=171008,N=1760": { + "file": "silu_config_M171008_N1760.json", + "M": 171008, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.8854999999994 + }, + "M=171008,N=1792": { + "file": "silu_config_M171008_N1792.json", + "M": 171008, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.3255 + }, + "M=171008,N=1920": { + "file": "silu_config_M171008_N1920.json", + "M": 171008, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1597.4054999999998 + }, + 
"M=171008,N=2048": { + "file": "silu_config_M171008_N2048.json", + "M": 171008, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1273.04425 + }, + "M=171008,N=2080": { + "file": "silu_config_M171008_N2080.json", + "M": 171008, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1770.4062500000005 + }, + "M=171008,N=2240": { + "file": "silu_config_M171008_N2240.json", + "M": 171008, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.8867499999997 + }, + "M=171008,N=2400": { + "file": "silu_config_M171008_N2400.json", + "M": 171008, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.2070000000003 + }, + "M=171008,N=2560": { + "file": "silu_config_M171008_N2560.json", + "M": 171008, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2067.4875 + }, + "M=172032,N=128": { + "file": "silu_config_M172032_N128.json", + "M": 172032, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.07950000000002 + }, + "M=172032,N=160": { + "file": "silu_config_M172032_N160.json", + "M": 172032, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.79974999999996 + }, + "M=172032,N=192": { + "file": "silu_config_M172032_N192.json", + "M": 172032, + "N": 192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 228.67999999999995 + }, + "M=172032,N=256": { + "file": "silu_config_M172032_N256.json", + "M": 172032, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.35975000000002 + }, + "M=172032,N=320": { + "file": "silu_config_M172032_N320.json", + "M": 172032, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.88075000000015 + }, + "M=172032,N=384": { + "file": "silu_config_M172032_N384.json", + "M": 172032, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 417.9607500000003 + }, + "M=172032,N=480": { + "file": "silu_config_M172032_N480.json", + "M": 172032, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 424.72074999999995 + }, + "M=172032,N=512": { + "file": "silu_config_M172032_N512.json", + "M": 172032, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 338.1605000000002 + }, + "M=172032,N=576": { + "file": "silu_config_M172032_N576.json", + "M": 172032, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 814.1222499999997 + }, + "M=172032,N=640": { + "file": "silu_config_M172032_N640.json", + "M": 172032, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 798.3222499999999 + }, + "M=172032,N=768": { + "file": "silu_config_M172032_N768.json", + "M": 172032, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.1622500000003 + }, + "M=172032,N=800": { + "file": "silu_config_M172032_N800.json", + "M": 172032, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.9622499999998 + }, + "M=172032,N=896": { + "file": "silu_config_M172032_N896.json", + "M": 172032, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 803.4822500000005 + }, + "M=172032,N=960": { + "file": "silu_config_M172032_N960.json", + "M": 172032, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.7222499999998 + }, + "M=172032,N=1024": { + "file": "silu_config_M172032_N1024.json", + "M": 172032, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 652.7217499999999 + }, + "M=172032,N=1120": { + "file": "silu_config_M172032_N1120.json", + "M": 172032, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1612.1657499999997 + }, + "M=172032,N=1152": { + "file": "silu_config_M172032_N1152.json", + "M": 172032, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1589.0054999999998 + }, + "M=172032,N=1280": { + "file": "silu_config_M172032_N1280.json", + "M": 172032, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1602.3255 + }, + "M=172032,N=1344": { + "file": "silu_config_M172032_N1344.json", + "M": 172032, + "N": 1344, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1601.5655000000002 + }, + "M=172032,N=1408": { + "file": "silu_config_M172032_N1408.json", + "M": 172032, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.4454999999994 + }, + "M=172032,N=1440": { + "file": "silu_config_M172032_N1440.json", + "M": 172032, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1625.2057499999996 + }, + "M=172032,N=1536": { + "file": "silu_config_M172032_N1536.json", + "M": 172032, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.3257500000004 + }, + "M=172032,N=1600": { + "file": "silu_config_M172032_N1600.json", + "M": 172032, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1626.0457499999993 + }, + "M=172032,N=1664": { + "file": "silu_config_M172032_N1664.json", + "M": 172032, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1607.9655000000007 + }, + "M=172032,N=1728": { + "file": "silu_config_M172032_N1728.json", + "M": 172032, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.4857499999998 + }, + "M=172032,N=1760": { + "file": "silu_config_M172032_N1760.json", + "M": 172032, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.125500000001 + }, + "M=172032,N=1792": { + "file": "silu_config_M172032_N1792.json", + "M": 172032, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1612.2057499999992 + }, + "M=172032,N=1920": { + "file": "silu_config_M172032_N1920.json", + "M": 172032, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1625.9657499999994 + }, + "M=172032,N=2048": { + "file": "silu_config_M172032_N2048.json", + "M": 172032, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1267.56425 + }, + "M=172032,N=2080": { + "file": "silu_config_M172032_N2080.json", + "M": 172032, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.8462500000005 + }, + "M=172032,N=2240": { + 
"file": "silu_config_M172032_N2240.json", + "M": 172032, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1874.52675 + }, + "M=172032,N=2400": { + "file": "silu_config_M172032_N2400.json", + "M": 172032, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.3272499999994 + }, + "M=172032,N=2560": { + "file": "silu_config_M172032_N2560.json", + "M": 172032, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.1674999999996 + }, + "M=173056,N=128": { + "file": "silu_config_M173056_N128.json", + "M": 173056, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.67950000000002 + }, + "M=173056,N=160": { + "file": "silu_config_M173056_N160.json", + "M": 173056, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 230.32 + }, + "M=173056,N=192": { + "file": "silu_config_M173056_N192.json", + "M": 173056, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.67975 + }, + "M=173056,N=256": { + "file": "silu_config_M173056_N256.json", + "M": 173056, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.8400000000001 + }, + "M=173056,N=320": { + "file": "silu_config_M173056_N320.json", + "M": 173056, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 421.8807499999998 + }, + "M=173056,N=384": { + "file": "silu_config_M173056_N384.json", + "M": 173056, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 420.08074999999985 + }, + "M=173056,N=480": { + "file": "silu_config_M173056_N480.json", + "M": 173056, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.3607499999998 + }, + "M=173056,N=512": { + "file": "silu_config_M173056_N512.json", + "M": 173056, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 341.40025 + }, + "M=173056,N=576": { + "file": "silu_config_M173056_N576.json", + "M": 173056, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.7222499999998 + }, + "M=173056,N=640": { + 
"file": "silu_config_M173056_N640.json", + "M": 173056, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.52225 + }, + "M=173056,N=768": { + "file": "silu_config_M173056_N768.json", + "M": 173056, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.9222500000001 + }, + "M=173056,N=800": { + "file": "silu_config_M173056_N800.json", + "M": 173056, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 812.8022500000004 + }, + "M=173056,N=896": { + "file": "silu_config_M173056_N896.json", + "M": 173056, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.6022500000004 + }, + "M=173056,N=960": { + "file": "silu_config_M173056_N960.json", + "M": 173056, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 837.4425000000003 + }, + "M=173056,N=1024": { + "file": "silu_config_M173056_N1024.json", + "M": 173056, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 656.8017500000003 + }, + "M=173056,N=1120": { + "file": "silu_config_M173056_N1120.json", + "M": 173056, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.4055000000003 + }, + "M=173056,N=1152": { + "file": "silu_config_M173056_N1152.json", + "M": 173056, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.9257500000003 + }, + "M=173056,N=1280": { + "file": "silu_config_M173056_N1280.json", + "M": 173056, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.0455000000002 + }, + "M=173056,N=1344": { + "file": "silu_config_M173056_N1344.json", + "M": 173056, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1631.8457499999995 + }, + "M=173056,N=1408": { + "file": "silu_config_M173056_N1408.json", + "M": 173056, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1591.0855000000001 + }, + "M=173056,N=1440": { + "file": "silu_config_M173056_N1440.json", + "M": 173056, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1624.5257499999993 + }, + "M=173056,N=1536": { + "file": "silu_config_M173056_N1536.json", + "M": 173056, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1614.28575 + }, + "M=173056,N=1600": { + "file": "silu_config_M173056_N1600.json", + "M": 173056, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1635.3257499999995 + }, + "M=173056,N=1664": { + "file": "silu_config_M173056_N1664.json", + "M": 173056, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1627.565750000002 + }, + "M=173056,N=1728": { + "file": "silu_config_M173056_N1728.json", + "M": 173056, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.2857499999996 + }, + "M=173056,N=1760": { + "file": "silu_config_M173056_N1760.json", + "M": 173056, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.6057500000002 + }, + "M=173056,N=1792": { + "file": "silu_config_M173056_N1792.json", + "M": 173056, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1620.5257500000007 + }, + "M=173056,N=1920": { + "file": "silu_config_M173056_N1920.json", + "M": 173056, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1625.20575 + }, + "M=173056,N=2048": { + "file": "silu_config_M173056_N2048.json", + "M": 173056, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1288.3242499999997 + }, + "M=173056,N=2080": { + "file": "silu_config_M173056_N2080.json", + "M": 173056, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1769.36625 + }, + "M=173056,N=2240": { + "file": "silu_config_M173056_N2240.json", + "M": 173056, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1885.2867500000002 + }, + "M=173056,N=2400": { + "file": "silu_config_M173056_N2400.json", + "M": 173056, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.72725 + }, + "M=173056,N=2560": { + "file": "silu_config_M173056_N2560.json", + "M": 173056, + "N": 2560, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.687500000001 + }, + "M=174080,N=128": { + "file": "silu_config_M174080_N128.json", + "M": 174080, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 98.91925000000003 + }, + "M=174080,N=160": { + "file": "silu_config_M174080_N160.json", + "M": 174080, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.91975000000008 + }, + "M=174080,N=192": { + "file": "silu_config_M174080_N192.json", + "M": 174080, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.80000000000007 + }, + "M=174080,N=256": { + "file": "silu_config_M174080_N256.json", + "M": 174080, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.99975 + }, + "M=174080,N=320": { + "file": "silu_config_M174080_N320.json", + "M": 174080, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 421.96074999999985 + }, + "M=174080,N=384": { + "file": "silu_config_M174080_N384.json", + "M": 174080, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.6807500000002 + }, + "M=174080,N=480": { + "file": "silu_config_M174080_N480.json", + "M": 174080, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 426.80074999999977 + }, + "M=174080,N=512": { + "file": "silu_config_M174080_N512.json", + "M": 174080, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 333.8802499999998 + }, + "M=174080,N=576": { + "file": "silu_config_M174080_N576.json", + "M": 174080, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.56225 + }, + "M=174080,N=640": { + "file": "silu_config_M174080_N640.json", + "M": 174080, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.8822499999997 + }, + "M=174080,N=768": { + "file": "silu_config_M174080_N768.json", + "M": 174080, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 820.6822499999998 + }, + "M=174080,N=800": { + "file": "silu_config_M174080_N800.json", + "M": 174080, + 
"N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 811.9622500000003 + }, + "M=174080,N=896": { + "file": "silu_config_M174080_N896.json", + "M": 174080, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.48225 + }, + "M=174080,N=960": { + "file": "silu_config_M174080_N960.json", + "M": 174080, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 815.7222499999998 + }, + "M=174080,N=1024": { + "file": "silu_config_M174080_N1024.json", + "M": 174080, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 659.0017500000001 + }, + "M=174080,N=1120": { + "file": "silu_config_M174080_N1120.json", + "M": 174080, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.8857500000004 + }, + "M=174080,N=1152": { + "file": "silu_config_M174080_N1152.json", + "M": 174080, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1608.6055000000001 + }, + "M=174080,N=1280": { + "file": "silu_config_M174080_N1280.json", + "M": 174080, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1599.4055000000003 + }, + "M=174080,N=1344": { + "file": "silu_config_M174080_N1344.json", + "M": 174080, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1619.84575 + }, + "M=174080,N=1408": { + "file": "silu_config_M174080_N1408.json", + "M": 174080, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.8855000000008 + }, + "M=174080,N=1440": { + "file": "silu_config_M174080_N1440.json", + "M": 174080, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1632.4857500000003 + }, + "M=174080,N=1536": { + "file": "silu_config_M174080_N1536.json", + "M": 174080, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.5257500000002 + }, + "M=174080,N=1600": { + "file": "silu_config_M174080_N1600.json", + "M": 174080, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1622.9657500000008 + }, + "M=174080,N=1664": { + "file": 
"silu_config_M174080_N1664.json", + "M": 174080, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1626.76575 + }, + "M=174080,N=1728": { + "file": "silu_config_M174080_N1728.json", + "M": 174080, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.4857499999998 + }, + "M=174080,N=1760": { + "file": "silu_config_M174080_N1760.json", + "M": 174080, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1638.0457500000007 + }, + "M=174080,N=1792": { + "file": "silu_config_M174080_N1792.json", + "M": 174080, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1609.2055 + }, + "M=174080,N=1920": { + "file": "silu_config_M174080_N1920.json", + "M": 174080, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.28575 + }, + "M=174080,N=2048": { + "file": "silu_config_M174080_N2048.json", + "M": 174080, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1292.0842500000008 + }, + "M=174080,N=2080": { + "file": "silu_config_M174080_N2080.json", + "M": 174080, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.40625 + }, + "M=174080,N=2240": { + "file": "silu_config_M174080_N2240.json", + "M": 174080, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1883.0867499999995 + }, + "M=174080,N=2400": { + "file": "silu_config_M174080_N2400.json", + "M": 174080, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.2470000000003 + }, + "M=174080,N=2560": { + "file": "silu_config_M174080_N2560.json", + "M": 174080, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.567499999999 + }, + "M=175104,N=128": { + "file": "silu_config_M175104_N128.json", + "M": 175104, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.71950000000001 + }, + "M=175104,N=160": { + "file": "silu_config_M175104_N160.json", + "M": 175104, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.64000000000004 + }, 
+ "M=175104,N=192": { + "file": "silu_config_M175104_N192.json", + "M": 175104, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.07975 + }, + "M=175104,N=256": { + "file": "silu_config_M175104_N256.json", + "M": 175104, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.47974999999997 + }, + "M=175104,N=320": { + "file": "silu_config_M175104_N320.json", + "M": 175104, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.52075000000013 + }, + "M=175104,N=384": { + "file": "silu_config_M175104_N384.json", + "M": 175104, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 424.64075000000025 + }, + "M=175104,N=480": { + "file": "silu_config_M175104_N480.json", + "M": 175104, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 432.0410000000004 + }, + "M=175104,N=512": { + "file": "silu_config_M175104_N512.json", + "M": 175104, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 343.64025000000004 + }, + "M=175104,N=576": { + "file": "silu_config_M175104_N576.json", + "M": 175104, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.4024999999999 + }, + "M=175104,N=640": { + "file": "silu_config_M175104_N640.json", + "M": 175104, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 823.6822500000001 + }, + "M=175104,N=768": { + "file": "silu_config_M175104_N768.json", + "M": 175104, + "N": 768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 835.8825000000004 + }, + "M=175104,N=800": { + "file": "silu_config_M175104_N800.json", + "M": 175104, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 832.5224999999998 + }, + "M=175104,N=896": { + "file": "silu_config_M175104_N896.json", + "M": 175104, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 808.96225 + }, + "M=175104,N=960": { + "file": "silu_config_M175104_N960.json", + "M": 175104, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.96225 + 
}, + "M=175104,N=1024": { + "file": "silu_config_M175104_N1024.json", + "M": 175104, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 664.3217500000005 + }, + "M=175104,N=1120": { + "file": "silu_config_M175104_N1120.json", + "M": 175104, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1629.24575 + }, + "M=175104,N=1152": { + "file": "silu_config_M175104_N1152.json", + "M": 175104, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1637.4457500000008 + }, + "M=175104,N=1280": { + "file": "silu_config_M175104_N1280.json", + "M": 175104, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1639.0857499999993 + }, + "M=175104,N=1344": { + "file": "silu_config_M175104_N1344.json", + "M": 175104, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1630.1657499999997 + }, + "M=175104,N=1408": { + "file": "silu_config_M175104_N1408.json", + "M": 175104, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.8857500000008 + }, + "M=175104,N=1440": { + "file": "silu_config_M175104_N1440.json", + "M": 175104, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.40575 + }, + "M=175104,N=1536": { + "file": "silu_config_M175104_N1536.json", + "M": 175104, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1653.6057500000002 + }, + "M=175104,N=1600": { + "file": "silu_config_M175104_N1600.json", + "M": 175104, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.1257499999997 + }, + "M=175104,N=1664": { + "file": "silu_config_M175104_N1664.json", + "M": 175104, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.88575 + }, + "M=175104,N=1728": { + "file": "silu_config_M175104_N1728.json", + "M": 175104, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1648.1657500000001 + }, + "M=175104,N=1760": { + "file": "silu_config_M175104_N1760.json", + "M": 175104, + "N": 1760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1649.24575 + }, + "M=175104,N=1792": { + "file": "silu_config_M175104_N1792.json", + "M": 175104, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1639.0857499999993 + }, + "M=175104,N=1920": { + "file": "silu_config_M175104_N1920.json", + "M": 175104, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.0857499999993 + }, + "M=175104,N=2048": { + "file": "silu_config_M175104_N2048.json", + "M": 175104, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1299.5642499999994 + }, + "M=175104,N=2080": { + "file": "silu_config_M175104_N2080.json", + "M": 175104, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.9662500000004 + }, + "M=175104,N=2240": { + "file": "silu_config_M175104_N2240.json", + "M": 175104, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1897.56675 + }, + "M=175104,N=2400": { + "file": "silu_config_M175104_N2400.json", + "M": 175104, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.0472500000005 + }, + "M=175104,N=2560": { + "file": "silu_config_M175104_N2560.json", + "M": 175104, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2118.2877500000004 + }, + "M=176128,N=128": { + "file": "silu_config_M176128_N128.json", + "M": 176128, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.51924999999997 + }, + "M=176128,N=160": { + "file": "silu_config_M176128_N160.json", + "M": 176128, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 221.35975000000002 + }, + "M=176128,N=192": { + "file": "silu_config_M176128_N192.json", + "M": 176128, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 224.2800000000001 + }, + "M=176128,N=256": { + "file": "silu_config_M176128_N256.json", + "M": 176128, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.67974999999996 + }, + "M=176128,N=320": { + "file": "silu_config_M176128_N320.json", + "M": 
176128, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 429.32074999999975 + }, + "M=176128,N=384": { + "file": "silu_config_M176128_N384.json", + "M": 176128, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.32074999999986 + }, + "M=176128,N=480": { + "file": "silu_config_M176128_N480.json", + "M": 176128, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.5207499999999 + }, + "M=176128,N=512": { + "file": "silu_config_M176128_N512.json", + "M": 176128, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 336.9202499999998 + }, + "M=176128,N=576": { + "file": "silu_config_M176128_N576.json", + "M": 176128, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.1222499999999 + }, + "M=176128,N=640": { + "file": "silu_config_M176128_N640.json", + "M": 176128, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 828.4822500000002 + }, + "M=176128,N=768": { + "file": "silu_config_M176128_N768.json", + "M": 176128, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 831.3224999999998 + }, + "M=176128,N=800": { + "file": "silu_config_M176128_N800.json", + "M": 176128, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 847.5624999999998 + }, + "M=176128,N=896": { + "file": "silu_config_M176128_N896.json", + "M": 176128, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.6425000000004 + }, + "M=176128,N=960": { + "file": "silu_config_M176128_N960.json", + "M": 176128, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.6422499999999 + }, + "M=176128,N=1024": { + "file": "silu_config_M176128_N1024.json", + "M": 176128, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 667.9617499999999 + }, + "M=176128,N=1120": { + "file": "silu_config_M176128_N1120.json", + "M": 176128, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1638.80575 + }, + "M=176128,N=1152": { + "file": 
"silu_config_M176128_N1152.json", + "M": 176128, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1657.1657500000001 + }, + "M=176128,N=1280": { + "file": "silu_config_M176128_N1280.json", + "M": 176128, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1651.36575 + }, + "M=176128,N=1344": { + "file": "silu_config_M176128_N1344.json", + "M": 176128, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1649.6457499999997 + }, + "M=176128,N=1408": { + "file": "silu_config_M176128_N1408.json", + "M": 176128, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.0457500000002 + }, + "M=176128,N=1440": { + "file": "silu_config_M176128_N1440.json", + "M": 176128, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1652.5257499999993 + }, + "M=176128,N=1536": { + "file": "silu_config_M176128_N1536.json", + "M": 176128, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1632.0057499999998 + }, + "M=176128,N=1600": { + "file": "silu_config_M176128_N1600.json", + "M": 176128, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.0857500000002 + }, + "M=176128,N=1664": { + "file": "silu_config_M176128_N1664.json", + "M": 176128, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.4457499999994 + }, + "M=176128,N=1728": { + "file": "silu_config_M176128_N1728.json", + "M": 176128, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.8057500000004 + }, + "M=176128,N=1760": { + "file": "silu_config_M176128_N1760.json", + "M": 176128, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.245750000001 + }, + "M=176128,N=1792": { + "file": "silu_config_M176128_N1792.json", + "M": 176128, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1669.32575 + }, + "M=176128,N=1920": { + "file": "silu_config_M176128_N1920.json", + "M": 176128, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1663.5657499999998 + }, + "M=176128,N=2048": { + "file": "silu_config_M176128_N2048.json", + "M": 176128, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1319.3244999999997 + }, + "M=176128,N=2080": { + "file": "silu_config_M176128_N2080.json", + "M": 176128, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1833.0064999999995 + }, + "M=176128,N=2240": { + "file": "silu_config_M176128_N2240.json", + "M": 176128, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1908.1267500000004 + }, + "M=176128,N=2400": { + "file": "silu_config_M176128_N2400.json", + "M": 176128, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.8872499999998 + }, + "M=176128,N=2560": { + "file": "silu_config_M176128_N2560.json", + "M": 176128, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.1677500000005 + }, + "M=177152,N=128": { + "file": "silu_config_M177152_N128.json", + "M": 177152, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 100.87950000000001 + }, + "M=177152,N=160": { + "file": "silu_config_M177152_N160.json", + "M": 177152, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 222.60000000000002 + }, + "M=177152,N=192": { + "file": "silu_config_M177152_N192.json", + "M": 177152, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 214.48000000000002 + }, + "M=177152,N=256": { + "file": "silu_config_M177152_N256.json", + "M": 177152, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 193.67999999999995 + }, + "M=177152,N=320": { + "file": "silu_config_M177152_N320.json", + "M": 177152, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.48074999999983 + }, + "M=177152,N=384": { + "file": "silu_config_M177152_N384.json", + "M": 177152, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 429.88049999999976 + }, + "M=177152,N=480": { + "file": "silu_config_M177152_N480.json", + "M": 177152, + 
"N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 437.20074999999986 + }, + "M=177152,N=512": { + "file": "silu_config_M177152_N512.json", + "M": 177152, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 349.3204999999999 + }, + "M=177152,N=576": { + "file": "silu_config_M177152_N576.json", + "M": 177152, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.9224999999999 + }, + "M=177152,N=640": { + "file": "silu_config_M177152_N640.json", + "M": 177152, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 822.0422499999997 + }, + "M=177152,N=768": { + "file": "silu_config_M177152_N768.json", + "M": 177152, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 835.8824999999999 + }, + "M=177152,N=800": { + "file": "silu_config_M177152_N800.json", + "M": 177152, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.3625000000004 + }, + "M=177152,N=896": { + "file": "silu_config_M177152_N896.json", + "M": 177152, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 849.0824999999998 + }, + "M=177152,N=960": { + "file": "silu_config_M177152_N960.json", + "M": 177152, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 855.6424999999999 + }, + "M=177152,N=1024": { + "file": "silu_config_M177152_N1024.json", + "M": 177152, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 671.3619999999999 + }, + "M=177152,N=1120": { + "file": "silu_config_M177152_N1120.json", + "M": 177152, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.2857500000005 + }, + "M=177152,N=1152": { + "file": "silu_config_M177152_N1152.json", + "M": 177152, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.5657500000002 + }, + "M=177152,N=1280": { + "file": "silu_config_M177152_N1280.json", + "M": 177152, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1647.4457499999999 + }, + "M=177152,N=1344": { + "file": 
"silu_config_M177152_N1344.json", + "M": 177152, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1659.1257499999997 + }, + "M=177152,N=1408": { + "file": "silu_config_M177152_N1408.json", + "M": 177152, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1649.4457499999999 + }, + "M=177152,N=1440": { + "file": "silu_config_M177152_N1440.json", + "M": 177152, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1662.1257500000002 + }, + "M=177152,N=1536": { + "file": "silu_config_M177152_N1536.json", + "M": 177152, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1661.9657500000008 + }, + "M=177152,N=1600": { + "file": "silu_config_M177152_N1600.json", + "M": 177152, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1674.8460000000005 + }, + "M=177152,N=1664": { + "file": "silu_config_M177152_N1664.json", + "M": 177152, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.2457499999996 + }, + "M=177152,N=1728": { + "file": "silu_config_M177152_N1728.json", + "M": 177152, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1667.5257499999998 + }, + "M=177152,N=1760": { + "file": "silu_config_M177152_N1760.json", + "M": 177152, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1658.0057499999994 + }, + "M=177152,N=1792": { + "file": "silu_config_M177152_N1792.json", + "M": 177152, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1668.5257499999998 + }, + "M=177152,N=1920": { + "file": "silu_config_M177152_N1920.json", + "M": 177152, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1662.8057499999995 + }, + "M=177152,N=2048": { + "file": "silu_config_M177152_N2048.json", + "M": 177152, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1314.6045000000004 + }, + "M=177152,N=2080": { + "file": "silu_config_M177152_N2080.json", + "M": 177152, + "N": 2080, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1811.6064999999999 + }, + "M=177152,N=2240": { + "file": "silu_config_M177152_N2240.json", + "M": 177152, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1932.9669999999996 + }, + "M=177152,N=2400": { + "file": "silu_config_M177152_N2400.json", + "M": 177152, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2050.0075000000006 + }, + "M=177152,N=2560": { + "file": "silu_config_M177152_N2560.json", + "M": 177152, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2118.4477499999994 + }, + "M=178176,N=128": { + "file": "silu_config_M178176_N128.json", + "M": 178176, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.79949999999994 + }, + "M=178176,N=160": { + "file": "silu_config_M178176_N160.json", + "M": 178176, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.63974999999994 + }, + "M=178176,N=192": { + "file": "silu_config_M178176_N192.json", + "M": 178176, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.48000000000002 + }, + "M=178176,N=256": { + "file": "silu_config_M178176_N256.json", + "M": 178176, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 206.19974999999994 + }, + "M=178176,N=320": { + "file": "silu_config_M178176_N320.json", + "M": 178176, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.44074999999987 + }, + "M=178176,N=384": { + "file": "silu_config_M178176_N384.json", + "M": 178176, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.9607500000001 + }, + "M=178176,N=480": { + "file": "silu_config_M178176_N480.json", + "M": 178176, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.2807499999999 + }, + "M=178176,N=512": { + "file": "silu_config_M178176_N512.json", + "M": 178176, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 350.96050000000014 + }, + "M=178176,N=576": { + "file": "silu_config_M178176_N576.json", + "M": 178176, + "N": 
576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.1624999999999 + }, + "M=178176,N=640": { + "file": "silu_config_M178176_N640.json", + "M": 178176, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 838.5625 + }, + "M=178176,N=768": { + "file": "silu_config_M178176_N768.json", + "M": 178176, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 819.8822500000001 + }, + "M=178176,N=800": { + "file": "silu_config_M178176_N800.json", + "M": 178176, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 846.6424999999997 + }, + "M=178176,N=896": { + "file": "silu_config_M178176_N896.json", + "M": 178176, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 843.0025 + }, + "M=178176,N=960": { + "file": "silu_config_M178176_N960.json", + "M": 178176, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 849.5625 + }, + "M=178176,N=1024": { + "file": "silu_config_M178176_N1024.json", + "M": 178176, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 675.7617499999999 + }, + "M=178176,N=1120": { + "file": "silu_config_M178176_N1120.json", + "M": 178176, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1667.9257499999994 + }, + "M=178176,N=1152": { + "file": "silu_config_M178176_N1152.json", + "M": 178176, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.4857500000003 + }, + "M=178176,N=1280": { + "file": "silu_config_M178176_N1280.json", + "M": 178176, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1659.5657499999998 + }, + "M=178176,N=1344": { + "file": "silu_config_M178176_N1344.json", + "M": 178176, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1678.8859999999995 + }, + "M=178176,N=1408": { + "file": "silu_config_M178176_N1408.json", + "M": 178176, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1658.6057500000002 + }, + "M=178176,N=1440": { + "file": "silu_config_M178176_N1440.json", 
+ "M": 178176, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1681.726 + }, + "M=178176,N=1536": { + "file": "silu_config_M178176_N1536.json", + "M": 178176, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1671.36575 + }, + "M=178176,N=1600": { + "file": "silu_config_M178176_N1600.json", + "M": 178176, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1661.9657500000003 + }, + "M=178176,N=1664": { + "file": "silu_config_M178176_N1664.json", + "M": 178176, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.1257499999997 + }, + "M=178176,N=1728": { + "file": "silu_config_M178176_N1728.json", + "M": 178176, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1676.9260000000004 + }, + "M=178176,N=1760": { + "file": "silu_config_M178176_N1760.json", + "M": 178176, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1677.6059999999998 + }, + "M=178176,N=1792": { + "file": "silu_config_M178176_N1792.json", + "M": 178176, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1667.5257499999998 + }, + "M=178176,N=1920": { + "file": "silu_config_M178176_N1920.json", + "M": 178176, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1683.8860000000004 + }, + "M=178176,N=2048": { + "file": "silu_config_M178176_N2048.json", + "M": 178176, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1322.4444999999996 + }, + "M=178176,N=2080": { + "file": "silu_config_M178176_N2080.json", + "M": 178176, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1843.5665 + }, + "M=178176,N=2240": { + "file": "silu_config_M178176_N2240.json", + "M": 178176, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1930.0869999999995 + }, + "M=178176,N=2400": { + "file": "silu_config_M178176_N2400.json", + "M": 178176, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.8875 + }, + "M=178176,N=2560": { + 
"file": "silu_config_M178176_N2560.json", + "M": 178176, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2143.68775 + }, + "M=179200,N=128": { + "file": "silu_config_M179200_N128.json", + "M": 179200, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 112.51949999999997 + }, + "M=179200,N=160": { + "file": "silu_config_M179200_N160.json", + "M": 179200, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.19999999999993 + }, + "M=179200,N=192": { + "file": "silu_config_M179200_N192.json", + "M": 179200, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.59974999999997 + }, + "M=179200,N=256": { + "file": "silu_config_M179200_N256.json", + "M": 179200, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.07975000000005 + }, + "M=179200,N=320": { + "file": "silu_config_M179200_N320.json", + "M": 179200, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.08100000000024 + }, + "M=179200,N=384": { + "file": "silu_config_M179200_N384.json", + "M": 179200, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.2807499999999 + }, + "M=179200,N=480": { + "file": "silu_config_M179200_N480.json", + "M": 179200, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 438.8007499999999 + }, + "M=179200,N=512": { + "file": "silu_config_M179200_N512.json", + "M": 179200, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 343.2805000000001 + }, + "M=179200,N=576": { + "file": "silu_config_M179200_N576.json", + "M": 179200, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 843.1225 + }, + "M=179200,N=640": { + "file": "silu_config_M179200_N640.json", + "M": 179200, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.0024999999998 + }, + "M=179200,N=768": { + "file": "silu_config_M179200_N768.json", + "M": 179200, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 840.8425000000004 + }, + 
"M=179200,N=800": { + "file": "silu_config_M179200_N800.json", + "M": 179200, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 845.7224999999999 + }, + "M=179200,N=896": { + "file": "silu_config_M179200_N896.json", + "M": 179200, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.3625 + }, + "M=179200,N=960": { + "file": "silu_config_M179200_N960.json", + "M": 179200, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.0825 + }, + "M=179200,N=1024": { + "file": "silu_config_M179200_N1024.json", + "M": 179200, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 678.2820000000002 + }, + "M=179200,N=1120": { + "file": "silu_config_M179200_N1120.json", + "M": 179200, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.326 + }, + "M=179200,N=1152": { + "file": "silu_config_M179200_N1152.json", + "M": 179200, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1644.5257500000002 + }, + "M=179200,N=1280": { + "file": "silu_config_M179200_N1280.json", + "M": 179200, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.5657500000002 + }, + "M=179200,N=1344": { + "file": "silu_config_M179200_N1344.json", + "M": 179200, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.4857500000003 + }, + "M=179200,N=1408": { + "file": "silu_config_M179200_N1408.json", + "M": 179200, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1678.6060000000002 + }, + "M=179200,N=1440": { + "file": "silu_config_M179200_N1440.json", + "M": 179200, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.3657500000008 + }, + "M=179200,N=1536": { + "file": "silu_config_M179200_N1536.json", + "M": 179200, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1680.6059999999993 + }, + "M=179200,N=1600": { + "file": "silu_config_M179200_N1600.json", + "M": 179200, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1680.2060000000001 + }, + "M=179200,N=1664": { + "file": "silu_config_M179200_N1664.json", + "M": 179200, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1673.2860000000005 + }, + "M=179200,N=1728": { + "file": "silu_config_M179200_N1728.json", + "M": 179200, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1672.4857499999998 + }, + "M=179200,N=1760": { + "file": "silu_config_M179200_N1760.json", + "M": 179200, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1674.286 + }, + "M=179200,N=1792": { + "file": "silu_config_M179200_N1792.json", + "M": 179200, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.8860000000013 + }, + "M=179200,N=1920": { + "file": "silu_config_M179200_N1920.json", + "M": 179200, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1660.5257499999998 + }, + "M=179200,N=2048": { + "file": "silu_config_M179200_N2048.json", + "M": 179200, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1334.9644999999991 + }, + "M=179200,N=2080": { + "file": "silu_config_M179200_N2080.json", + "M": 179200, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1829.0865000000013 + }, + "M=179200,N=2240": { + "file": "silu_config_M179200_N2240.json", + "M": 179200, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.7669999999998 + }, + "M=179200,N=2400": { + "file": "silu_config_M179200_N2400.json", + "M": 179200, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2054.2874999999995 + }, + "M=179200,N=2560": { + "file": "silu_config_M179200_N2560.json", + "M": 179200, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2150.407750000001 + }, + "M=180224,N=128": { + "file": "silu_config_M180224_N128.json", + "M": 180224, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.43950000000001 + }, + "M=180224,N=160": { + "file": "silu_config_M180224_N160.json", + "M": 
180224, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.56 + }, + "M=180224,N=192": { + "file": "silu_config_M180224_N192.json", + "M": 180224, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.04000000000002 + }, + "M=180224,N=256": { + "file": "silu_config_M180224_N256.json", + "M": 180224, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 208.31975 + }, + "M=180224,N=320": { + "file": "silu_config_M180224_N320.json", + "M": 180224, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.601 + }, + "M=180224,N=384": { + "file": "silu_config_M180224_N384.json", + "M": 180224, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 437.08100000000013 + }, + "M=180224,N=480": { + "file": "silu_config_M180224_N480.json", + "M": 180224, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.60074999999995 + }, + "M=180224,N=512": { + "file": "silu_config_M180224_N512.json", + "M": 180224, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 335.0804999999999 + }, + "M=180224,N=576": { + "file": "silu_config_M180224_N576.json", + "M": 180224, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 832.0024999999998 + }, + "M=180224,N=640": { + "file": "silu_config_M180224_N640.json", + "M": 180224, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 836.2025000000001 + }, + "M=180224,N=768": { + "file": "silu_config_M180224_N768.json", + "M": 180224, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 837.8825000000002 + }, + "M=180224,N=800": { + "file": "silu_config_M180224_N800.json", + "M": 180224, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.5224999999998 + }, + "M=180224,N=896": { + "file": "silu_config_M180224_N896.json", + "M": 180224, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 841.4824999999996 + }, + "M=180224,N=960": { + "file": "silu_config_M180224_N960.json", + "M": 
180224, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.0025 + }, + "M=180224,N=1024": { + "file": "silu_config_M180224_N1024.json", + "M": 180224, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 682.9217500000004 + }, + "M=180224,N=1120": { + "file": "silu_config_M180224_N1120.json", + "M": 180224, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.5660000000003 + }, + "M=180224,N=1152": { + "file": "silu_config_M180224_N1152.json", + "M": 180224, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1674.2060000000001 + }, + "M=180224,N=1280": { + "file": "silu_config_M180224_N1280.json", + "M": 180224, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1675.5659999999998 + }, + "M=180224,N=1344": { + "file": "silu_config_M180224_N1344.json", + "M": 180224, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1700.5660000000012 + }, + "M=180224,N=1408": { + "file": "silu_config_M180224_N1408.json", + "M": 180224, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.8057500000004 + }, + "M=180224,N=1440": { + "file": "silu_config_M180224_N1440.json", + "M": 180224, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1690.6059999999998 + }, + "M=180224,N=1536": { + "file": "silu_config_M180224_N1536.json", + "M": 180224, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.8060000000005 + }, + "M=180224,N=1600": { + "file": "silu_config_M180224_N1600.json", + "M": 180224, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1680.6460000000002 + }, + "M=180224,N=1664": { + "file": "silu_config_M180224_N1664.json", + "M": 180224, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1683.2059999999992 + }, + "M=180224,N=1728": { + "file": "silu_config_M180224_N1728.json", + "M": 180224, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1694.246 + }, + "M=180224,N=1760": { 
+ "file": "silu_config_M180224_N1760.json", + "M": 180224, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1697.286 + }, + "M=180224,N=1792": { + "file": "silu_config_M180224_N1792.json", + "M": 180224, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1688.9260000000004 + }, + "M=180224,N=1920": { + "file": "silu_config_M180224_N1920.json", + "M": 180224, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1692.166000000001 + }, + "M=180224,N=2048": { + "file": "silu_config_M180224_N2048.json", + "M": 180224, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1317.0445 + }, + "M=180224,N=2080": { + "file": "silu_config_M180224_N2080.json", + "M": 180224, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.0467499999995 + }, + "M=180224,N=2240": { + "file": "silu_config_M180224_N2240.json", + "M": 180224, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1974.0070000000005 + }, + "M=180224,N=2400": { + "file": "silu_config_M180224_N2400.json", + "M": 180224, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2071.8475 + }, + "M=180224,N=2560": { + "file": "silu_config_M180224_N2560.json", + "M": 180224, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2155.4480000000012 + }, + "M=181248,N=128": { + "file": "silu_config_M181248_N128.json", + "M": 181248, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.07950000000002 + }, + "M=181248,N=160": { + "file": "silu_config_M181248_N160.json", + "M": 181248, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.84000000000003 + }, + "M=181248,N=192": { + "file": "silu_config_M181248_N192.json", + "M": 181248, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.2397499999999 + }, + "M=181248,N=256": { + "file": "silu_config_M181248_N256.json", + "M": 181248, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.95974999999993 + 
}, + "M=181248,N=320": { + "file": "silu_config_M181248_N320.json", + "M": 181248, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 442.32075 + }, + "M=181248,N=384": { + "file": "silu_config_M181248_N384.json", + "M": 181248, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.32075 + }, + "M=181248,N=480": { + "file": "silu_config_M181248_N480.json", + "M": 181248, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.6407499999999 + }, + "M=181248,N=512": { + "file": "silu_config_M181248_N512.json", + "M": 181248, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 347.00025000000005 + }, + "M=181248,N=576": { + "file": "silu_config_M181248_N576.json", + "M": 181248, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 856.8024999999998 + }, + "M=181248,N=640": { + "file": "silu_config_M181248_N640.json", + "M": 181248, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 851.9224999999999 + }, + "M=181248,N=768": { + "file": "silu_config_M181248_N768.json", + "M": 181248, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 853.6824999999999 + }, + "M=181248,N=800": { + "file": "silu_config_M181248_N800.json", + "M": 181248, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.3625 + }, + "M=181248,N=896": { + "file": "silu_config_M181248_N896.json", + "M": 181248, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.1224999999995 + }, + "M=181248,N=960": { + "file": "silu_config_M181248_N960.json", + "M": 181248, + "N": 960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 873.8824999999997 + }, + "M=181248,N=1024": { + "file": "silu_config_M181248_N1024.json", + "M": 181248, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 687.5217499999999 + }, + "M=181248,N=1120": { + "file": "silu_config_M181248_N1120.json", + "M": 181248, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1706.4860000000003 + }, + "M=181248,N=1152": { + "file": "silu_config_M181248_N1152.json", + "M": 181248, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1683.1660000000002 + }, + "M=181248,N=1280": { + "file": "silu_config_M181248_N1280.json", + "M": 181248, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.4060000000004 + }, + "M=181248,N=1344": { + "file": "silu_config_M181248_N1344.json", + "M": 181248, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1707.1659999999997 + }, + "M=181248,N=1408": { + "file": "silu_config_M181248_N1408.json", + "M": 181248, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1687.0459999999998 + }, + "M=181248,N=1440": { + "file": "silu_config_M181248_N1440.json", + "M": 181248, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.2059999999997 + }, + "M=181248,N=1536": { + "file": "silu_config_M181248_N1536.json", + "M": 181248, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1689.2459999999996 + }, + "M=181248,N=1600": { + "file": "silu_config_M181248_N1600.json", + "M": 181248, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1700.5660000000007 + }, + "M=181248,N=1664": { + "file": "silu_config_M181248_N1664.json", + "M": 181248, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1705.6460000000006 + }, + "M=181248,N=1728": { + "file": "silu_config_M181248_N1728.json", + "M": 181248, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1713.7260000000006 + }, + "M=181248,N=1760": { + "file": "silu_config_M181248_N1760.json", + "M": 181248, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1716.246000000001 + }, + "M=181248,N=1792": { + "file": "silu_config_M181248_N1792.json", + "M": 181248, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.8860000000004 + }, + "M=181248,N=1920": { + "file": "silu_config_M181248_N1920.json", + "M": 
181248, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1700.4460000000008 + }, + "M=181248,N=2048": { + "file": "silu_config_M181248_N2048.json", + "M": 181248, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1353.2045000000007 + }, + "M=181248,N=2080": { + "file": "silu_config_M181248_N2080.json", + "M": 181248, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1882.9267499999996 + }, + "M=181248,N=2240": { + "file": "silu_config_M181248_N2240.json", + "M": 181248, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.0470000000005 + }, + "M=181248,N=2400": { + "file": "silu_config_M181248_N2400.json", + "M": 181248, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.7675 + }, + "M=181248,N=2560": { + "file": "silu_config_M181248_N2560.json", + "M": 181248, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.608 + }, + "M=182272,N=128": { + "file": "silu_config_M182272_N128.json", + "M": 182272, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.59925000000001 + }, + "M=182272,N=160": { + "file": "silu_config_M182272_N160.json", + "M": 182272, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.07975 + }, + "M=182272,N=192": { + "file": "silu_config_M182272_N192.json", + "M": 182272, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 231.52000000000004 + }, + "M=182272,N=256": { + "file": "silu_config_M182272_N256.json", + "M": 182272, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.0000000000001 + }, + "M=182272,N=320": { + "file": "silu_config_M182272_N320.json", + "M": 182272, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.16075 + }, + "M=182272,N=384": { + "file": "silu_config_M182272_N384.json", + "M": 182272, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.4007499999999 + }, + "M=182272,N=480": { + "file": 
"silu_config_M182272_N480.json", + "M": 182272, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.1207499999998 + }, + "M=182272,N=512": { + "file": "silu_config_M182272_N512.json", + "M": 182272, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 359.28049999999996 + }, + "M=182272,N=576": { + "file": "silu_config_M182272_N576.json", + "M": 182272, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 872.6824999999997 + }, + "M=182272,N=640": { + "file": "silu_config_M182272_N640.json", + "M": 182272, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 856.6025 + }, + "M=182272,N=768": { + "file": "silu_config_M182272_N768.json", + "M": 182272, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 869.3625000000002 + }, + "M=182272,N=800": { + "file": "silu_config_M182272_N800.json", + "M": 182272, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 865.4425000000008 + }, + "M=182272,N=896": { + "file": "silu_config_M182272_N896.json", + "M": 182272, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.6025000000004 + }, + "M=182272,N=960": { + "file": "silu_config_M182272_N960.json", + "M": 182272, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 859.4424999999997 + }, + "M=182272,N=1024": { + "file": "silu_config_M182272_N1024.json", + "M": 182272, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 690.5217499999997 + }, + "M=182272,N=1120": { + "file": "silu_config_M182272_N1120.json", + "M": 182272, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1718.7659999999996 + }, + "M=182272,N=1152": { + "file": "silu_config_M182272_N1152.json", + "M": 182272, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1682.3659999999995 + }, + "M=182272,N=1280": { + "file": "silu_config_M182272_N1280.json", + "M": 182272, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1704.406 + }, + 
"M=182272,N=1344": { + "file": "silu_config_M182272_N1344.json", + "M": 182272, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1706.2859999999996 + }, + "M=182272,N=1408": { + "file": "silu_config_M182272_N1408.json", + "M": 182272, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1696.5659999999998 + }, + "M=182272,N=1440": { + "file": "silu_config_M182272_N1440.json", + "M": 182272, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1719.6859999999997 + }, + "M=182272,N=1536": { + "file": "silu_config_M182272_N1536.json", + "M": 182272, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1719.4459999999995 + }, + "M=182272,N=1600": { + "file": "silu_config_M182272_N1600.json", + "M": 182272, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1720.5660000000007 + }, + "M=182272,N=1664": { + "file": "silu_config_M182272_N1664.json", + "M": 182272, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.7259999999997 + }, + "M=182272,N=1728": { + "file": "silu_config_M182272_N1728.json", + "M": 182272, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1723.1260000000002 + }, + "M=182272,N=1760": { + "file": "silu_config_M182272_N1760.json", + "M": 182272, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1715.366 + }, + "M=182272,N=1792": { + "file": "silu_config_M182272_N1792.json", + "M": 182272, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1694.9660000000003 + }, + "M=182272,N=1920": { + "file": "silu_config_M182272_N1920.json", + "M": 182272, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1699.9259999999995 + }, + "M=182272,N=2048": { + "file": "silu_config_M182272_N2048.json", + "M": 182272, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1360.6444999999994 + }, + "M=182272,N=2080": { + "file": "silu_config_M182272_N2080.json", + "M": 182272, + "N": 2080, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1885.9267499999996 + }, + "M=182272,N=2240": { + "file": "silu_config_M182272_N2240.json", + "M": 182272, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.2872500000003 + }, + "M=182272,N=2400": { + "file": "silu_config_M182272_N2400.json", + "M": 182272, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + "M=182272,N=2560": { + "file": "silu_config_M182272_N2560.json", + "M": 182272, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.2079999999996 + }, + "M=183296,N=128": { + "file": "silu_config_M183296_N128.json", + "M": 183296, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.91924999999998 + }, + "M=183296,N=160": { + "file": "silu_config_M183296_N160.json", + "M": 183296, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 232.15974999999997 + }, + "M=183296,N=192": { + "file": "silu_config_M183296_N192.json", + "M": 183296, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 232.36 + }, + "M=183296,N=256": { + "file": "silu_config_M183296_N256.json", + "M": 183296, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.15974999999997 + }, + "M=183296,N=320": { + "file": "silu_config_M183296_N320.json", + "M": 183296, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.8007499999999 + }, + "M=183296,N=384": { + "file": "silu_config_M183296_N384.json", + "M": 183296, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.4407500000001 + }, + "M=183296,N=480": { + "file": "silu_config_M183296_N480.json", + "M": 183296, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 452.12075000000004 + }, + "M=183296,N=512": { + "file": "silu_config_M183296_N512.json", + "M": 183296, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 361.2404999999999 + }, + "M=183296,N=576": { + "file": "silu_config_M183296_N576.json", 
+ "M": 183296, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 877.2825000000003 + }, + "M=183296,N=640": { + "file": "silu_config_M183296_N640.json", + "M": 183296, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 861.8825000000002 + }, + "M=183296,N=768": { + "file": "silu_config_M183296_N768.json", + "M": 183296, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.1224999999997 + }, + "M=183296,N=800": { + "file": "silu_config_M183296_N800.json", + "M": 183296, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 870.2425000000003 + }, + "M=183296,N=896": { + "file": "silu_config_M183296_N896.json", + "M": 183296, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 856.8425000000011 + }, + "M=183296,N=960": { + "file": "silu_config_M183296_N960.json", + "M": 183296, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 884.4825000000001 + }, + "M=183296,N=1024": { + "file": "silu_config_M183296_N1024.json", + "M": 183296, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 694.8820000000001 + }, + "M=183296,N=1120": { + "file": "silu_config_M183296_N1120.json", + "M": 183296, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1715.0460000000003 + }, + "M=183296,N=1152": { + "file": "silu_config_M183296_N1152.json", + "M": 183296, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.9260000000008 + }, + "M=183296,N=1280": { + "file": "silu_config_M183296_N1280.json", + "M": 183296, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1714.0059999999999 + }, + "M=183296,N=1344": { + "file": "silu_config_M183296_N1344.json", + "M": 183296, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1729.0860000000002 + }, + "M=183296,N=1408": { + "file": "silu_config_M183296_N1408.json", + "M": 183296, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1705.6459999999997 + }, + 
"M=183296,N=1440": { + "file": "silu_config_M183296_N1440.json", + "M": 183296, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1728.966 + }, + "M=183296,N=1536": { + "file": "silu_config_M183296_N1536.json", + "M": 183296, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1718.2459999999996 + }, + "M=183296,N=1600": { + "file": "silu_config_M183296_N1600.json", + "M": 183296, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.4860000000008 + }, + "M=183296,N=1664": { + "file": "silu_config_M183296_N1664.json", + "M": 183296, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1721.8060000000005 + }, + "M=183296,N=1728": { + "file": "silu_config_M183296_N1728.json", + "M": 183296, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1712.0860000000002 + }, + "M=183296,N=1760": { + "file": "silu_config_M183296_N1760.json", + "M": 183296, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1704.1259999999993 + }, + "M=183296,N=1792": { + "file": "silu_config_M183296_N1792.json", + "M": 183296, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1725.0860000000002 + }, + "M=183296,N=1920": { + "file": "silu_config_M183296_N1920.json", + "M": 183296, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1729.9260000000004 + }, + "M=183296,N=2048": { + "file": "silu_config_M183296_N2048.json", + "M": 183296, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1371.0845 + }, + "M=183296,N=2080": { + "file": "silu_config_M183296_N2080.json", + "M": 183296, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1903.8867499999997 + }, + "M=183296,N=2240": { + "file": "silu_config_M183296_N2240.json", + "M": 183296, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1977.2069999999994 + }, + "M=183296,N=2400": { + "file": "silu_config_M183296_N2400.json", + "M": 183296, + "N": 2400, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 2107.80775 + }, + "M=183296,N=2560": { + "file": "silu_config_M183296_N2560.json", + "M": 183296, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.848 + }, + "M=184320,N=128": { + "file": "silu_config_M184320_N128.json", + "M": 184320, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.67949999999999 + }, + "M=184320,N=160": { + "file": "silu_config_M184320_N160.json", + "M": 184320, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 222.39975000000004 + }, + "M=184320,N=192": { + "file": "silu_config_M184320_N192.json", + "M": 184320, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.9600000000001 + }, + "M=184320,N=256": { + "file": "silu_config_M184320_N256.json", + "M": 184320, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.2 + }, + "M=184320,N=320": { + "file": "silu_config_M184320_N320.json", + "M": 184320, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.24074999999993 + }, + "M=184320,N=384": { + "file": "silu_config_M184320_N384.json", + "M": 184320, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.0407499999999 + }, + "M=184320,N=480": { + "file": "silu_config_M184320_N480.json", + "M": 184320, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.32075 + }, + "M=184320,N=512": { + "file": "silu_config_M184320_N512.json", + "M": 184320, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 350.00049999999976 + }, + "M=184320,N=576": { + "file": "silu_config_M184320_N576.json", + "M": 184320, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 866.4424999999997 + }, + "M=184320,N=640": { + "file": "silu_config_M184320_N640.json", + "M": 184320, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.2424999999998 + }, + "M=184320,N=768": { + "file": "silu_config_M184320_N768.json", + "M": 184320, + "N": 768, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 843.2825 + }, + "M=184320,N=800": { + "file": "silu_config_M184320_N800.json", + "M": 184320, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 848.4024999999997 + }, + "M=184320,N=896": { + "file": "silu_config_M184320_N896.json", + "M": 184320, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 867.4825000000001 + }, + "M=184320,N=960": { + "file": "silu_config_M184320_N960.json", + "M": 184320, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.4024999999997 + }, + "M=184320,N=1024": { + "file": "silu_config_M184320_N1024.json", + "M": 184320, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 696.8819999999998 + }, + "M=184320,N=1120": { + "file": "silu_config_M184320_N1120.json", + "M": 184320, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.1659999999997 + }, + "M=184320,N=1152": { + "file": "silu_config_M184320_N1152.json", + "M": 184320, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.0060000000003 + }, + "M=184320,N=1280": { + "file": "silu_config_M184320_N1280.json", + "M": 184320, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.3659999999995 + }, + "M=184320,N=1344": { + "file": "silu_config_M184320_N1344.json", + "M": 184320, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1713.6460000000002 + }, + "M=184320,N=1408": { + "file": "silu_config_M184320_N1408.json", + "M": 184320, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1684.0460000000003 + }, + "M=184320,N=1440": { + "file": "silu_config_M184320_N1440.json", + "M": 184320, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.406 + }, + "M=184320,N=1536": { + "file": "silu_config_M184320_N1536.json", + "M": 184320, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.2859999999996 + }, + "M=184320,N=1600": { + "file": "silu_config_M184320_N1600.json", + "M": 
184320, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1717.0859999999998 + }, + "M=184320,N=1664": { + "file": "silu_config_M184320_N1664.json", + "M": 184320, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1699.4459999999995 + }, + "M=184320,N=1728": { + "file": "silu_config_M184320_N1728.json", + "M": 184320, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.0060000000003 + }, + "M=184320,N=1760": { + "file": "silu_config_M184320_N1760.json", + "M": 184320, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1701.0060000000003 + }, + "M=184320,N=1792": { + "file": "silu_config_M184320_N1792.json", + "M": 184320, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.4459999999995 + }, + "M=184320,N=1920": { + "file": "silu_config_M184320_N1920.json", + "M": 184320, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1696.7259999999997 + }, + "M=184320,N=2048": { + "file": "silu_config_M184320_N2048.json", + "M": 184320, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1335.6844999999998 + }, + "M=184320,N=2080": { + "file": "silu_config_M184320_N2080.json", + "M": 184320, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.6067500000004 + }, + "M=184320,N=2240": { + "file": "silu_config_M184320_N2240.json", + "M": 184320, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1990.0472499999996 + }, + "M=184320,N=2400": { + "file": "silu_config_M184320_N2400.json", + "M": 184320, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2087.7675 + }, + "M=184320,N=2560": { + "file": "silu_config_M184320_N2560.json", + "M": 184320, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2195.6479999999992 + }, + "M=185344,N=128": { + "file": "silu_config_M185344_N128.json", + "M": 185344, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 125.59949999999998 + }, + 
"M=185344,N=160": { + "file": "silu_config_M185344_N160.json", + "M": 185344, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.64 + }, + "M=185344,N=192": { + "file": "silu_config_M185344_N192.json", + "M": 185344, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.8 + }, + "M=185344,N=256": { + "file": "silu_config_M185344_N256.json", + "M": 185344, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 213.56000000000006 + }, + "M=185344,N=320": { + "file": "silu_config_M185344_N320.json", + "M": 185344, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.8810000000001 + }, + "M=185344,N=384": { + "file": "silu_config_M185344_N384.json", + "M": 185344, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.16075000000046 + }, + "M=185344,N=480": { + "file": "silu_config_M185344_N480.json", + "M": 185344, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8407500000002 + }, + "M=185344,N=512": { + "file": "silu_config_M185344_N512.json", + "M": 185344, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 363.92049999999983 + }, + "M=185344,N=576": { + "file": "silu_config_M185344_N576.json", + "M": 185344, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.3625000000002 + }, + "M=185344,N=640": { + "file": "silu_config_M185344_N640.json", + "M": 185344, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 870.8025 + }, + "M=185344,N=768": { + "file": "silu_config_M185344_N768.json", + "M": 185344, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 872.4824999999998 + }, + "M=185344,N=800": { + "file": "silu_config_M185344_N800.json", + "M": 185344, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 879.5624999999998 + }, + "M=185344,N=896": { + "file": "silu_config_M185344_N896.json", + "M": 185344, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 897.8427500000003 + }, + 
"M=185344,N=960": { + "file": "silu_config_M185344_N960.json", + "M": 185344, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 871.8824999999997 + }, + "M=185344,N=1024": { + "file": "silu_config_M185344_N1024.json", + "M": 185344, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 702.5217500000003 + }, + "M=185344,N=1120": { + "file": "silu_config_M185344_N1120.json", + "M": 185344, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.0862500000012 + }, + "M=185344,N=1152": { + "file": "silu_config_M185344_N1152.json", + "M": 185344, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1731.0459999999998 + }, + "M=185344,N=1280": { + "file": "silu_config_M185344_N1280.json", + "M": 185344, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1732.8859999999995 + }, + "M=185344,N=1344": { + "file": "silu_config_M185344_N1344.json", + "M": 185344, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1734.5260000000003 + }, + "M=185344,N=1408": { + "file": "silu_config_M185344_N1408.json", + "M": 185344, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1713.7659999999996 + }, + "M=185344,N=1440": { + "file": "silu_config_M185344_N1440.json", + "M": 185344, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1747.9662500000004 + }, + "M=185344,N=1536": { + "file": "silu_config_M185344_N1536.json", + "M": 185344, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1737.6062500000003 + }, + "M=185344,N=1600": { + "file": "silu_config_M185344_N1600.json", + "M": 185344, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1749.1262499999993 + }, + "M=185344,N=1664": { + "file": "silu_config_M185344_N1664.json", + "M": 185344, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1720.446 + }, + "M=185344,N=1728": { + "file": "silu_config_M185344_N1728.json", + "M": 185344, + "N": 1728, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 1745.52625 + }, + "M=185344,N=1760": { + "file": "silu_config_M185344_N1760.json", + "M": 185344, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.9262499999995 + }, + "M=185344,N=1792": { + "file": "silu_config_M185344_N1792.json", + "M": 185344, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1748.6862499999997 + }, + "M=185344,N=1920": { + "file": "silu_config_M185344_N1920.json", + "M": 185344, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.2062500000002 + }, + "M=185344,N=2048": { + "file": "silu_config_M185344_N2048.json", + "M": 185344, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1373.5244999999995 + }, + "M=185344,N=2080": { + "file": "silu_config_M185344_N2080.json", + "M": 185344, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1927.567 + }, + "M=185344,N=2240": { + "file": "silu_config_M185344_N2240.json", + "M": 185344, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.7272500000001 + }, + "M=185344,N=2400": { + "file": "silu_config_M185344_N2400.json", + "M": 185344, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2130.2077500000005 + }, + "M=185344,N=2560": { + "file": "silu_config_M185344_N2560.json", + "M": 185344, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.6482499999993 + }, + "M=186368,N=128": { + "file": "silu_config_M186368_N128.json", + "M": 186368, + "N": 128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 126.27950000000004 + }, + "M=186368,N=160": { + "file": "silu_config_M186368_N160.json", + "M": 186368, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.95999999999998 + }, + "M=186368,N=192": { + "file": "silu_config_M186368_N192.json", + "M": 186368, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 244.0000000000001 + }, + "M=186368,N=256": { + "file": "silu_config_M186368_N256.json", + "M": 
186368, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 214.83975000000015 + }, + "M=186368,N=320": { + "file": "silu_config_M186368_N320.json", + "M": 186368, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.3610000000001 + }, + "M=186368,N=384": { + "file": "silu_config_M186368_N384.json", + "M": 186368, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.9607500000002 + }, + "M=186368,N=480": { + "file": "silu_config_M186368_N480.json", + "M": 186368, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.40075 + }, + "M=186368,N=512": { + "file": "silu_config_M186368_N512.json", + "M": 186368, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 355.1205 + }, + "M=186368,N=576": { + "file": "silu_config_M186368_N576.json", + "M": 186368, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 891.1225 + }, + "M=186368,N=640": { + "file": "silu_config_M186368_N640.json", + "M": 186368, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.4825000000001 + }, + "M=186368,N=768": { + "file": "silu_config_M186368_N768.json", + "M": 186368, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 878.0025 + }, + "M=186368,N=800": { + "file": "silu_config_M186368_N800.json", + "M": 186368, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 883.8425 + }, + "M=186368,N=896": { + "file": "silu_config_M186368_N896.json", + "M": 186368, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 871.1225 + }, + "M=186368,N=960": { + "file": "silu_config_M186368_N960.json", + "M": 186368, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 898.6827500000004 + }, + "M=186368,N=1024": { + "file": "silu_config_M186368_N1024.json", + "M": 186368, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 705.5219999999999 + }, + "M=186368,N=1120": { + "file": "silu_config_M186368_N1120.json", + "M": 186368, + "N": 1120, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1742.9662499999995 + }, + "M=186368,N=1152": { + "file": "silu_config_M186368_N1152.json", + "M": 186368, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1733.806 + }, + "M=186368,N=1280": { + "file": "silu_config_M186368_N1280.json", + "M": 186368, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1721.5659999999993 + }, + "M=186368,N=1344": { + "file": "silu_config_M186368_N1344.json", + "M": 186368, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1733.5259999999994 + }, + "M=186368,N=1408": { + "file": "silu_config_M186368_N1408.json", + "M": 186368, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.1262499999998 + }, + "M=186368,N=1440": { + "file": "silu_config_M186368_N1440.json", + "M": 186368, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1757.48625 + }, + "M=186368,N=1536": { + "file": "silu_config_M186368_N1536.json", + "M": 186368, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1736.5262499999994 + }, + "M=186368,N=1600": { + "file": "silu_config_M186368_N1600.json", + "M": 186368, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1750.9262500000004 + }, + "M=186368,N=1664": { + "file": "silu_config_M186368_N1664.json", + "M": 186368, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1753.5662499999999 + }, + "M=186368,N=1728": { + "file": "silu_config_M186368_N1728.json", + "M": 186368, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.0462499999994 + }, + "M=186368,N=1760": { + "file": "silu_config_M186368_N1760.json", + "M": 186368, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1734.326 + }, + "M=186368,N=1792": { + "file": "silu_config_M186368_N1792.json", + "M": 186368, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.0062499999995 + }, + "M=186368,N=1920": { + "file": 
"silu_config_M186368_N1920.json", + "M": 186368, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.48625 + }, + "M=186368,N=2048": { + "file": "silu_config_M186368_N2048.json", + "M": 186368, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1385.5647499999995 + }, + "M=186368,N=2080": { + "file": "silu_config_M186368_N2080.json", + "M": 186368, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1926.4069999999992 + }, + "M=186368,N=2240": { + "file": "silu_config_M186368_N2240.json", + "M": 186368, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2019.0872499999996 + }, + "M=186368,N=2400": { + "file": "silu_config_M186368_N2400.json", + "M": 186368, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.2877499999995 + }, + "M=186368,N=2560": { + "file": "silu_config_M186368_N2560.json", + "M": 186368, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2237.5282500000003 + }, + "M=187392,N=128": { + "file": "silu_config_M187392_N128.json", + "M": 187392, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.19924999999998 + }, + "M=187392,N=160": { + "file": "silu_config_M187392_N160.json", + "M": 187392, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.08000000000004 + }, + "M=187392,N=192": { + "file": "silu_config_M187392_N192.json", + "M": 187392, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.31999999999994 + }, + "M=187392,N=256": { + "file": "silu_config_M187392_N256.json", + "M": 187392, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.88000000000005 + }, + "M=187392,N=320": { + "file": "silu_config_M187392_N320.json", + "M": 187392, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8810000000003 + }, + "M=187392,N=384": { + "file": "silu_config_M187392_N384.json", + "M": 187392, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
454.4407500000003 + }, + "M=187392,N=480": { + "file": "silu_config_M187392_N480.json", + "M": 187392, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.80074999999965 + }, + "M=187392,N=512": { + "file": "silu_config_M187392_N512.json", + "M": 187392, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 357.44049999999993 + }, + "M=187392,N=576": { + "file": "silu_config_M187392_N576.json", + "M": 187392, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.8427500000003 + }, + "M=187392,N=640": { + "file": "silu_config_M187392_N640.json", + "M": 187392, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 890.9625000000001 + }, + "M=187392,N=768": { + "file": "silu_config_M187392_N768.json", + "M": 187392, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 871.0425 + }, + "M=187392,N=800": { + "file": "silu_config_M187392_N800.json", + "M": 187392, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.3627499999998 + }, + "M=187392,N=896": { + "file": "silu_config_M187392_N896.json", + "M": 187392, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.0825000000004 + }, + "M=187392,N=960": { + "file": "silu_config_M187392_N960.json", + "M": 187392, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 892.4427500000002 + }, + "M=187392,N=1024": { + "file": "silu_config_M187392_N1024.json", + "M": 187392, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 710.2820000000002 + }, + "M=187392,N=1120": { + "file": "silu_config_M187392_N1120.json", + "M": 187392, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.6462499999998 + }, + "M=187392,N=1152": { + "file": "silu_config_M187392_N1152.json", + "M": 187392, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.3662500000005 + }, + "M=187392,N=1280": { + "file": "silu_config_M187392_N1280.json", + "M": 187392, + "N": 1280, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 1741.0462499999999 + }, + "M=187392,N=1344": { + "file": "silu_config_M187392_N1344.json", + "M": 187392, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.5662499999999 + }, + "M=187392,N=1408": { + "file": "silu_config_M187392_N1408.json", + "M": 187392, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1732.766 + }, + "M=187392,N=1440": { + "file": "silu_config_M187392_N1440.json", + "M": 187392, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.80625 + }, + "M=187392,N=1536": { + "file": "silu_config_M187392_N1536.json", + "M": 187392, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.36625 + }, + "M=187392,N=1600": { + "file": "silu_config_M187392_N1600.json", + "M": 187392, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.1662500000002 + }, + "M=187392,N=1664": { + "file": "silu_config_M187392_N1664.json", + "M": 187392, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1749.7662499999997 + }, + "M=187392,N=1728": { + "file": "silu_config_M187392_N1728.json", + "M": 187392, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1771.9262500000004 + }, + "M=187392,N=1760": { + "file": "silu_config_M187392_N1760.json", + "M": 187392, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1775.2062499999993 + }, + "M=187392,N=1792": { + "file": "silu_config_M187392_N1792.json", + "M": 187392, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.44625 + }, + "M=187392,N=1920": { + "file": "silu_config_M187392_N1920.json", + "M": 187392, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.36625 + }, + "M=187392,N=2048": { + "file": "silu_config_M187392_N2048.json", + "M": 187392, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1384.12475 + }, + "M=187392,N=2080": { + "file": "silu_config_M187392_N2080.json", + "M": 187392, + "N": 
2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.2470000000003 + }, + "M=187392,N=2240": { + "file": "silu_config_M187392_N2240.json", + "M": 187392, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2039.8475 + }, + "M=187392,N=2400": { + "file": "silu_config_M187392_N2400.json", + "M": 187392, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.1279999999997 + }, + "M=187392,N=2560": { + "file": "silu_config_M187392_N2560.json", + "M": 187392, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.6882499999992 + }, + "M=188416,N=128": { + "file": "silu_config_M188416_N128.json", + "M": 188416, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.63925000000003 + }, + "M=188416,N=160": { + "file": "silu_config_M188416_N160.json", + "M": 188416, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 238.47999999999996 + }, + "M=188416,N=192": { + "file": "silu_config_M188416_N192.json", + "M": 188416, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.51999999999998 + }, + "M=188416,N=256": { + "file": "silu_config_M188416_N256.json", + "M": 188416, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 205.59974999999997 + }, + "M=188416,N=320": { + "file": "silu_config_M188416_N320.json", + "M": 188416, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.36075000000017 + }, + "M=188416,N=384": { + "file": "silu_config_M188416_N384.json", + "M": 188416, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.7207499999997 + }, + "M=188416,N=480": { + "file": "silu_config_M188416_N480.json", + "M": 188416, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 464.60074999999983 + }, + "M=188416,N=512": { + "file": "silu_config_M188416_N512.json", + "M": 188416, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 349.8002499999999 + }, + "M=188416,N=576": { + "file": 
"silu_config_M188416_N576.json", + "M": 188416, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 889.6424999999999 + }, + "M=188416,N=640": { + "file": "silu_config_M188416_N640.json", + "M": 188416, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 884.8824999999997 + }, + "M=188416,N=768": { + "file": "silu_config_M188416_N768.json", + "M": 188416, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 886.8425 + }, + "M=188416,N=800": { + "file": "silu_config_M188416_N800.json", + "M": 188416, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 893.2027500000002 + }, + "M=188416,N=896": { + "file": "silu_config_M188416_N896.json", + "M": 188416, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 890.5225 + }, + "M=188416,N=960": { + "file": "silu_config_M188416_N960.json", + "M": 188416, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 886.0025 + }, + "M=188416,N=1024": { + "file": "silu_config_M188416_N1024.json", + "M": 188416, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 713.8020000000001 + }, + "M=188416,N=1120": { + "file": "silu_config_M188416_N1120.json", + "M": 188416, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1772.1262500000003 + }, + "M=188416,N=1152": { + "file": "silu_config_M188416_N1152.json", + "M": 188416, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.4862500000004 + }, + "M=188416,N=1280": { + "file": "silu_config_M188416_N1280.json", + "M": 188416, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.32625 + }, + "M=188416,N=1344": { + "file": "silu_config_M188416_N1344.json", + "M": 188416, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.36625 + }, + "M=188416,N=1408": { + "file": "silu_config_M188416_N1408.json", + "M": 188416, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1731.726 + }, + "M=188416,N=1440": { + "file": 
"silu_config_M188416_N1440.json", + "M": 188416, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1776.2462500000001 + }, + "M=188416,N=1536": { + "file": "silu_config_M188416_N1536.json", + "M": 188416, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.1662499999993 + }, + "M=188416,N=1600": { + "file": "silu_config_M188416_N1600.json", + "M": 188416, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1769.9262500000004 + }, + "M=188416,N=1664": { + "file": "silu_config_M188416_N1664.json", + "M": 188416, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.48625 + }, + "M=188416,N=1728": { + "file": "silu_config_M188416_N1728.json", + "M": 188416, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.36625 + }, + "M=188416,N=1760": { + "file": "silu_config_M188416_N1760.json", + "M": 188416, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.1662500000002 + }, + "M=188416,N=1792": { + "file": "silu_config_M188416_N1792.json", + "M": 188416, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1762.8062499999996 + }, + "M=188416,N=1920": { + "file": "silu_config_M188416_N1920.json", + "M": 188416, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1769.7662500000006 + }, + "M=188416,N=2048": { + "file": "silu_config_M188416_N2048.json", + "M": 188416, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1409.5247499999996 + }, + "M=188416,N=2080": { + "file": "silu_config_M188416_N2080.json", + "M": 188416, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1927.607 + }, + "M=188416,N=2240": { + "file": "silu_config_M188416_N2240.json", + "M": 188416, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2040.4875000000002 + }, + "M=188416,N=2400": { + "file": "silu_config_M188416_N2400.json", + "M": 188416, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2165.847999999999 + }, + "M=188416,N=2560": { + "file": "silu_config_M188416_N2560.json", + "M": 188416, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.608250000002 + }, + "M=189440,N=128": { + "file": "silu_config_M189440_N128.json", + "M": 189440, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.71950000000001 + }, + "M=189440,N=160": { + "file": "silu_config_M189440_N160.json", + "M": 189440, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.31975 + }, + "M=189440,N=192": { + "file": "silu_config_M189440_N192.json", + "M": 189440, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.71974999999992 + }, + "M=189440,N=256": { + "file": "silu_config_M189440_N256.json", + "M": 189440, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 206.63975000000016 + }, + "M=189440,N=320": { + "file": "silu_config_M189440_N320.json", + "M": 189440, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 457.72074999999984 + }, + "M=189440,N=384": { + "file": "silu_config_M189440_N384.json", + "M": 189440, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.2810000000003 + }, + "M=189440,N=480": { + "file": "silu_config_M189440_N480.json", + "M": 189440, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 463.92075 + }, + "M=189440,N=512": { + "file": "silu_config_M189440_N512.json", + "M": 189440, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 371.80050000000006 + }, + "M=189440,N=576": { + "file": "silu_config_M189440_N576.json", + "M": 189440, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 894.6027499999998 + }, + "M=189440,N=640": { + "file": "silu_config_M189440_N640.json", + "M": 189440, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 900.5227499999999 + }, + "M=189440,N=768": { + "file": "silu_config_M189440_N768.json", + "M": 189440, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 891.3225000000004 + }, + "M=189440,N=800": { + "file": "silu_config_M189440_N800.json", + "M": 189440, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 882.1225000000002 + }, + "M=189440,N=896": { + "file": "silu_config_M189440_N896.json", + "M": 189440, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.5227500000001 + }, + "M=189440,N=960": { + "file": "silu_config_M189440_N960.json", + "M": 189440, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 896.8027499999998 + }, + "M=189440,N=1024": { + "file": "silu_config_M189440_N1024.json", + "M": 189440, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 715.7219999999998 + }, + "M=189440,N=1120": { + "file": "silu_config_M189440_N1120.json", + "M": 189440, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.5662499999994 + }, + "M=189440,N=1152": { + "file": "silu_config_M189440_N1152.json", + "M": 189440, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.0062499999995 + }, + "M=189440,N=1280": { + "file": "silu_config_M189440_N1280.json", + "M": 189440, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1759.48625 + }, + "M=189440,N=1344": { + "file": "silu_config_M189440_N1344.json", + "M": 189440, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1760.1262499999998 + }, + "M=189440,N=1408": { + "file": "silu_config_M189440_N1408.json", + "M": 189440, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1751.1262500000007 + }, + "M=189440,N=1440": { + "file": "silu_config_M189440_N1440.json", + "M": 189440, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1773.1262500000012 + }, + "M=189440,N=1536": { + "file": "silu_config_M189440_N1536.json", + "M": 189440, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1754.0462500000003 + }, + "M=189440,N=1600": { + "file": "silu_config_M189440_N1600.json", + "M": 189440, 
+ "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1784.8462499999996 + }, + "M=189440,N=1664": { + "file": "silu_config_M189440_N1664.json", + "M": 189440, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.6462499999998 + }, + "M=189440,N=1728": { + "file": "silu_config_M189440_N1728.json", + "M": 189440, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1757.1262499999993 + }, + "M=189440,N=1760": { + "file": "silu_config_M189440_N1760.json", + "M": 189440, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1780.6062500000007 + }, + "M=189440,N=1792": { + "file": "silu_config_M189440_N1792.json", + "M": 189440, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.2062499999993 + }, + "M=189440,N=1920": { + "file": "silu_config_M189440_N1920.json", + "M": 189440, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1765.44625 + }, + "M=189440,N=2048": { + "file": "silu_config_M189440_N2048.json", + "M": 189440, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1409.9647499999992 + }, + "M=189440,N=2080": { + "file": "silu_config_M189440_N2080.json", + "M": 189440, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.3269999999993 + }, + "M=189440,N=2240": { + "file": "silu_config_M189440_N2240.json", + "M": 189440, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.4875000000013 + }, + "M=189440,N=2400": { + "file": "silu_config_M189440_N2400.json", + "M": 189440, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2160.048 + }, + "M=189440,N=2560": { + "file": "silu_config_M189440_N2560.json", + "M": 189440, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.1682500000006 + }, + "M=190464,N=128": { + "file": "silu_config_M190464_N128.json", + "M": 190464, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.03924999999995 + }, + "M=190464,N=160": { + 
"file": "silu_config_M190464_N160.json", + "M": 190464, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.76 + }, + "M=190464,N=192": { + "file": "silu_config_M190464_N192.json", + "M": 190464, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.39999999999998 + }, + "M=190464,N=256": { + "file": "silu_config_M190464_N256.json", + "M": 190464, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.8 + }, + "M=190464,N=320": { + "file": "silu_config_M190464_N320.json", + "M": 190464, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 464.16100000000006 + }, + "M=190464,N=384": { + "file": "silu_config_M190464_N384.json", + "M": 190464, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.48075000000006 + }, + "M=190464,N=480": { + "file": "silu_config_M190464_N480.json", + "M": 190464, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.32074999999963 + }, + "M=190464,N=512": { + "file": "silu_config_M190464_N512.json", + "M": 190464, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 375.04049999999995 + }, + "M=190464,N=576": { + "file": "silu_config_M190464_N576.json", + "M": 190464, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.5627499999996 + }, + "M=190464,N=640": { + "file": "silu_config_M190464_N640.json", + "M": 190464, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.4027500000002 + }, + "M=190464,N=768": { + "file": "silu_config_M190464_N768.json", + "M": 190464, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.16275 + }, + "M=190464,N=800": { + "file": "silu_config_M190464_N800.json", + "M": 190464, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 913.4027499999997 + }, + "M=190464,N=896": { + "file": "silu_config_M190464_N896.json", + "M": 190464, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 900.3627500000002 + }, + "M=190464,N=960": { + 
"file": "silu_config_M190464_N960.json", + "M": 190464, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 907.7627499999999 + }, + "M=190464,N=1024": { + "file": "silu_config_M190464_N1024.json", + "M": 190464, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 721.2020000000002 + }, + "M=190464,N=1120": { + "file": "silu_config_M190464_N1120.json", + "M": 190464, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1759.92625 + }, + "M=190464,N=1152": { + "file": "silu_config_M190464_N1152.json", + "M": 190464, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1777.6462499999993 + }, + "M=190464,N=1280": { + "file": "silu_config_M190464_N1280.json", + "M": 190464, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1769.00625 + }, + "M=190464,N=1344": { + "file": "silu_config_M190464_N1344.json", + "M": 190464, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.0462499999999 + }, + "M=190464,N=1408": { + "file": "silu_config_M190464_N1408.json", + "M": 190464, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1771.0462499999999 + }, + "M=190464,N=1440": { + "file": "silu_config_M190464_N1440.json", + "M": 190464, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.7262500000006 + }, + "M=190464,N=1536": { + "file": "silu_config_M190464_N1536.json", + "M": 190464, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.286250000001 + }, + "M=190464,N=1600": { + "file": "silu_config_M190464_N1600.json", + "M": 190464, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.6062499999998 + }, + "M=190464,N=1664": { + "file": "silu_config_M190464_N1664.json", + "M": 190464, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.7262499999997 + }, + "M=190464,N=1728": { + "file": "silu_config_M190464_N1728.json", + "M": 190464, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1779.48625 + }, + "M=190464,N=1760": { + "file": "silu_config_M190464_N1760.json", + "M": 190464, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.8862500000005 + }, + "M=190464,N=1792": { + "file": "silu_config_M190464_N1792.json", + "M": 190464, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1785.406250000001 + }, + "M=190464,N=1920": { + "file": "silu_config_M190464_N1920.json", + "M": 190464, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1786.6462499999998 + }, + "M=190464,N=2048": { + "file": "silu_config_M190464_N2048.json", + "M": 190464, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1421.00475 + }, + "M=190464,N=2080": { + "file": "silu_config_M190464_N2080.json", + "M": 190464, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1958.2870000000003 + }, + "M=190464,N=2240": { + "file": "silu_config_M190464_N2240.json", + "M": 190464, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2079.2075000000004 + }, + "M=190464,N=2400": { + "file": "silu_config_M190464_N2400.json", + "M": 190464, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2202.4880000000003 + }, + "M=190464,N=2560": { + "file": "silu_config_M190464_N2560.json", + "M": 190464, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2277.6882499999992 + }, + "M=191488,N=128": { + "file": "silu_config_M191488_N128.json", + "M": 191488, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.75949999999997 + }, + "M=191488,N=160": { + "file": "silu_config_M191488_N160.json", + "M": 191488, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 230.84000000000003 + }, + "M=191488,N=192": { + "file": "silu_config_M191488_N192.json", + "M": 191488, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 242.19999999999993 + }, + "M=191488,N=256": { + "file": "silu_config_M191488_N256.json", + "M": 191488, + "N": 256, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 208.87975000000017 + }, + "M=191488,N=320": { + "file": "silu_config_M191488_N320.json", + "M": 191488, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 466.08100000000013 + }, + "M=191488,N=384": { + "file": "silu_config_M191488_N384.json", + "M": 191488, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 465.24075000000016 + }, + "M=191488,N=480": { + "file": "silu_config_M191488_N480.json", + "M": 191488, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.4410000000005 + }, + "M=191488,N=512": { + "file": "silu_config_M191488_N512.json", + "M": 191488, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 375.8805000000001 + }, + "M=191488,N=576": { + "file": "silu_config_M191488_N576.json", + "M": 191488, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 893.7627500000001 + }, + "M=191488,N=640": { + "file": "silu_config_M191488_N640.json", + "M": 191488, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 888.6024999999997 + }, + "M=191488,N=768": { + "file": "silu_config_M191488_N768.json", + "M": 191488, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 900.8827500000002 + }, + "M=191488,N=800": { + "file": "silu_config_M191488_N800.json", + "M": 191488, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 918.2827499999999 + }, + "M=191488,N=896": { + "file": "silu_config_M191488_N896.json", + "M": 191488, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 915.5227500000001 + }, + "M=191488,N=960": { + "file": "silu_config_M191488_N960.json", + "M": 191488, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 901.8827499999998 + }, + "M=191488,N=1024": { + "file": "silu_config_M191488_N1024.json", + "M": 191488, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 726.2419999999997 + }, + "M=191488,N=1120": { + "file": 
"silu_config_M191488_N1120.json", + "M": 191488, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.0062500000008 + }, + "M=191488,N=1152": { + "file": "silu_config_M191488_N1152.json", + "M": 191488, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1776.7262499999997 + }, + "M=191488,N=1280": { + "file": "silu_config_M191488_N1280.json", + "M": 191488, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1778.6862500000002 + }, + "M=191488,N=1344": { + "file": "silu_config_M191488_N1344.json", + "M": 191488, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1811.8465 + }, + "M=191488,N=1408": { + "file": "silu_config_M191488_N1408.json", + "M": 191488, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.8862499999996 + }, + "M=191488,N=1440": { + "file": "silu_config_M191488_N1440.json", + "M": 191488, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1784.366250000001 + }, + "M=191488,N=1536": { + "file": "silu_config_M191488_N1536.json", + "M": 191488, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1783.52625 + }, + "M=191488,N=1600": { + "file": "silu_config_M191488_N1600.json", + "M": 191488, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1795.6862500000016 + }, + "M=191488,N=1664": { + "file": "silu_config_M191488_N1664.json", + "M": 191488, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1786.48625 + }, + "M=191488,N=1728": { + "file": "silu_config_M191488_N1728.json", + "M": 191488, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1789.0462499999994 + }, + "M=191488,N=1760": { + "file": "silu_config_M191488_N1760.json", + "M": 191488, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1812.6064999999999 + }, + "M=191488,N=1792": { + "file": "silu_config_M191488_N1792.json", + "M": 191488, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1780.40625 + }, + "M=191488,N=1920": { + "file": "silu_config_M191488_N1920.json", + "M": 191488, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.2862499999992 + }, + "M=191488,N=2048": { + "file": "silu_config_M191488_N2048.json", + "M": 191488, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1422.6847500000003 + }, + "M=191488,N=2080": { + "file": "silu_config_M191488_N2080.json", + "M": 191488, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.1272499999995 + }, + "M=191488,N=2240": { + "file": "silu_config_M191488_N2240.json", + "M": 191488, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2073.8075 + }, + "M=191488,N=2400": { + "file": "silu_config_M191488_N2400.json", + "M": 191488, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2189.2879999999996 + }, + "M=191488,N=2560": { + "file": "silu_config_M191488_N2560.json", + "M": 191488, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2290.6484999999993 + }, + "M=192512,N=128": { + "file": "silu_config_M192512_N128.json", + "M": 192512, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.99949999999998 + }, + "M=192512,N=160": { + "file": "silu_config_M192512_N160.json", + "M": 192512, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 240.27999999999997 + }, + "M=192512,N=192": { + "file": "silu_config_M192512_N192.json", + "M": 192512, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.3599999999999 + }, + "M=192512,N=256": { + "file": "silu_config_M192512_N256.json", + "M": 192512, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 221.51999999999987 + }, + "M=192512,N=320": { + "file": "silu_config_M192512_N320.json", + "M": 192512, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.00099999999986 + }, + "M=192512,N=384": { + "file": "silu_config_M192512_N384.json", + "M": 192512, + "N": 384, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 465.92075000000034 + }, + "M=192512,N=480": { + "file": "silu_config_M192512_N480.json", + "M": 192512, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.3610000000001 + }, + "M=192512,N=512": { + "file": "silu_config_M192512_N512.json", + "M": 192512, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 378.8807499999997 + }, + "M=192512,N=576": { + "file": "silu_config_M192512_N576.json", + "M": 192512, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 897.7627499999999 + }, + "M=192512,N=640": { + "file": "silu_config_M192512_N640.json", + "M": 192512, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.6427499999998 + }, + "M=192512,N=768": { + "file": "silu_config_M192512_N768.json", + "M": 192512, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.7227499999999 + }, + "M=192512,N=800": { + "file": "silu_config_M192512_N800.json", + "M": 192512, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 922.9227500000006 + }, + "M=192512,N=896": { + "file": "silu_config_M192512_N896.json", + "M": 192512, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 909.7227500000004 + }, + "M=192512,N=960": { + "file": "silu_config_M192512_N960.json", + "M": 192512, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 927.4827500000001 + }, + "M=192512,N=1024": { + "file": "silu_config_M192512_N1024.json", + "M": 192512, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 729.002 + }, + "M=192512,N=1120": { + "file": "silu_config_M192512_N1120.json", + "M": 192512, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1799.4065 + }, + "M=192512,N=1152": { + "file": "silu_config_M192512_N1152.json", + "M": 192512, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1775.4862500000008 + }, + "M=192512,N=1280": { + "file": "silu_config_M192512_N1280.json", + "M": 
192512, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1798.2065000000002 + }, + "M=192512,N=1344": { + "file": "silu_config_M192512_N1344.json", + "M": 192512, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.8064999999997 + }, + "M=192512,N=1408": { + "file": "silu_config_M192512_N1408.json", + "M": 192512, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1779.52625 + }, + "M=192512,N=1440": { + "file": "silu_config_M192512_N1440.json", + "M": 192512, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1793.44625 + }, + "M=192512,N=1536": { + "file": "silu_config_M192512_N1536.json", + "M": 192512, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1782.6462499999998 + }, + "M=192512,N=1600": { + "file": "silu_config_M192512_N1600.json", + "M": 192512, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1815.6864999999998 + }, + "M=192512,N=1664": { + "file": "silu_config_M192512_N1664.json", + "M": 192512, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.2465000000002 + }, + "M=192512,N=1728": { + "file": "silu_config_M192512_N1728.json", + "M": 192512, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1812.4465 + }, + "M=192512,N=1760": { + "file": "silu_config_M192512_N1760.json", + "M": 192512, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1812.3265000000001 + }, + "M=192512,N=1792": { + "file": "silu_config_M192512_N1792.json", + "M": 192512, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1810.9264999999996 + }, + "M=192512,N=1920": { + "file": "silu_config_M192512_N1920.json", + "M": 192512, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1794.8062499999996 + }, + "M=192512,N=2048": { + "file": "silu_config_M192512_N2048.json", + "M": 192512, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1439.2849999999999 + }, + "M=192512,N=2080": { 
+ "file": "silu_config_M192512_N2080.json", + "M": 192512, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1999.2072500000004 + }, + "M=192512,N=2240": { + "file": "silu_config_M192512_N2240.json", + "M": 192512, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2084.6475 + }, + "M=192512,N=2400": { + "file": "silu_config_M192512_N2400.json", + "M": 192512, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2212.0879999999997 + }, + "M=192512,N=2560": { + "file": "silu_config_M192512_N2560.json", + "M": 192512, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.2885000000006 + }, + "M=193536,N=128": { + "file": "silu_config_M193536_N128.json", + "M": 193536, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 109.47950000000006 + }, + "M=193536,N=160": { + "file": "silu_config_M193536_N160.json", + "M": 193536, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.52000000000004 + }, + "M=193536,N=192": { + "file": "silu_config_M193536_N192.json", + "M": 193536, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.87999999999994 + }, + "M=193536,N=256": { + "file": "silu_config_M193536_N256.json", + "M": 193536, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.83975000000015 + }, + "M=193536,N=320": { + "file": "silu_config_M193536_N320.json", + "M": 193536, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.361 + }, + "M=193536,N=384": { + "file": "silu_config_M193536_N384.json", + "M": 193536, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 468.7207500000002 + }, + "M=193536,N=480": { + "file": "silu_config_M193536_N480.json", + "M": 193536, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 476.40099999999984 + }, + "M=193536,N=512": { + "file": "silu_config_M193536_N512.json", + "M": 193536, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 378.12049999999977 
+ }, + "M=193536,N=576": { + "file": "silu_config_M193536_N576.json", + "M": 193536, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 924.6827499999999 + }, + "M=193536,N=640": { + "file": "silu_config_M193536_N640.json", + "M": 193536, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.1227500000002 + }, + "M=193536,N=768": { + "file": "silu_config_M193536_N768.json", + "M": 193536, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 899.2427499999997 + }, + "M=193536,N=800": { + "file": "silu_config_M193536_N800.json", + "M": 193536, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 917.2427499999999 + }, + "M=193536,N=896": { + "file": "silu_config_M193536_N896.json", + "M": 193536, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 924.8027499999998 + }, + "M=193536,N=960": { + "file": "silu_config_M193536_N960.json", + "M": 193536, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.4427499999997 + }, + "M=193536,N=1024": { + "file": "silu_config_M193536_N1024.json", + "M": 193536, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 732.482 + }, + "M=193536,N=1120": { + "file": "silu_config_M193536_N1120.json", + "M": 193536, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1809.0065000000004 + }, + "M=193536,N=1152": { + "file": "silu_config_M193536_N1152.json", + "M": 193536, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + "M=193536,N=1280": { + "file": "silu_config_M193536_N1280.json", + "M": 193536, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1797.4062500000005 + }, + "M=193536,N=1344": { + "file": "silu_config_M193536_N1344.json", + "M": 193536, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1789.48625 + }, + "M=193536,N=1408": { + "file": "silu_config_M193536_N1408.json", + "M": 193536, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1803.6064999999999 + }, + "M=193536,N=1440": { + "file": "silu_config_M193536_N1440.json", + "M": 193536, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.0465000000004 + }, + "M=193536,N=1536": { + "file": "silu_config_M193536_N1536.json", + "M": 193536, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.2462500000001 + }, + "M=193536,N=1600": { + "file": "silu_config_M193536_N1600.json", + "M": 193536, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1828.8064999999997 + }, + "M=193536,N=1664": { + "file": "silu_config_M193536_N1664.json", + "M": 193536, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + "M=193536,N=1728": { + "file": "silu_config_M193536_N1728.json", + "M": 193536, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1818.0865000000003 + }, + "M=193536,N=1760": { + "file": "silu_config_M193536_N1760.json", + "M": 193536, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1821.4465 + }, + "M=193536,N=1792": { + "file": "silu_config_M193536_N1792.json", + "M": 193536, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1809.8464999999997 + }, + "M=193536,N=1920": { + "file": "silu_config_M193536_N1920.json", + "M": 193536, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1817.6464999999998 + }, + "M=193536,N=2048": { + "file": "silu_config_M193536_N2048.json", + "M": 193536, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1437.3650000000007 + }, + "M=193536,N=2080": { + "file": "silu_config_M193536_N2080.json", + "M": 193536, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2001.3272499999991 + }, + "M=193536,N=2240": { + "file": "silu_config_M193536_N2240.json", + "M": 193536, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.76775 + }, + "M=193536,N=2400": { + "file": "silu_config_M193536_N2400.json", + "M": 193536, 
+ "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2214.688000000001 + }, + "M=193536,N=2560": { + "file": "silu_config_M193536_N2560.json", + "M": 193536, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.888499999999 + }, + "M=194560,N=128": { + "file": "silu_config_M194560_N128.json", + "M": 194560, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 109.51949999999997 + }, + "M=194560,N=160": { + "file": "silu_config_M194560_N160.json", + "M": 194560, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.40000000000003 + }, + "M=194560,N=192": { + "file": "silu_config_M194560_N192.json", + "M": 194560, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.88 + }, + "M=194560,N=256": { + "file": "silu_config_M194560_N256.json", + "M": 194560, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.07999999999987 + }, + "M=194560,N=320": { + "file": "silu_config_M194560_N320.json", + "M": 194560, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.081 + }, + "M=194560,N=384": { + "file": "silu_config_M194560_N384.json", + "M": 194560, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.80074999999977 + }, + "M=194560,N=480": { + "file": "silu_config_M194560_N480.json", + "M": 194560, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 475.5609999999999 + }, + "M=194560,N=512": { + "file": "silu_config_M194560_N512.json", + "M": 194560, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 381.6004999999999 + }, + "M=194560,N=576": { + "file": "silu_config_M194560_N576.json", + "M": 194560, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 918.2027500000004 + }, + "M=194560,N=640": { + "file": "silu_config_M194560_N640.json", + "M": 194560, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 907.9627500000006 + }, + "M=194560,N=768": { + "file": "silu_config_M194560_N768.json", + 
"M": 194560, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.92275 + }, + "M=194560,N=800": { + "file": "silu_config_M194560_N800.json", + "M": 194560, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.4827499999997 + }, + "M=194560,N=896": { + "file": "silu_config_M194560_N896.json", + "M": 194560, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.60275 + }, + "M=194560,N=960": { + "file": "silu_config_M194560_N960.json", + "M": 194560, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.2827499999999 + }, + "M=194560,N=1024": { + "file": "silu_config_M194560_N1024.json", + "M": 194560, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 735.8422500000011 + }, + "M=194560,N=1120": { + "file": "silu_config_M194560_N1120.json", + "M": 194560, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + "M=194560,N=1152": { + "file": "silu_config_M194560_N1152.json", + "M": 194560, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.7265000000007 + }, + "M=194560,N=1280": { + "file": "silu_config_M194560_N1280.json", + "M": 194560, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1806.1264999999999 + }, + "M=194560,N=1344": { + "file": "silu_config_M194560_N1344.json", + "M": 194560, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1817.3664999999996 + }, + "M=194560,N=1408": { + "file": "silu_config_M194560_N1408.json", + "M": 194560, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.32625 + }, + "M=194560,N=1440": { + "file": "silu_config_M194560_N1440.json", + "M": 194560, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1799.9662499999995 + }, + "M=194560,N=1536": { + "file": "silu_config_M194560_N1536.json", + "M": 194560, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.3265000000001 + }, + "M=194560,N=1600": { + "file": 
"silu_config_M194560_N1600.json", + "M": 194560, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1832.0065000000004 + }, + "M=194560,N=1664": { + "file": "silu_config_M194560_N1664.json", + "M": 194560, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.9662499999995 + }, + "M=194560,N=1728": { + "file": "silu_config_M194560_N1728.json", + "M": 194560, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1793.7662499999997 + }, + "M=194560,N=1760": { + "file": "silu_config_M194560_N1760.json", + "M": 194560, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1806.8464999999997 + }, + "M=194560,N=1792": { + "file": "silu_config_M194560_N1792.json", + "M": 194560, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.9262500000004 + }, + "M=194560,N=1920": { + "file": "silu_config_M194560_N1920.json", + "M": 194560, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.4465 + }, + "M=194560,N=2048": { + "file": "silu_config_M194560_N2048.json", + "M": 194560, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1446.8849999999993 + }, + "M=194560,N=2080": { + "file": "silu_config_M194560_N2080.json", + "M": 194560, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.487 + }, + "M=194560,N=2240": { + "file": "silu_config_M194560_N2240.json", + "M": 194560, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2111.96775 + }, + "M=194560,N=2400": { + "file": "silu_config_M194560_N2400.json", + "M": 194560, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2217.808 + }, + "M=194560,N=2560": { + "file": "silu_config_M194560_N2560.json", + "M": 194560, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.9685 + }, + "M=195584,N=128": { + "file": "silu_config_M195584_N128.json", + "M": 195584, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 110.4795 + }, + 
"M=195584,N=160": { + "file": "silu_config_M195584_N160.json", + "M": 195584, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 246.64 + }, + "M=195584,N=192": { + "file": "silu_config_M195584_N192.json", + "M": 195584, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.8800000000001 + }, + "M=195584,N=256": { + "file": "silu_config_M195584_N256.json", + "M": 195584, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 213.16000000000014 + }, + "M=195584,N=320": { + "file": "silu_config_M195584_N320.json", + "M": 195584, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 476.2009999999998 + }, + "M=195584,N=384": { + "file": "silu_config_M195584_N384.json", + "M": 195584, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 473.56100000000015 + }, + "M=195584,N=480": { + "file": "silu_config_M195584_N480.json", + "M": 195584, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 481.6010000000001 + }, + "M=195584,N=512": { + "file": "silu_config_M195584_N512.json", + "M": 195584, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 384.76075000000003 + }, + "M=195584,N=576": { + "file": "silu_config_M195584_N576.json", + "M": 195584, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 912.3627500000002 + }, + "M=195584,N=640": { + "file": "silu_config_M195584_N640.json", + "M": 195584, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 906.6827500000002 + }, + "M=195584,N=768": { + "file": "silu_config_M195584_N768.json", + "M": 195584, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.60275 + }, + "M=195584,N=800": { + "file": "silu_config_M195584_N800.json", + "M": 195584, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 926.5227500000005 + }, + "M=195584,N=896": { + "file": "silu_config_M195584_N896.json", + "M": 195584, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.64275 + }, + 
"M=195584,N=960": { + "file": "silu_config_M195584_N960.json", + "M": 195584, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.08275 + }, + "M=195584,N=1024": { + "file": "silu_config_M195584_N1024.json", + "M": 195584, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 741.3620000000005 + }, + "M=195584,N=1120": { + "file": "silu_config_M195584_N1120.json", + "M": 195584, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1837.886500000001 + }, + "M=195584,N=1152": { + "file": "silu_config_M195584_N1152.json", + "M": 195584, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.2065000000007 + }, + "M=195584,N=1280": { + "file": "silu_config_M195584_N1280.json", + "M": 195584, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1816.0064999999995 + }, + "M=195584,N=1344": { + "file": "silu_config_M195584_N1344.json", + "M": 195584, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1818.5264999999995 + }, + "M=195584,N=1408": { + "file": "silu_config_M195584_N1408.json", + "M": 195584, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1828.6465000000007 + }, + "M=195584,N=1440": { + "file": "silu_config_M195584_N1440.json", + "M": 195584, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.0864999999994 + }, + "M=195584,N=1536": { + "file": "silu_config_M195584_N1536.json", + "M": 195584, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.6464999999998 + }, + "M=195584,N=1600": { + "file": "silu_config_M195584_N1600.json", + "M": 195584, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.326500000001 + }, + "M=195584,N=1664": { + "file": "silu_config_M195584_N1664.json", + "M": 195584, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1824.3265000000001 + }, + "M=195584,N=1728": { + "file": "silu_config_M195584_N1728.json", + "M": 195584, + "N": 1728, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 1826.8465000000006 + }, + "M=195584,N=1760": { + "file": "silu_config_M195584_N1760.json", + "M": 195584, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.1664999999994 + }, + "M=195584,N=1792": { + "file": "silu_config_M195584_N1792.json", + "M": 195584, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1843.9665000000005 + }, + "M=195584,N=1920": { + "file": "silu_config_M195584_N1920.json", + "M": 195584, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1823.4465 + }, + "M=195584,N=2048": { + "file": "silu_config_M195584_N2048.json", + "M": 195584, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1447.8049999999994 + }, + "M=195584,N=2080": { + "file": "silu_config_M195584_N2080.json", + "M": 195584, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.0872499999996 + }, + "M=195584,N=2240": { + "file": "silu_config_M195584_N2240.json", + "M": 195584, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.88775 + }, + "M=195584,N=2400": { + "file": "silu_config_M195584_N2400.json", + "M": 195584, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2226.4082500000004 + }, + "M=195584,N=2560": { + "file": "silu_config_M195584_N2560.json", + "M": 195584, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.96875 + }, + "M=196608,N=128": { + "file": "silu_config_M196608_N128.json", + "M": 196608, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.35950000000005 + }, + "M=196608,N=160": { + "file": "silu_config_M196608_N160.json", + "M": 196608, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 244.95999999999992 + }, + "M=196608,N=192": { + "file": "silu_config_M196608_N192.json", + "M": 196608, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.19999999999993 + }, + "M=196608,N=256": { + "file": "silu_config_M196608_N256.json", + "M": 
196608, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 214.11999999999995 + }, + "M=196608,N=320": { + "file": "silu_config_M196608_N320.json", + "M": 196608, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.6809999999999 + }, + "M=196608,N=384": { + "file": "silu_config_M196608_N384.json", + "M": 196608, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 475.9209999999998 + }, + "M=196608,N=480": { + "file": "silu_config_M196608_N480.json", + "M": 196608, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 484.2809999999997 + }, + "M=196608,N=512": { + "file": "silu_config_M196608_N512.json", + "M": 196608, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 364.6405 + }, + "M=196608,N=576": { + "file": "silu_config_M196608_N576.json", + "M": 196608, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 916.7227499999999 + }, + "M=196608,N=640": { + "file": "silu_config_M196608_N640.json", + "M": 196608, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 922.4427500000004 + }, + "M=196608,N=768": { + "file": "silu_config_M196608_N768.json", + "M": 196608, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.4827500000006 + }, + "M=196608,N=800": { + "file": "silu_config_M196608_N800.json", + "M": 196608, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 920.2427499999994 + }, + "M=196608,N=896": { + "file": "silu_config_M196608_N896.json", + "M": 196608, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 917.2827499999999 + }, + "M=196608,N=960": { + "file": "silu_config_M196608_N960.json", + "M": 196608, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 935.6827499999999 + }, + "M=196608,N=1024": { + "file": "silu_config_M196608_N1024.json", + "M": 196608, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 745.002 + }, + "M=196608,N=1120": { + "file": "silu_config_M196608_N1120.json", 
+ "M": 196608, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1816.5664999999995 + }, + "M=196608,N=1152": { + "file": "silu_config_M196608_N1152.json", + "M": 196608, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1828.6464999999998 + }, + "M=196608,N=1280": { + "file": "silu_config_M196608_N1280.json", + "M": 196608, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1825.2464999999997 + }, + "M=196608,N=1344": { + "file": "silu_config_M196608_N1344.json", + "M": 196608, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1838.366500000001 + }, + "M=196608,N=1408": { + "file": "silu_config_M196608_N1408.json", + "M": 196608, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1817.5265 + }, + "M=196608,N=1440": { + "file": "silu_config_M196608_N1440.json", + "M": 196608, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1842.0464999999995 + }, + "M=196608,N=1536": { + "file": "silu_config_M196608_N1536.json", + "M": 196608, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.6865000000007 + }, + "M=196608,N=1600": { + "file": "silu_config_M196608_N1600.json", + "M": 196608, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1821.6464999999998 + }, + "M=196608,N=1664": { + "file": "silu_config_M196608_N1664.json", + "M": 196608, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1833.5665 + }, + "M=196608,N=1728": { + "file": "silu_config_M196608_N1728.json", + "M": 196608, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1846.8465000000015 + }, + "M=196608,N=1760": { + "file": "silu_config_M196608_N1760.json", + "M": 196608, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1852.6064999999999 + }, + "M=196608,N=1792": { + "file": "silu_config_M196608_N1792.json", + "M": 196608, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1838.1265000000003 + }, + 
"M=196608,N=1920": { + "file": "silu_config_M196608_N1920.json", + "M": 196608, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.1664999999994 + }, + "M=196608,N=2048": { + "file": "silu_config_M196608_N2048.json", + "M": 196608, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1444.9250000000006 + }, + "M=196608,N=2080": { + "file": "silu_config_M196608_N2080.json", + "M": 196608, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2032.487249999999 + }, + "M=196608,N=2240": { + "file": "silu_config_M196608_N2240.json", + "M": 196608, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2144.72775 + }, + "M=196608,N=2400": { + "file": "silu_config_M196608_N2400.json", + "M": 196608, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2260.76825 + }, + "M=196608,N=2560": { + "file": "silu_config_M196608_N2560.json", + "M": 196608, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.288749999999 + }, + "M=197632,N=128": { + "file": "silu_config_M197632_N128.json", + "M": 197632, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 111.75950000000003 + }, + "M=197632,N=160": { + "file": "silu_config_M197632_N160.json", + "M": 197632, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 256.64000000000004 + }, + "M=197632,N=192": { + "file": "silu_config_M197632_N192.json", + "M": 197632, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 238.24000000000007 + }, + "M=197632,N=256": { + "file": "silu_config_M197632_N256.json", + "M": 197632, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.31975 + }, + "M=197632,N=320": { + "file": "silu_config_M197632_N320.json", + "M": 197632, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 481.1210000000001 + }, + "M=197632,N=384": { + "file": "silu_config_M197632_N384.json", + "M": 197632, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
478.6010000000002 + }, + "M=197632,N=480": { + "file": "silu_config_M197632_N480.json", + "M": 197632, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.241 + }, + "M=197632,N=512": { + "file": "silu_config_M197632_N512.json", + "M": 197632, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 386.2805000000003 + }, + "M=197632,N=576": { + "file": "silu_config_M197632_N576.json", + "M": 197632, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.92275 + }, + "M=197632,N=640": { + "file": "silu_config_M197632_N640.json", + "M": 197632, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.7227499999999 + }, + "M=197632,N=768": { + "file": "silu_config_M197632_N768.json", + "M": 197632, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 918.12275 + }, + "M=197632,N=800": { + "file": "silu_config_M197632_N800.json", + "M": 197632, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 935.9627499999997 + }, + "M=197632,N=896": { + "file": "silu_config_M197632_N896.json", + "M": 197632, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.3230000000003 + }, + "M=197632,N=960": { + "file": "silu_config_M197632_N960.json", + "M": 197632, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 930.16275 + }, + "M=197632,N=1024": { + "file": "silu_config_M197632_N1024.json", + "M": 197632, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 748.8420000000006 + }, + "M=197632,N=1120": { + "file": "silu_config_M197632_N1120.json", + "M": 197632, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1836.4465000000005 + }, + "M=197632,N=1152": { + "file": "silu_config_M197632_N1152.json", + "M": 197632, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.1664999999998 + }, + "M=197632,N=1280": { + "file": "silu_config_M197632_N1280.json", + "M": 197632, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1845.0465000000008 + }, + "M=197632,N=1344": { + "file": "silu_config_M197632_N1344.json", + "M": 197632, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1837.4064999999996 + }, + "M=197632,N=1408": { + "file": "silu_config_M197632_N1408.json", + "M": 197632, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1836.8864999999996 + }, + "M=197632,N=1440": { + "file": "silu_config_M197632_N1440.json", + "M": 197632, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.2064999999993 + }, + "M=197632,N=1536": { + "file": "silu_config_M197632_N1536.json", + "M": 197632, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1839.6464999999998 + }, + "M=197632,N=1600": { + "file": "silu_config_M197632_N1600.json", + "M": 197632, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1862.0867500000004 + }, + "M=197632,N=1664": { + "file": "silu_config_M197632_N1664.json", + "M": 197632, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1842.7664999999997 + }, + "M=197632,N=1728": { + "file": "silu_config_M197632_N1728.json", + "M": 197632, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1856.4865 + }, + "M=197632,N=1760": { + "file": "silu_config_M197632_N1760.json", + "M": 197632, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.0067499999996 + }, + "M=197632,N=1792": { + "file": "silu_config_M197632_N1792.json", + "M": 197632, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1852.7264999999998 + }, + "M=197632,N=1920": { + "file": "silu_config_M197632_N1920.json", + "M": 197632, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1867.0867500000004 + }, + "M=197632,N=2048": { + "file": "silu_config_M197632_N2048.json", + "M": 197632, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1462.085 + }, + "M=197632,N=2080": { + "file": "silu_config_M197632_N2080.json", + "M": 197632, + 
"N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2052.9675000000007 + }, + "M=197632,N=2240": { + "file": "silu_config_M197632_N2240.json", + "M": 197632, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2152.72775 + }, + "M=197632,N=2400": { + "file": "silu_config_M197632_N2400.json", + "M": 197632, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2273.1282500000007 + }, + "M=197632,N=2560": { + "file": "silu_config_M197632_N2560.json", + "M": 197632, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2365.2487500000007 + }, + "M=198656,N=128": { + "file": "silu_config_M198656_N128.json", + "M": 198656, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.03949999999995 + }, + "M=198656,N=160": { + "file": "silu_config_M198656_N160.json", + "M": 198656, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 236.7600000000001 + }, + "M=198656,N=192": { + "file": "silu_config_M198656_N192.json", + "M": 198656, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.39999999999984 + }, + "M=198656,N=256": { + "file": "silu_config_M198656_N256.json", + "M": 198656, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 216.39974999999998 + }, + "M=198656,N=320": { + "file": "silu_config_M198656_N320.json", + "M": 198656, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 483.361 + }, + "M=198656,N=384": { + "file": "silu_config_M198656_N384.json", + "M": 198656, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 480.3209999999999 + }, + "M=198656,N=480": { + "file": "silu_config_M198656_N480.json", + "M": 198656, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.6809999999998 + }, + "M=198656,N=512": { + "file": "silu_config_M198656_N512.json", + "M": 198656, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 390.7204999999999 + }, + "M=198656,N=576": { + "file": 
"silu_config_M198656_N576.json", + "M": 198656, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 947.64275 + }, + "M=198656,N=640": { + "file": "silu_config_M198656_N640.json", + "M": 198656, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.9627500000001 + }, + "M=198656,N=768": { + "file": "silu_config_M198656_N768.json", + "M": 198656, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.7627500000003 + }, + "M=198656,N=800": { + "file": "silu_config_M198656_N800.json", + "M": 198656, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.64275 + }, + "M=198656,N=896": { + "file": "silu_config_M198656_N896.json", + "M": 198656, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.9227499999997 + }, + "M=198656,N=960": { + "file": "silu_config_M198656_N960.json", + "M": 198656, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 945.0027499999997 + }, + "M=198656,N=1024": { + "file": "silu_config_M198656_N1024.json", + "M": 198656, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 751.8420000000001 + }, + "M=198656,N=1120": { + "file": "silu_config_M198656_N1120.json", + "M": 198656, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1871.44675 + }, + "M=198656,N=1152": { + "file": "silu_config_M198656_N1152.json", + "M": 198656, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.886500000002 + }, + "M=198656,N=1280": { + "file": "silu_config_M198656_N1280.json", + "M": 198656, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.9264999999996 + }, + "M=198656,N=1344": { + "file": "silu_config_M198656_N1344.json", + "M": 198656, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1846.9264999999996 + }, + "M=198656,N=1408": { + "file": "silu_config_M198656_N1408.json", + "M": 198656, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1866.8467499999997 + }, + 
"M=198656,N=1440": { + "file": "silu_config_M198656_N1440.json", + "M": 198656, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1864.0467500000004 + }, + "M=198656,N=1536": { + "file": "silu_config_M198656_N1536.json", + "M": 198656, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1848.8865000000014 + }, + "M=198656,N=1600": { + "file": "silu_config_M198656_N1600.json", + "M": 198656, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.7667499999998 + }, + "M=198656,N=1664": { + "file": "silu_config_M198656_N1664.json", + "M": 198656, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.3265000000001 + }, + "M=198656,N=1728": { + "file": "silu_config_M198656_N1728.json", + "M": 198656, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1875.8867499999997 + }, + "M=198656,N=1760": { + "file": "silu_config_M198656_N1760.json", + "M": 198656, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1858.8464999999997 + }, + "M=198656,N=1792": { + "file": "silu_config_M198656_N1792.json", + "M": 198656, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1857.2064999999993 + }, + "M=198656,N=1920": { + "file": "silu_config_M198656_N1920.json", + "M": 198656, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1876.9667499999996 + }, + "M=198656,N=2048": { + "file": "silu_config_M198656_N2048.json", + "M": 198656, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1474.4050000000007 + }, + "M=198656,N=2080": { + "file": "silu_config_M198656_N2080.json", + "M": 198656, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2063.1674999999996 + }, + "M=198656,N=2240": { + "file": "silu_config_M198656_N2240.json", + "M": 198656, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.968 + }, + "M=198656,N=2400": { + "file": "silu_config_M198656_N2400.json", + "M": 198656, + "N": 2400, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2294.8485 + }, + "M=198656,N=2560": { + "file": "silu_config_M198656_N2560.json", + "M": 198656, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2385.3287500000015 + }, + "M=199680,N=128": { + "file": "silu_config_M199680_N128.json", + "M": 199680, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.59949999999998 + }, + "M=199680,N=160": { + "file": "silu_config_M199680_N160.json", + "M": 199680, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 246.96000000000004 + }, + "M=199680,N=192": { + "file": "silu_config_M199680_N192.json", + "M": 199680, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.51999999999998 + }, + "M=199680,N=256": { + "file": "silu_config_M199680_N256.json", + "M": 199680, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.4000000000001 + }, + "M=199680,N=320": { + "file": "silu_config_M199680_N320.json", + "M": 199680, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.9210000000003 + }, + "M=199680,N=384": { + "file": "silu_config_M199680_N384.json", + "M": 199680, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 479.961 + }, + "M=199680,N=480": { + "file": "silu_config_M199680_N480.json", + "M": 199680, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 487.9609999999998 + }, + "M=199680,N=512": { + "file": "silu_config_M199680_N512.json", + "M": 199680, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 389.2007500000002 + }, + "M=199680,N=576": { + "file": "silu_config_M199680_N576.json", + "M": 199680, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 941.92275 + }, + "M=199680,N=640": { + "file": "silu_config_M199680_N640.json", + "M": 199680, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 920.9627500000004 + }, + "M=199680,N=768": { + "file": "silu_config_M199680_N768.json", + "M": 199680, + "N": 
768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.1227500000009 + }, + "M=199680,N=800": { + "file": "silu_config_M199680_N800.json", + "M": 199680, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.1627500000004 + }, + "M=199680,N=896": { + "file": "silu_config_M199680_N896.json", + "M": 199680, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 926.8827500000007 + }, + "M=199680,N=960": { + "file": "silu_config_M199680_N960.json", + "M": 199680, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 949.6427500000009 + }, + "M=199680,N=1024": { + "file": "silu_config_M199680_N1024.json", + "M": 199680, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 755.2819999999997 + }, + "M=199680,N=1120": { + "file": "silu_config_M199680_N1120.json", + "M": 199680, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.12675 + }, + "M=199680,N=1152": { + "file": "silu_config_M199680_N1152.json", + "M": 199680, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1830.4465 + }, + "M=199680,N=1280": { + "file": "silu_config_M199680_N1280.json", + "M": 199680, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1852.7265000000002 + }, + "M=199680,N=1344": { + "file": "silu_config_M199680_N1344.json", + "M": 199680, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1853.8864999999996 + }, + "M=199680,N=1408": { + "file": "silu_config_M199680_N1408.json", + "M": 199680, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1823.3265000000001 + }, + "M=199680,N=1440": { + "file": "silu_config_M199680_N1440.json", + "M": 199680, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1877.2867500000002 + }, + "M=199680,N=1536": { + "file": "silu_config_M199680_N1536.json", + "M": 199680, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1857.0865000000013 + }, + "M=199680,N=1600": { + "file": 
"silu_config_M199680_N1600.json", + "M": 199680, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1847.0465000000004 + }, + "M=199680,N=1664": { + "file": "silu_config_M199680_N1664.json", + "M": 199680, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.0867499999995 + }, + "M=199680,N=1728": { + "file": "silu_config_M199680_N1728.json", + "M": 199680, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1861.7267499999998 + }, + "M=199680,N=1760": { + "file": "silu_config_M199680_N1760.json", + "M": 199680, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1885.0867500000004 + }, + "M=199680,N=1792": { + "file": "silu_config_M199680_N1792.json", + "M": 199680, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.7267499999998 + }, + "M=199680,N=1920": { + "file": "silu_config_M199680_N1920.json", + "M": 199680, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1859.3665 + }, + "M=199680,N=2048": { + "file": "silu_config_M199680_N2048.json", + "M": 199680, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1467.9250000000002 + }, + "M=199680,N=2080": { + "file": "silu_config_M199680_N2080.json", + "M": 199680, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2028.9672500000004 + }, + "M=199680,N=2240": { + "file": "silu_config_M199680_N2240.json", + "M": 199680, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2150.0877499999997 + }, + "M=199680,N=2400": { + "file": "silu_config_M199680_N2400.json", + "M": 199680, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2259.088249999999 + }, + "M=199680,N=2560": { + "file": "silu_config_M199680_N2560.json", + "M": 199680, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2393.968749999999 + }, + "M=200704,N=128": { + "file": "silu_config_M200704_N128.json", + "M": 200704, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 113.11949999999999 + }, + "M=200704,N=160": { + "file": "silu_config_M200704_N160.json", + "M": 200704, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 241.63999999999993 + }, + "M=200704,N=192": { + "file": "silu_config_M200704_N192.json", + "M": 200704, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.3999999999998 + }, + "M=200704,N=256": { + "file": "silu_config_M200704_N256.json", + "M": 200704, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.40000000000003 + }, + "M=200704,N=320": { + "file": "silu_config_M200704_N320.json", + "M": 200704, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.28100000000006 + }, + "M=200704,N=384": { + "file": "silu_config_M200704_N384.json", + "M": 200704, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 485.3609999999999 + }, + "M=200704,N=480": { + "file": "silu_config_M200704_N480.json", + "M": 200704, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.00099999999975 + }, + "M=200704,N=512": { + "file": "silu_config_M200704_N512.json", + "M": 200704, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 391.88049999999976 + }, + "M=200704,N=576": { + "file": "silu_config_M200704_N576.json", + "M": 200704, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 946.8827500000007 + }, + "M=200704,N=640": { + "file": "silu_config_M200704_N640.json", + "M": 200704, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 952.6029999999998 + }, + "M=200704,N=768": { + "file": "silu_config_M200704_N768.json", + "M": 200704, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.16275 + }, + "M=200704,N=800": { + "file": "silu_config_M200704_N800.json", + "M": 200704, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 960.3229999999999 + }, + "M=200704,N=896": { + "file": "silu_config_M200704_N896.json", + "M": 200704, + "N": 896, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 937.0027500000001 + }, + "M=200704,N=960": { + "file": "silu_config_M200704_N960.json", + "M": 200704, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 954.3627499999998 + }, + "M=200704,N=1024": { + "file": "silu_config_M200704_N1024.json", + "M": 200704, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 759.6022499999999 + }, + "M=200704,N=1120": { + "file": "silu_config_M200704_N1120.json", + "M": 200704, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.6067500000017 + }, + "M=200704,N=1152": { + "file": "silu_config_M200704_N1152.json", + "M": 200704, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.52675 + }, + "M=200704,N=1280": { + "file": "silu_config_M200704_N1280.json", + "M": 200704, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1873.0867499999995 + }, + "M=200704,N=1344": { + "file": "silu_config_M200704_N1344.json", + "M": 200704, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1886.52675 + }, + "M=200704,N=1408": { + "file": "silu_config_M200704_N1408.json", + "M": 200704, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.9267499999996 + }, + "M=200704,N=1440": { + "file": "silu_config_M200704_N1440.json", + "M": 200704, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.2467500000002 + }, + "M=200704,N=1536": { + "file": "silu_config_M200704_N1536.json", + "M": 200704, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1867.2067500000012 + }, + "M=200704,N=1600": { + "file": "silu_config_M200704_N1600.json", + "M": 200704, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.9267500000005 + }, + "M=200704,N=1664": { + "file": "silu_config_M200704_N1664.json", + "M": 200704, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1870.8867499999997 + }, + "M=200704,N=1728": { + "file": 
"silu_config_M200704_N1728.json", + "M": 200704, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.9267500000005 + }, + "M=200704,N=1760": { + "file": "silu_config_M200704_N1760.json", + "M": 200704, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1877.52675 + }, + "M=200704,N=1792": { + "file": "silu_config_M200704_N1792.json", + "M": 200704, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1865.8867499999997 + }, + "M=200704,N=1920": { + "file": "silu_config_M200704_N1920.json", + "M": 200704, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.3667499999992 + }, + "M=200704,N=2048": { + "file": "silu_config_M200704_N2048.json", + "M": 200704, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1484.4049999999997 + }, + "M=200704,N=2080": { + "file": "silu_config_M200704_N2080.json", + "M": 200704, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2054.2875000000004 + }, + "M=200704,N=2240": { + "file": "silu_config_M200704_N2240.json", + "M": 200704, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.2080000000005 + }, + "M=200704,N=2400": { + "file": "silu_config_M200704_N2400.json", + "M": 200704, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2308.3684999999987 + }, + "M=200704,N=2560": { + "file": "silu_config_M200704_N2560.json", + "M": 200704, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2417.5290000000005 + }, + "M=201728,N=128": { + "file": "silu_config_M201728_N128.json", + "M": 201728, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.83950000000002 + }, + "M=201728,N=160": { + "file": "silu_config_M201728_N160.json", + "M": 201728, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 242.83999999999997 + }, + "M=201728,N=192": { + "file": "silu_config_M201728_N192.json", + "M": 201728, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
254.20025000000007 + }, + "M=201728,N=256": { + "file": "silu_config_M201728_N256.json", + "M": 201728, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.63975000000005 + }, + "M=201728,N=320": { + "file": "silu_config_M201728_N320.json", + "M": 201728, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.80099999999993 + }, + "M=201728,N=384": { + "file": "silu_config_M201728_N384.json", + "M": 201728, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.4409999999998 + }, + "M=201728,N=480": { + "file": "silu_config_M201728_N480.json", + "M": 201728, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.36100000000033 + }, + "M=201728,N=512": { + "file": "silu_config_M201728_N512.json", + "M": 201728, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 395.24074999999993 + }, + "M=201728,N=576": { + "file": "silu_config_M201728_N576.json", + "M": 201728, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 951.7627500000001 + }, + "M=201728,N=640": { + "file": "silu_config_M201728_N640.json", + "M": 201728, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.123 + }, + "M=201728,N=768": { + "file": "silu_config_M201728_N768.json", + "M": 201728, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 937.2027499999999 + }, + "M=201728,N=800": { + "file": "silu_config_M201728_N800.json", + "M": 201728, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 955.2429999999999 + }, + "M=201728,N=896": { + "file": "silu_config_M201728_N896.json", + "M": 201728, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 941.16275 + }, + "M=201728,N=960": { + "file": "silu_config_M201728_N960.json", + "M": 201728, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 970.0029999999997 + }, + "M=201728,N=1024": { + "file": "silu_config_M201728_N1024.json", + "M": 201728, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + 
"time_us": 763.2419999999997 + }, + "M=201728,N=1120": { + "file": "silu_config_M201728_N1120.json", + "M": 201728, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1889.0867499999995 + }, + "M=201728,N=1152": { + "file": "silu_config_M201728_N1152.json", + "M": 201728, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1870.2867499999993 + }, + "M=201728,N=1280": { + "file": "silu_config_M201728_N1280.json", + "M": 201728, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1861.6467499999999 + }, + "M=201728,N=1344": { + "file": "silu_config_M201728_N1344.json", + "M": 201728, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1906.0067499999996 + }, + "M=201728,N=1408": { + "file": "silu_config_M201728_N1408.json", + "M": 201728, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1884.52675 + }, + "M=201728,N=1440": { + "file": "silu_config_M201728_N1440.json", + "M": 201728, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.2067500000003 + }, + "M=201728,N=1536": { + "file": "silu_config_M201728_N1536.json", + "M": 201728, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.8067499999997 + }, + "M=201728,N=1600": { + "file": "silu_config_M201728_N1600.json", + "M": 201728, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.9267500000005 + }, + "M=201728,N=1664": { + "file": "silu_config_M201728_N1664.json", + "M": 201728, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.1267500000004 + }, + "M=201728,N=1728": { + "file": "silu_config_M201728_N1728.json", + "M": 201728, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1904.2867499999993 + }, + "M=201728,N=1760": { + "file": "silu_config_M201728_N1760.json", + "M": 201728, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1897.44675 + }, + "M=201728,N=1792": { + "file": "silu_config_M201728_N1792.json", + "M": 
201728, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1895.8067500000006 + }, + "M=201728,N=1920": { + "file": "silu_config_M201728_N1920.json", + "M": 201728, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1890.56675 + }, + "M=201728,N=2048": { + "file": "silu_config_M201728_N2048.json", + "M": 201728, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1487.3249999999998 + }, + "M=201728,N=2080": { + "file": "silu_config_M201728_N2080.json", + "M": 201728, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2074.5275 + }, + "M=201728,N=2240": { + "file": "silu_config_M201728_N2240.json", + "M": 201728, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.0880000000006 + }, + "M=201728,N=2400": { + "file": "silu_config_M201728_N2400.json", + "M": 201728, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.928499999999 + }, + "M=201728,N=2560": { + "file": "silu_config_M201728_N2560.json", + "M": 201728, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2424.208999999998 + }, + "M=202752,N=128": { + "file": "silu_config_M202752_N128.json", + "M": 202752, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.23950000000002 + }, + "M=202752,N=160": { + "file": "silu_config_M202752_N160.json", + "M": 202752, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.92000000000002 + }, + "M=202752,N=192": { + "file": "silu_config_M202752_N192.json", + "M": 202752, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.32 + }, + "M=202752,N=256": { + "file": "silu_config_M202752_N256.json", + "M": 202752, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.51975000000022 + }, + "M=202752,N=320": { + "file": "silu_config_M202752_N320.json", + "M": 202752, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.40099999999995 + }, + "M=202752,N=384": { + "file": 
"silu_config_M202752_N384.json", + "M": 202752, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.64099999999985 + }, + "M=202752,N=480": { + "file": "silu_config_M202752_N480.json", + "M": 202752, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 498.8810000000001 + }, + "M=202752,N=512": { + "file": "silu_config_M202752_N512.json", + "M": 202752, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 384.52049999999963 + }, + "M=202752,N=576": { + "file": "silu_config_M202752_N576.json", + "M": 202752, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 945.60275 + }, + "M=202752,N=640": { + "file": "silu_config_M202752_N640.json", + "M": 202752, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.88275 + }, + "M=202752,N=768": { + "file": "silu_config_M202752_N768.json", + "M": 202752, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 953.0027499999997 + }, + "M=202752,N=800": { + "file": "silu_config_M202752_N800.json", + "M": 202752, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.9627500000001 + }, + "M=202752,N=896": { + "file": "silu_config_M202752_N896.json", + "M": 202752, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 956.9229999999998 + }, + "M=202752,N=960": { + "file": "silu_config_M202752_N960.json", + "M": 202752, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 974.7629999999999 + }, + "M=202752,N=1024": { + "file": "silu_config_M202752_N1024.json", + "M": 202752, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 768.00225 + }, + "M=202752,N=1120": { + "file": "silu_config_M202752_N1120.json", + "M": 202752, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1904.04675 + }, + "M=202752,N=1152": { + "file": "silu_config_M202752_N1152.json", + "M": 202752, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.7267500000003 + }, + "M=202752,N=1280": { + 
"file": "silu_config_M202752_N1280.json", + "M": 202752, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1902.1267500000004 + }, + "M=202752,N=1344": { + "file": "silu_config_M202752_N1344.json", + "M": 202752, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1899.8467499999997 + }, + "M=202752,N=1408": { + "file": "silu_config_M202752_N1408.json", + "M": 202752, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.2067499999994 + }, + "M=202752,N=1440": { + "file": "silu_config_M202752_N1440.json", + "M": 202752, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.2867499999993 + }, + "M=202752,N=1536": { + "file": "silu_config_M202752_N1536.json", + "M": 202752, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1902.56675 + }, + "M=202752,N=1600": { + "file": "silu_config_M202752_N1600.json", + "M": 202752, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.0467500000004 + }, + "M=202752,N=1664": { + "file": "silu_config_M202752_N1664.json", + "M": 202752, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1900.9267500000005 + }, + "M=202752,N=1728": { + "file": "silu_config_M202752_N1728.json", + "M": 202752, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1893.326750000001 + }, + "M=202752,N=1760": { + "file": "silu_config_M202752_N1760.json", + "M": 202752, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1896.60675 + }, + "M=202752,N=1792": { + "file": "silu_config_M202752_N1792.json", + "M": 202752, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1905.2867499999993 + }, + "M=202752,N=1920": { + "file": "silu_config_M202752_N1920.json", + "M": 202752, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1900.7267499999998 + }, + "M=202752,N=2048": { + "file": "silu_config_M202752_N2048.json", + "M": 202752, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + 
"time_us": 1499.24525 + }, + "M=202752,N=2080": { + "file": "silu_config_M202752_N2080.json", + "M": 202752, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + "M=202752,N=2240": { + "file": "silu_config_M202752_N2240.json", + "M": 202752, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.3280000000004 + }, + "M=202752,N=2400": { + "file": "silu_config_M202752_N2400.json", + "M": 202752, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2341.80875 + }, + "M=202752,N=2560": { + "file": "silu_config_M202752_N2560.json", + "M": 202752, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2435.4490000000005 + }, + "M=203776,N=128": { + "file": "silu_config_M203776_N128.json", + "M": 203776, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.7595 + }, + "M=203776,N=160": { + "file": "silu_config_M203776_N160.json", + "M": 203776, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.64 + }, + "M=203776,N=192": { + "file": "silu_config_M203776_N192.json", + "M": 203776, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.5200000000001 + }, + "M=203776,N=256": { + "file": "silu_config_M203776_N256.json", + "M": 203776, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 221.75974999999988 + }, + "M=203776,N=320": { + "file": "silu_config_M203776_N320.json", + "M": 203776, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.8810000000002 + }, + "M=203776,N=384": { + "file": "silu_config_M203776_N384.json", + "M": 203776, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.24099999999976 + }, + "M=203776,N=480": { + "file": "silu_config_M203776_N480.json", + "M": 203776, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 501.36100000000005 + }, + "M=203776,N=512": { + "file": "silu_config_M203776_N512.json", + "M": 203776, + "N": 512, + "rows_per_block": 4, + 
"vec_size": 4, + "time_us": 400.64075 + }, + "M=203776,N=576": { + "file": "silu_config_M203776_N576.json", + "M": 203776, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 960.6030000000005 + }, + "M=203776,N=640": { + "file": "silu_config_M203776_N640.json", + "M": 203776, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 944.8827500000004 + }, + "M=203776,N=768": { + "file": "silu_config_M203776_N768.json", + "M": 203776, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 957.6030000000001 + }, + "M=203776,N=800": { + "file": "silu_config_M203776_N800.json", + "M": 203776, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 964.6030000000001 + }, + "M=203776,N=896": { + "file": "silu_config_M203776_N896.json", + "M": 203776, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 972.9229999999998 + }, + "M=203776,N=960": { + "file": "silu_config_M203776_N960.json", + "M": 203776, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 968.723 + }, + "M=203776,N=1024": { + "file": "silu_config_M203776_N1024.json", + "M": 203776, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 771.8422500000006 + }, + "M=203776,N=1120": { + "file": "silu_config_M203776_N1120.json", + "M": 203776, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1913.486750000001 + }, + "M=203776,N=1152": { + "file": "silu_config_M203776_N1152.json", + "M": 203776, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.3267499999997 + }, + "M=203776,N=1280": { + "file": "silu_config_M203776_N1280.json", + "M": 203776, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1897.1667499999994 + }, + "M=203776,N=1344": { + "file": "silu_config_M203776_N1344.json", + "M": 203776, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1925.1670000000004 + }, + "M=203776,N=1408": { + "file": "silu_config_M203776_N1408.json", + "M": 203776, + "N": 1408, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1903.60675 + }, + "M=203776,N=1440": { + "file": "silu_config_M203776_N1440.json", + "M": 203776, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1917.9667500000005 + }, + "M=203776,N=1536": { + "file": "silu_config_M203776_N1536.json", + "M": 203776, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1911.8867499999997 + }, + "M=203776,N=1600": { + "file": "silu_config_M203776_N1600.json", + "M": 203776, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.0467500000004 + }, + "M=203776,N=1664": { + "file": "silu_config_M203776_N1664.json", + "M": 203776, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1910.2467500000002 + }, + "M=203776,N=1728": { + "file": "silu_config_M203776_N1728.json", + "M": 203776, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1902.6867499999998 + }, + "M=203776,N=1760": { + "file": "silu_config_M203776_N1760.json", + "M": 203776, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1920.1667500000003 + }, + "M=203776,N=1792": { + "file": "silu_config_M203776_N1792.json", + "M": 203776, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.1267500000013 + }, + "M=203776,N=1920": { + "file": "silu_config_M203776_N1920.json", + "M": 203776, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1920.2067499999994 + }, + "M=203776,N=2048": { + "file": "silu_config_M203776_N2048.json", + "M": 203776, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1521.08525 + }, + "M=203776,N=2080": { + "file": "silu_config_M203776_N2080.json", + "M": 203776, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.4877500000002 + }, + "M=203776,N=2240": { + "file": "silu_config_M203776_N2240.json", + "M": 203776, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2219.0880000000006 + }, + "M=203776,N=2400": { + "file": 
"silu_config_M203776_N2400.json", + "M": 203776, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2342.8085 + }, + "M=203776,N=2560": { + "file": "silu_config_M203776_N2560.json", + "M": 203776, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2443.4489999999996 + }, + "M=204800,N=128": { + "file": "silu_config_M204800_N128.json", + "M": 204800, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 115.03950000000003 + }, + "M=204800,N=160": { + "file": "silu_config_M204800_N160.json", + "M": 204800, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.24 + }, + "M=204800,N=192": { + "file": "silu_config_M204800_N192.json", + "M": 204800, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.2399999999999 + }, + "M=204800,N=256": { + "file": "silu_config_M204800_N256.json", + "M": 204800, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 222.83974999999998 + }, + "M=204800,N=320": { + "file": "silu_config_M204800_N320.json", + "M": 204800, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.6410000000001 + }, + "M=204800,N=384": { + "file": "silu_config_M204800_N384.json", + "M": 204800, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 492.2409999999999 + }, + "M=204800,N=480": { + "file": "silu_config_M204800_N480.json", + "M": 204800, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 500.28099999999995 + }, + "M=204800,N=512": { + "file": "silu_config_M204800_N512.json", + "M": 204800, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 399.20074999999974 + }, + "M=204800,N=576": { + "file": "silu_config_M204800_N576.json", + "M": 204800, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 938.4827500000001 + }, + "M=204800,N=640": { + "file": "silu_config_M204800_N640.json", + "M": 204800, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.6427500000002 + }, + 
"M=204800,N=768": { + "file": "silu_config_M204800_N768.json", + "M": 204800, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 946.7627500000003 + }, + "M=204800,N=800": { + "file": "silu_config_M204800_N800.json", + "M": 204800, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.0027499999997 + }, + "M=204800,N=896": { + "file": "silu_config_M204800_N896.json", + "M": 204800, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.4027500000002 + }, + "M=204800,N=960": { + "file": "silu_config_M204800_N960.json", + "M": 204800, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.6829999999995 + }, + "M=204800,N=1024": { + "file": "silu_config_M204800_N1024.json", + "M": 204800, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 773.1222499999999 + }, + "M=204800,N=1120": { + "file": "silu_config_M204800_N1120.json", + "M": 204800, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1888.9667500000005 + }, + "M=204800,N=1152": { + "file": "silu_config_M204800_N1152.json", + "M": 204800, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.8467499999992 + }, + "M=204800,N=1280": { + "file": "silu_config_M204800_N1280.json", + "M": 204800, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1868.08675 + }, + "M=204800,N=1344": { + "file": "silu_config_M204800_N1344.json", + "M": 204800, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.1667500000003 + }, + "M=204800,N=1408": { + "file": "silu_config_M204800_N1408.json", + "M": 204800, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.526749999999 + }, + "M=204800,N=1440": { + "file": "silu_config_M204800_N1440.json", + "M": 204800, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1882.8867500000006 + }, + "M=204800,N=1536": { + "file": "silu_config_M204800_N1536.json", + "M": 204800, + "N": 1536, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1872.2467500000002 + }, + "M=204800,N=1600": { + "file": "silu_config_M204800_N1600.json", + "M": 204800, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1883.60675 + }, + "M=204800,N=1664": { + "file": "silu_config_M204800_N1664.json", + "M": 204800, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.60675 + }, + "M=204800,N=1728": { + "file": "silu_config_M204800_N1728.json", + "M": 204800, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1888.2067500000003 + }, + "M=204800,N=1760": { + "file": "silu_config_M204800_N1760.json", + "M": 204800, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1890.8867499999997 + }, + "M=204800,N=1792": { + "file": "silu_config_M204800_N1792.json", + "M": 204800, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.3267500000002 + }, + "M=204800,N=1920": { + "file": "silu_config_M204800_N1920.json", + "M": 204800, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1895.8467499999997 + }, + "M=204800,N=2048": { + "file": "silu_config_M204800_N2048.json", + "M": 204800, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1485.165 + }, + "M=204800,N=2080": { + "file": "silu_config_M204800_N2080.json", + "M": 204800, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.9674999999997 + }, + "M=204800,N=2240": { + "file": "silu_config_M204800_N2240.json", + "M": 204800, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.008 + }, + "M=204800,N=2400": { + "file": "silu_config_M204800_N2400.json", + "M": 204800, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.4084999999995 + }, + "M=204800,N=2560": { + "file": "silu_config_M204800_N2560.json", + "M": 204800, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2421.5690000000013 + }, + "M=205824,N=128": { + "file": "silu_config_M205824_N128.json", + "M": 205824, 
+ "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 115.87950000000004 + }, + "M=205824,N=160": { + "file": "silu_config_M205824_N160.json", + "M": 205824, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.47999999999996 + }, + "M=205824,N=192": { + "file": "silu_config_M205824_N192.json", + "M": 205824, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.91999999999985 + }, + "M=205824,N=256": { + "file": "silu_config_M205824_N256.json", + "M": 205824, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.75999999999988 + }, + "M=205824,N=320": { + "file": "silu_config_M205824_N320.json", + "M": 205824, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 499.9610000000001 + }, + "M=205824,N=384": { + "file": "silu_config_M205824_N384.json", + "M": 205824, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 497.6009999999999 + }, + "M=205824,N=480": { + "file": "silu_config_M205824_N480.json", + "M": 205824, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 506.4010000000003 + }, + "M=205824,N=512": { + "file": "silu_config_M205824_N512.json", + "M": 205824, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 404.44075 + }, + "M=205824,N=576": { + "file": "silu_config_M205824_N576.json", + "M": 205824, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 969.963 + }, + "M=205824,N=640": { + "file": "silu_config_M205824_N640.json", + "M": 205824, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 964.4030000000005 + }, + "M=205824,N=768": { + "file": "silu_config_M205824_N768.json", + "M": 205824, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.3629999999998 + }, + "M=205824,N=800": { + "file": "silu_config_M205824_N800.json", + "M": 205824, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 973.6030000000001 + }, + "M=205824,N=896": { + "file": "silu_config_M205824_N896.json", + 
"M": 205824, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.2830000000004 + }, + "M=205824,N=960": { + "file": "silu_config_M205824_N960.json", + "M": 205824, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.6430000000005 + }, + "M=205824,N=1024": { + "file": "silu_config_M205824_N1024.json", + "M": 205824, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 779.3222500000002 + }, + "M=205824,N=1120": { + "file": "silu_config_M205824_N1120.json", + "M": 205824, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.487 + }, + "M=205824,N=1152": { + "file": "silu_config_M205824_N1152.json", + "M": 205824, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1907.7667499999998 + }, + "M=205824,N=1280": { + "file": "silu_config_M205824_N1280.json", + "M": 205824, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1909.7267499999998 + }, + "M=205824,N=1344": { + "file": "silu_config_M205824_N1344.json", + "M": 205824, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1943.8469999999998 + }, + "M=205824,N=1408": { + "file": "silu_config_M205824_N1408.json", + "M": 205824, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.4069999999992 + }, + "M=205824,N=1440": { + "file": "silu_config_M205824_N1440.json", + "M": 205824, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1916.56675 + }, + "M=205824,N=1536": { + "file": "silu_config_M205824_N1536.json", + "M": 205824, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1924.8869999999997 + }, + "M=205824,N=1600": { + "file": "silu_config_M205824_N1600.json", + "M": 205824, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.8069999999998 + }, + "M=205824,N=1664": { + "file": "silu_config_M205824_N1664.json", + "M": 205824, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1929.1670000000004 + }, + 
"M=205824,N=1728": { + "file": "silu_config_M205824_N1728.json", + "M": 205824, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.0070000000005 + }, + "M=205824,N=1760": { + "file": "silu_config_M205824_N1760.json", + "M": 205824, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1914.52675 + }, + "M=205824,N=1792": { + "file": "silu_config_M205824_N1792.json", + "M": 205824, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.567 + }, + "M=205824,N=1920": { + "file": "silu_config_M205824_N1920.json", + "M": 205824, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1928.8469999999998 + }, + "M=205824,N=2048": { + "file": "silu_config_M205824_N2048.json", + "M": 205824, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1526.4052499999998 + }, + "M=205824,N=2080": { + "file": "silu_config_M205824_N2080.json", + "M": 205824, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2117.56775 + }, + "M=205824,N=2240": { + "file": "silu_config_M205824_N2240.json", + "M": 205824, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.928249999999 + }, + "M=205824,N=2400": { + "file": "silu_config_M205824_N2400.json", + "M": 205824, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.6887499999984 + }, + "M=205824,N=2560": { + "file": "silu_config_M205824_N2560.json", + "M": 205824, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.3689999999997 + }, + "M=206848,N=128": { + "file": "silu_config_M206848_N128.json", + "M": 206848, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.59949999999998 + }, + "M=206848,N=160": { + "file": "silu_config_M206848_N160.json", + "M": 206848, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.68025 + }, + "M=206848,N=192": { + "file": "silu_config_M206848_N192.json", + "M": 206848, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 249.04025000000013 + }, + "M=206848,N=256": { + "file": "silu_config_M206848_N256.json", + "M": 206848, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.07999999999998 + }, + "M=206848,N=320": { + "file": "silu_config_M206848_N320.json", + "M": 206848, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 503.4409999999998 + }, + "M=206848,N=384": { + "file": "silu_config_M206848_N384.json", + "M": 206848, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 500.0409999999998 + }, + "M=206848,N=480": { + "file": "silu_config_M206848_N480.json", + "M": 206848, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 508.9209999999998 + }, + "M=206848,N=512": { + "file": "silu_config_M206848_N512.json", + "M": 206848, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 406.4805000000001 + }, + "M=206848,N=576": { + "file": "silu_config_M206848_N576.json", + "M": 206848, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 986.2430000000004 + }, + "M=206848,N=640": { + "file": "silu_config_M206848_N640.json", + "M": 206848, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 969.0429999999997 + }, + "M=206848,N=768": { + "file": "silu_config_M206848_N768.json", + "M": 206848, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 971.9229999999998 + }, + "M=206848,N=800": { + "file": "silu_config_M206848_N800.json", + "M": 206848, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 989.9629999999997 + }, + "M=206848,N=896": { + "file": "silu_config_M206848_N896.json", + "M": 206848, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.0429999999997 + }, + "M=206848,N=960": { + "file": "silu_config_M206848_N960.json", + "M": 206848, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 993.4830000000002 + }, + "M=206848,N=1024": { + "file": "silu_config_M206848_N1024.json", + "M": 206848, + "N": 1024, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 782.7622500000002 + }, + "M=206848,N=1120": { + "file": "silu_config_M206848_N1120.json", + "M": 206848, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1942.007000000001 + }, + "M=206848,N=1152": { + "file": "silu_config_M206848_N1152.json", + "M": 206848, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.607 + }, + "M=206848,N=1280": { + "file": "silu_config_M206848_N1280.json", + "M": 206848, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1918.9669999999996 + }, + "M=206848,N=1344": { + "file": "silu_config_M206848_N1344.json", + "M": 206848, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1942.9269999999997 + }, + "M=206848,N=1408": { + "file": "silu_config_M206848_N1408.json", + "M": 206848, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1910.7667499999998 + }, + "M=206848,N=1440": { + "file": "silu_config_M206848_N1440.json", + "M": 206848, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1935.8469999999998 + }, + "M=206848,N=1536": { + "file": "silu_config_M206848_N1536.json", + "M": 206848, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1923.7269999999999 + }, + "M=206848,N=1600": { + "file": "silu_config_M206848_N1600.json", + "M": 206848, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.8869999999997 + }, + "M=206848,N=1664": { + "file": "silu_config_M206848_N1664.json", + "M": 206848, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1933.487000000001 + }, + "M=206848,N=1728": { + "file": "silu_config_M206848_N1728.json", + "M": 206848, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1951.8070000000007 + }, + "M=206848,N=1760": { + "file": "silu_config_M206848_N1760.json", + "M": 206848, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1924.0470000000005 + }, + "M=206848,N=1792": { + "file": 
"silu_config_M206848_N1792.json", + "M": 206848, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.3269999999993 + }, + "M=206848,N=1920": { + "file": "silu_config_M206848_N1920.json", + "M": 206848, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1927.8469999999998 + }, + "M=206848,N=2048": { + "file": "silu_config_M206848_N2048.json", + "M": 206848, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1542.5252499999997 + }, + "M=206848,N=2080": { + "file": "silu_config_M206848_N2080.json", + "M": 206848, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.64775 + }, + "M=206848,N=2240": { + "file": "silu_config_M206848_N2240.json", + "M": 206848, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2271.808250000002 + }, + "M=206848,N=2400": { + "file": "silu_config_M206848_N2400.json", + "M": 206848, + "N": 2400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2384.1687500000007 + }, + "M=206848,N=2560": { + "file": "silu_config_M206848_N2560.json", + "M": 206848, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2474.129250000001 + }, + "M=207872,N=128": { + "file": "silu_config_M207872_N128.json", + "M": 207872, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.99949999999998 + }, + "M=207872,N=160": { + "file": "silu_config_M207872_N160.json", + "M": 207872, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.4000000000001 + }, + "M=207872,N=192": { + "file": "silu_config_M207872_N192.json", + "M": 207872, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 258.1999999999999 + }, + "M=207872,N=256": { + "file": "silu_config_M207872_N256.json", + "M": 207872, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.60000000000014 + }, + "M=207872,N=320": { + "file": "silu_config_M207872_N320.json", + "M": 207872, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
505.9609999999998 + }, + "M=207872,N=384": { + "file": "silu_config_M207872_N384.json", + "M": 207872, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 503.08100000000053 + }, + "M=207872,N=480": { + "file": "silu_config_M207872_N480.json", + "M": 207872, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.2409999999997 + }, + "M=207872,N=512": { + "file": "silu_config_M207872_N512.json", + "M": 207872, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 408.44074999999975 + }, + "M=207872,N=576": { + "file": "silu_config_M207872_N576.json", + "M": 207872, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 979.3229999999999 + }, + "M=207872,N=640": { + "file": "silu_config_M207872_N640.json", + "M": 207872, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 963.5629999999996 + }, + "M=207872,N=768": { + "file": "silu_config_M207872_N768.json", + "M": 207872, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 976.2830000000004 + }, + "M=207872,N=800": { + "file": "silu_config_M207872_N800.json", + "M": 207872, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 983.6830000000004 + }, + "M=207872,N=896": { + "file": "silu_config_M207872_N896.json", + "M": 207872, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.0830000000005 + }, + "M=207872,N=960": { + "file": "silu_config_M207872_N960.json", + "M": 207872, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.8030000000008 + }, + "M=207872,N=1024": { + "file": "silu_config_M207872_N1024.json", + "M": 207872, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 786.4822499999996 + }, + "M=207872,N=1120": { + "file": "silu_config_M207872_N1120.json", + "M": 207872, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1930.6070000000004 + }, + "M=207872,N=1152": { + "file": "silu_config_M207872_N1152.json", + "M": 207872, + "N": 1152, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1916.0867500000004 + }, + "M=207872,N=1280": { + "file": "silu_config_M207872_N1280.json", + "M": 207872, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.7269999999999 + }, + "M=207872,N=1344": { + "file": "silu_config_M207872_N1344.json", + "M": 207872, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1931.8469999999998 + }, + "M=207872,N=1408": { + "file": "silu_config_M207872_N1408.json", + "M": 207872, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1920.2067499999994 + }, + "M=207872,N=1440": { + "file": "silu_config_M207872_N1440.json", + "M": 207872, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1945.6870000000008 + }, + "M=207872,N=1536": { + "file": "silu_config_M207872_N1536.json", + "M": 207872, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.367000000001 + }, + "M=207872,N=1600": { + "file": "silu_config_M207872_N1600.json", + "M": 207872, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.9270000000006 + }, + "M=207872,N=1664": { + "file": "silu_config_M207872_N1664.json", + "M": 207872, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.6870000000008 + }, + "M=207872,N=1728": { + "file": "silu_config_M207872_N1728.json", + "M": 207872, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.647 + }, + "M=207872,N=1760": { + "file": "silu_config_M207872_N1760.json", + "M": 207872, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1943.8469999999998 + }, + "M=207872,N=1792": { + "file": "silu_config_M207872_N1792.json", + "M": 207872, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1962.8869999999997 + }, + "M=207872,N=1920": { + "file": "silu_config_M207872_N1920.json", + "M": 207872, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1968.2070000000003 + }, + "M=207872,N=2048": { + 
"file": "silu_config_M207872_N2048.json", + "M": 207872, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1536.08525 + }, + "M=207872,N=2080": { + "file": "silu_config_M207872_N2080.json", + "M": 207872, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2125.88775 + }, + "M=207872,N=2240": { + "file": "silu_config_M207872_N2240.json", + "M": 207872, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2251.00825 + }, + "M=207872,N=2400": { + "file": "silu_config_M207872_N2400.json", + "M": 207872, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2387.80875 + }, + "M=207872,N=2560": { + "file": "silu_config_M207872_N2560.json", + "M": 207872, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2492.6492500000004 + }, + "M=208896,N=128": { + "file": "silu_config_M208896_N128.json", + "M": 208896, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 128.83949999999996 + }, + "M=208896,N=160": { + "file": "silu_config_M208896_N160.json", + "M": 208896, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.19999999999987 + }, + "M=208896,N=192": { + "file": "silu_config_M208896_N192.json", + "M": 208896, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.56000000000006 + }, + "M=208896,N=256": { + "file": "silu_config_M208896_N256.json", + "M": 208896, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.12000000000023 + }, + "M=208896,N=320": { + "file": "silu_config_M208896_N320.json", + "M": 208896, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.6809999999999 + }, + "M=208896,N=384": { + "file": "silu_config_M208896_N384.json", + "M": 208896, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 504.9609999999998 + }, + "M=208896,N=480": { + "file": "silu_config_M208896_N480.json", + "M": 208896, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 514.1609999999998 + }, + 
"M=208896,N=512": { + "file": "silu_config_M208896_N512.json", + "M": 208896, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 406.8807500000005 + }, + "M=208896,N=576": { + "file": "silu_config_M208896_N576.json", + "M": 208896, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 984.8030000000003 + }, + "M=208896,N=640": { + "file": "silu_config_M208896_N640.json", + "M": 208896, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 967.7630000000001 + }, + "M=208896,N=768": { + "file": "silu_config_M208896_N768.json", + "M": 208896, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.2829999999999 + }, + "M=208896,N=800": { + "file": "silu_config_M208896_N800.json", + "M": 208896, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.0030000000002 + }, + "M=208896,N=896": { + "file": "silu_config_M208896_N896.json", + "M": 208896, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 985.163 + }, + "M=208896,N=960": { + "file": "silu_config_M208896_N960.json", + "M": 208896, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1003.3630000000003 + }, + "M=208896,N=1024": { + "file": "silu_config_M208896_N1024.json", + "M": 208896, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 790.0822499999999 + }, + "M=208896,N=1120": { + "file": "silu_config_M208896_N1120.json", + "M": 208896, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1950.8869999999997 + }, + "M=208896,N=1152": { + "file": "silu_config_M208896_N1152.json", + "M": 208896, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1936.2069999999999 + }, + "M=208896,N=1280": { + "file": "silu_config_M208896_N1280.json", + "M": 208896, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1948.647 + }, + "M=208896,N=1344": { + "file": "silu_config_M208896_N1344.json", + "M": 208896, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1941.9670000000006 + }, + "M=208896,N=1408": { + "file": "silu_config_M208896_N1408.json", + "M": 208896, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1947.0070000000005 + }, + "M=208896,N=1440": { + "file": "silu_config_M208896_N1440.json", + "M": 208896, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.3269999999993 + }, + "M=208896,N=1536": { + "file": "silu_config_M208896_N1536.json", + "M": 208896, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1943.0869999999995 + }, + "M=208896,N=1600": { + "file": "silu_config_M208896_N1600.json", + "M": 208896, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.3270000000011 + }, + "M=208896,N=1664": { + "file": "silu_config_M208896_N1664.json", + "M": 208896, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1936.687 + }, + "M=208896,N=1728": { + "file": "silu_config_M208896_N1728.json", + "M": 208896, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1939.3670000000002 + }, + "M=208896,N=1760": { + "file": "silu_config_M208896_N1760.json", + "M": 208896, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1952.687 + }, + "M=208896,N=1792": { + "file": "silu_config_M208896_N1792.json", + "M": 208896, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.9269999999997 + }, + "M=208896,N=1920": { + "file": "silu_config_M208896_N1920.json", + "M": 208896, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1956.6070000000009 + }, + "M=208896,N=2048": { + "file": "silu_config_M208896_N2048.json", + "M": 208896, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1548.4852499999997 + }, + "M=208896,N=2080": { + "file": "silu_config_M208896_N2080.json", + "M": 208896, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.56775 + }, + "M=208896,N=2240": { + "file": "silu_config_M208896_N2240.json", + "M": 208896, + "N": 2240, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.5282499999994 + }, + "M=208896,N=2400": { + "file": "silu_config_M208896_N2400.json", + "M": 208896, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.6490000000003 + }, + "M=208896,N=2560": { + "file": "silu_config_M208896_N2560.json", + "M": 208896, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.3692500000006 + }, + "M=209920,N=128": { + "file": "silu_config_M209920_N128.json", + "M": 209920, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.19950000000003 + }, + "M=209920,N=160": { + "file": "silu_config_M209920_N160.json", + "M": 209920, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.36025000000006 + }, + "M=209920,N=192": { + "file": "silu_config_M209920_N192.json", + "M": 209920, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.1200000000002 + }, + "M=209920,N=256": { + "file": "silu_config_M209920_N256.json", + "M": 209920, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.03975000000003 + }, + "M=209920,N=320": { + "file": "silu_config_M209920_N320.json", + "M": 209920, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.2810000000003 + }, + "M=209920,N=384": { + "file": "silu_config_M209920_N384.json", + "M": 209920, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 504.44100000000003 + }, + "M=209920,N=480": { + "file": "silu_config_M209920_N480.json", + "M": 209920, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 512.3209999999997 + }, + "M=209920,N=512": { + "file": "silu_config_M209920_N512.json", + "M": 209920, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 412.3607500000003 + }, + "M=209920,N=576": { + "file": "silu_config_M209920_N576.json", + "M": 209920, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.0029999999997 + }, + "M=209920,N=640": { + "file": 
"silu_config_M209920_N640.json", + "M": 209920, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.0029999999999 + }, + "M=209920,N=768": { + "file": "silu_config_M209920_N768.json", + "M": 209920, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.2429999999999 + }, + "M=209920,N=800": { + "file": "silu_config_M209920_N800.json", + "M": 209920, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.6429999999999 + }, + "M=209920,N=896": { + "file": "silu_config_M209920_N896.json", + "M": 209920, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.723 + }, + "M=209920,N=960": { + "file": "silu_config_M209920_N960.json", + "M": 209920, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 990.9229999999998 + }, + "M=209920,N=1024": { + "file": "silu_config_M209920_N1024.json", + "M": 209920, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 792.9222500000001 + }, + "M=209920,N=1120": { + "file": "silu_config_M209920_N1120.json", + "M": 209920, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1967.4070000000002 + }, + "M=209920,N=1152": { + "file": "silu_config_M209920_N1152.json", + "M": 209920, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1944.127 + }, + "M=209920,N=1280": { + "file": "silu_config_M209920_N1280.json", + "M": 209920, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1935.8869999999997 + }, + "M=209920,N=1344": { + "file": "silu_config_M209920_N1344.json", + "M": 209920, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.9670000000006 + }, + "M=209920,N=1408": { + "file": "silu_config_M209920_N1408.json", + "M": 209920, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1948.0869999999995 + }, + "M=209920,N=1440": { + "file": "silu_config_M209920_N1440.json", + "M": 209920, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.9270000000006 
+ }, + "M=209920,N=1536": { + "file": "silu_config_M209920_N1536.json", + "M": 209920, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.3270000000002 + }, + "M=209920,N=1600": { + "file": "silu_config_M209920_N1600.json", + "M": 209920, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1931.4470000000001 + }, + "M=209920,N=1664": { + "file": "silu_config_M209920_N1664.json", + "M": 209920, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1944.0069999999996 + }, + "M=209920,N=1728": { + "file": "silu_config_M209920_N1728.json", + "M": 209920, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.9269999999997 + }, + "M=209920,N=1760": { + "file": "silu_config_M209920_N1760.json", + "M": 209920, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1957.8470000000016 + }, + "M=209920,N=1792": { + "file": "silu_config_M209920_N1792.json", + "M": 209920, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1947.9269999999997 + }, + "M=209920,N=1920": { + "file": "silu_config_M209920_N1920.json", + "M": 209920, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1963.4070000000002 + }, + "M=209920,N=2048": { + "file": "silu_config_M209920_N2048.json", + "M": 209920, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1557.7254999999996 + }, + "M=209920,N=2080": { + "file": "silu_config_M209920_N2080.json", + "M": 209920, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.4477500000003 + }, + "M=209920,N=2240": { + "file": "silu_config_M209920_N2240.json", + "M": 209920, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.2882499999996 + }, + "M=209920,N=2400": { + "file": "silu_config_M209920_N2400.json", + "M": 209920, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2388.6487500000003 + }, + "M=209920,N=2560": { + "file": "silu_config_M209920_N2560.json", + "M": 209920, + "N": 
2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2499.4892499999987 + }, + "M=210944,N=128": { + "file": "silu_config_M210944_N128.json", + "M": 210944, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.67950000000002 + }, + "M=210944,N=160": { + "file": "silu_config_M210944_N160.json", + "M": 210944, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.44 + }, + "M=210944,N=192": { + "file": "silu_config_M210944_N192.json", + "M": 210944, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.7600000000001 + }, + "M=210944,N=256": { + "file": "silu_config_M210944_N256.json", + "M": 210944, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.7199999999999 + }, + "M=210944,N=320": { + "file": "silu_config_M210944_N320.json", + "M": 210944, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 512.4810000000001 + }, + "M=210944,N=384": { + "file": "silu_config_M210944_N384.json", + "M": 210944, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 510.9210000000003 + }, + "M=210944,N=480": { + "file": "silu_config_M210944_N480.json", + "M": 210944, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 518.4009999999998 + }, + "M=210944,N=512": { + "file": "silu_config_M210944_N512.json", + "M": 210944, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 414.48074999999994 + }, + "M=210944,N=576": { + "file": "silu_config_M210944_N576.json", + "M": 210944, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 982.9230000000005 + }, + "M=210944,N=640": { + "file": "silu_config_M210944_N640.json", + "M": 210944, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 988.443 + }, + "M=210944,N=768": { + "file": "silu_config_M210944_N768.json", + "M": 210944, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.8030000000003 + }, + "M=210944,N=800": { + "file": "silu_config_M210944_N800.json", + "M": 210944, 
+ "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.9229999999998 + }, + "M=210944,N=896": { + "file": "silu_config_M210944_N896.json", + "M": 210944, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 994.0429999999997 + }, + "M=210944,N=960": { + "file": "silu_config_M210944_N960.json", + "M": 210944, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.1629999999999 + }, + "M=210944,N=1024": { + "file": "silu_config_M210944_N1024.json", + "M": 210944, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 797.56225 + }, + "M=210944,N=1120": { + "file": "silu_config_M210944_N1120.json", + "M": 210944, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.4472500000002 + }, + "M=210944,N=1152": { + "file": "silu_config_M210944_N1152.json", + "M": 210944, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.9669999999996 + }, + "M=210944,N=1280": { + "file": "silu_config_M210944_N1280.json", + "M": 210944, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.4470000000001 + }, + "M=210944,N=1344": { + "file": "silu_config_M210944_N1344.json", + "M": 210944, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1960.8869999999997 + }, + "M=210944,N=1408": { + "file": "silu_config_M210944_N1408.json", + "M": 210944, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1959.407000000001 + }, + "M=210944,N=1440": { + "file": "silu_config_M210944_N1440.json", + "M": 210944, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.4470000000001 + }, + "M=210944,N=1536": { + "file": "silu_config_M210944_N1536.json", + "M": 210944, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1972.3270000000002 + }, + "M=210944,N=1600": { + "file": "silu_config_M210944_N1600.json", + "M": 210944, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1996.0472500000005 + }, + "M=210944,N=1664": { + 
"file": "silu_config_M210944_N1664.json", + "M": 210944, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1965.687 + }, + "M=210944,N=1728": { + "file": "silu_config_M210944_N1728.json", + "M": 210944, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1989.2872500000003 + }, + "M=210944,N=1760": { + "file": "silu_config_M210944_N1760.json", + "M": 210944, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.8072500000007 + }, + "M=210944,N=1792": { + "file": "silu_config_M210944_N1792.json", + "M": 210944, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1959.8869999999997 + }, + "M=210944,N=1920": { + "file": "silu_config_M210944_N1920.json", + "M": 210944, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.8469999999998 + }, + "M=210944,N=2048": { + "file": "silu_config_M210944_N2048.json", + "M": 210944, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1572.2855 + }, + "M=210944,N=2080": { + "file": "silu_config_M210944_N2080.json", + "M": 210944, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.727999999999 + }, + "M=210944,N=2240": { + "file": "silu_config_M210944_N2240.json", + "M": 210944, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.4084999999986 + }, + "M=210944,N=2400": { + "file": "silu_config_M210944_N2400.json", + "M": 210944, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.169000000001 + }, + "M=210944,N=2560": { + "file": "silu_config_M210944_N2560.json", + "M": 210944, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.24925 + }, + "M=211968,N=128": { + "file": "silu_config_M211968_N128.json", + "M": 211968, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.63950000000003 + }, + "M=211968,N=160": { + "file": "silu_config_M211968_N160.json", + "M": 211968, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
254.79999999999998 + }, + "M=211968,N=192": { + "file": "silu_config_M211968_N192.json", + "M": 211968, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 277.0802499999999 + }, + "M=211968,N=256": { + "file": "silu_config_M211968_N256.json", + "M": 211968, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 230.12000000000006 + }, + "M=211968,N=320": { + "file": "silu_config_M211968_N320.json", + "M": 211968, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.4810000000001 + }, + "M=211968,N=384": { + "file": "silu_config_M211968_N384.json", + "M": 211968, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 513.0010000000002 + }, + "M=211968,N=480": { + "file": "silu_config_M211968_N480.json", + "M": 211968, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 521.3209999999997 + }, + "M=211968,N=512": { + "file": "silu_config_M211968_N512.json", + "M": 211968, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 412.1207499999998 + }, + "M=211968,N=576": { + "file": "silu_config_M211968_N576.json", + "M": 211968, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.8030000000006 + }, + "M=211968,N=640": { + "file": "silu_config_M211968_N640.json", + "M": 211968, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.7230000000005 + }, + "M=211968,N=768": { + "file": "silu_config_M211968_N768.json", + "M": 211968, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 984.2829999999999 + }, + "M=211968,N=800": { + "file": "silu_config_M211968_N800.json", + "M": 211968, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 991.5630000000001 + }, + "M=211968,N=896": { + "file": "silu_config_M211968_N896.json", + "M": 211968, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.9229999999998 + }, + "M=211968,N=960": { + "file": "silu_config_M211968_N960.json", + "M": 211968, + "N": 960, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1017.28325 + }, + "M=211968,N=1024": { + "file": "silu_config_M211968_N1024.json", + "M": 211968, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 801.56225 + }, + "M=211968,N=1120": { + "file": "silu_config_M211968_N1120.json", + "M": 211968, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1969.0070000000005 + }, + "M=211968,N=1152": { + "file": "silu_config_M211968_N1152.json", + "M": 211968, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.087 + }, + "M=211968,N=1280": { + "file": "silu_config_M211968_N1280.json", + "M": 211968, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.1669999999995 + }, + "M=211968,N=1344": { + "file": "silu_config_M211968_N1344.json", + "M": 211968, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1991.0872499999996 + }, + "M=211968,N=1408": { + "file": "silu_config_M211968_N1408.json", + "M": 211968, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1958.607 + }, + "M=211968,N=1440": { + "file": "silu_config_M211968_N1440.json", + "M": 211968, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.447250000001 + }, + "M=211968,N=1536": { + "file": "silu_config_M211968_N1536.json", + "M": 211968, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.7672499999999 + }, + "M=211968,N=1600": { + "file": "silu_config_M211968_N1600.json", + "M": 211968, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.4072499999993 + }, + "M=211968,N=1664": { + "file": "silu_config_M211968_N1664.json", + "M": 211968, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1985.3672500000002 + }, + "M=211968,N=1728": { + "file": "silu_config_M211968_N1728.json", + "M": 211968, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1978.0469999999996 + }, + "M=211968,N=1760": { + "file": "silu_config_M211968_N1760.json", + "M": 211968, + 
"N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1997.0072499999997 + }, + "M=211968,N=1792": { + "file": "silu_config_M211968_N1792.json", + "M": 211968, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1969.0072500000006 + }, + "M=211968,N=1920": { + "file": "silu_config_M211968_N1920.json", + "M": 211968, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.4072499999993 + }, + "M=211968,N=2048": { + "file": "silu_config_M211968_N2048.json", + "M": 211968, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1569.6054999999997 + }, + "M=211968,N=2080": { + "file": "silu_config_M211968_N2080.json", + "M": 211968, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.3680000000004 + }, + "M=211968,N=2240": { + "file": "silu_config_M211968_N2240.json", + "M": 211968, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.5684999999994 + }, + "M=211968,N=2400": { + "file": "silu_config_M211968_N2400.json", + "M": 211968, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2429.968999999999 + }, + "M=211968,N=2560": { + "file": "silu_config_M211968_N2560.json", + "M": 211968, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2547.049500000001 + }, + "M=212992,N=128": { + "file": "silu_config_M212992_N128.json", + "M": 212992, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + "M=212992,N=160": { + "file": "silu_config_M212992_N160.json", + "M": 212992, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.79999999999993 + }, + "M=212992,N=192": { + "file": "silu_config_M212992_N192.json", + "M": 212992, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.2400000000001 + }, + "M=212992,N=256": { + "file": "silu_config_M212992_N256.json", + "M": 212992, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 231.32 + }, + "M=212992,N=320": { + "file": 
"silu_config_M212992_N320.json", + "M": 212992, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.7609999999997 + }, + "M=212992,N=384": { + "file": "silu_config_M212992_N384.json", + "M": 212992, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 514.681 + }, + "M=212992,N=480": { + "file": "silu_config_M212992_N480.json", + "M": 212992, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 523.6410000000001 + }, + "M=212992,N=512": { + "file": "silu_config_M212992_N512.json", + "M": 212992, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 414.1205 + }, + "M=212992,N=576": { + "file": "silu_config_M212992_N576.json", + "M": 212992, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 992.883 + }, + "M=212992,N=640": { + "file": "silu_config_M212992_N640.json", + "M": 212992, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 987.2429999999997 + }, + "M=212992,N=768": { + "file": "silu_config_M212992_N768.json", + "M": 212992, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1000.203 + }, + "M=212992,N=800": { + "file": "silu_config_M212992_N800.json", + "M": 212992, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1007.4830000000002 + }, + "M=212992,N=896": { + "file": "silu_config_M212992_N896.json", + "M": 212992, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 993.4429999999998 + }, + "M=212992,N=960": { + "file": "silu_config_M212992_N960.json", + "M": 212992, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1001.2430000000003 + }, + "M=212992,N=1024": { + "file": "silu_config_M212992_N1024.json", + "M": 212992, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 806.4822499999996 + }, + "M=212992,N=1120": { + "file": "silu_config_M212992_N1120.json", + "M": 212992, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1978.527 + }, + "M=212992,N=1152": { + "file": 
"silu_config_M212992_N1152.json", + "M": 212992, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1973.487 + }, + "M=212992,N=1280": { + "file": "silu_config_M212992_N1280.json", + "M": 212992, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.9270000000006 + }, + "M=212992,N=1344": { + "file": "silu_config_M212992_N1344.json", + "M": 212992, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1990.1272500000005 + }, + "M=212992,N=1408": { + "file": "silu_config_M212992_N1408.json", + "M": 212992, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1967.607 + }, + "M=212992,N=1440": { + "file": "silu_config_M212992_N1440.json", + "M": 212992, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.0072499999997 + }, + "M=212992,N=1536": { + "file": "silu_config_M212992_N1536.json", + "M": 212992, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1987.7672499999999 + }, + "M=212992,N=1600": { + "file": "silu_config_M212992_N1600.json", + "M": 212992, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.68725 + }, + "M=212992,N=1664": { + "file": "silu_config_M212992_N1664.json", + "M": 212992, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1974.1670000000004 + }, + "M=212992,N=1728": { + "file": "silu_config_M212992_N1728.json", + "M": 212992, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2004.76725 + }, + "M=212992,N=1760": { + "file": "silu_config_M212992_N1760.json", + "M": 212992, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.9269999999997 + }, + "M=212992,N=1792": { + "file": "silu_config_M212992_N1792.json", + "M": 212992, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1996.1272500000005 + }, + "M=212992,N=1920": { + "file": "silu_config_M212992_N1920.json", + "M": 212992, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.60725 + }, + 
"M=212992,N=2048": { + "file": "silu_config_M212992_N2048.json", + "M": 212992, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1576.8455000000004 + }, + "M=212992,N=2080": { + "file": "silu_config_M212992_N2080.json", + "M": 212992, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.1280000000006 + }, + "M=212992,N=2240": { + "file": "silu_config_M212992_N2240.json", + "M": 212992, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2304.2084999999997 + }, + "M=212992,N=2400": { + "file": "silu_config_M212992_N2400.json", + "M": 212992, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.048999999999 + }, + "M=212992,N=2560": { + "file": "silu_config_M212992_N2560.json", + "M": 212992, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2553.8895 + }, + "M=214016,N=128": { + "file": "silu_config_M214016_N128.json", + "M": 214016, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.2794999999999 + }, + "M=214016,N=160": { + "file": "silu_config_M214016_N160.json", + "M": 214016, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.00000000000017 + }, + "M=214016,N=192": { + "file": "silu_config_M214016_N192.json", + "M": 214016, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 265.47999999999985 + }, + "M=214016,N=256": { + "file": "silu_config_M214016_N256.json", + "M": 214016, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.87999999999988 + }, + "M=214016,N=320": { + "file": "silu_config_M214016_N320.json", + "M": 214016, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 519.8410000000003 + }, + "M=214016,N=384": { + "file": "silu_config_M214016_N384.json", + "M": 214016, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.2410000000002 + }, + "M=214016,N=480": { + "file": "silu_config_M214016_N480.json", + "M": 214016, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 526.001 + }, + "M=214016,N=512": { + "file": "silu_config_M214016_N512.json", + "M": 214016, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 416.2405000000001 + }, + "M=214016,N=576": { + "file": "silu_config_M214016_N576.json", + "M": 214016, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.6829999999997 + }, + "M=214016,N=640": { + "file": "silu_config_M214016_N640.json", + "M": 214016, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2030000000001 + }, + "M=214016,N=768": { + "file": "silu_config_M214016_N768.json", + "M": 214016, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1004.7230000000005 + }, + "M=214016,N=800": { + "file": "silu_config_M214016_N800.json", + "M": 214016, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1011.0830000000002 + }, + "M=214016,N=896": { + "file": "silu_config_M214016_N896.json", + "M": 214016, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 997.723 + }, + "M=214016,N=960": { + "file": "silu_config_M214016_N960.json", + "M": 214016, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1026.80325 + }, + "M=214016,N=1024": { + "file": "silu_config_M214016_N1024.json", + "M": 214016, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 809.3222500000006 + }, + "M=214016,N=1120": { + "file": "silu_config_M214016_N1120.json", + "M": 214016, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1988.24725 + }, + "M=214016,N=1152": { + "file": "silu_config_M214016_N1152.json", + "M": 214016, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1982.9272500000006 + }, + "M=214016,N=1280": { + "file": "silu_config_M214016_N1280.json", + "M": 214016, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.1672499999995 + }, + "M=214016,N=1344": { + "file": "silu_config_M214016_N1344.json", + "M": 214016, + "N": 1344, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 2006.1672500000002 + }, + "M=214016,N=1408": { + "file": "silu_config_M214016_N1408.json", + "M": 214016, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.8069999999998 + }, + "M=214016,N=1440": { + "file": "silu_config_M214016_N1440.json", + "M": 214016, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.1672499999997 + }, + "M=214016,N=1536": { + "file": "silu_config_M214016_N1536.json", + "M": 214016, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.60725 + }, + "M=214016,N=1600": { + "file": "silu_config_M214016_N1600.json", + "M": 214016, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.0472499999996 + }, + "M=214016,N=1664": { + "file": "silu_config_M214016_N1664.json", + "M": 214016, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.7672500000008 + }, + "M=214016,N=1728": { + "file": "silu_config_M214016_N1728.json", + "M": 214016, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.52725 + }, + "M=214016,N=1760": { + "file": "silu_config_M214016_N1760.json", + "M": 214016, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.2472500000003 + }, + "M=214016,N=1792": { + "file": "silu_config_M214016_N1792.json", + "M": 214016, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1998.4872500000001 + }, + "M=214016,N=1920": { + "file": "silu_config_M214016_N1920.json", + "M": 214016, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2014.3672500000011 + }, + "M=214016,N=2048": { + "file": "silu_config_M214016_N2048.json", + "M": 214016, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1585.0055000000002 + }, + "M=214016,N=2080": { + "file": "silu_config_M214016_N2080.json", + "M": 214016, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.2879999999996 + }, + "M=214016,N=2240": { + "file": 
"silu_config_M214016_N2240.json", + "M": 214016, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2315.2085000000006 + }, + "M=214016,N=2400": { + "file": "silu_config_M214016_N2400.json", + "M": 214016, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2454.769000000001 + }, + "M=214016,N=2560": { + "file": "silu_config_M214016_N2560.json", + "M": 214016, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.4495000000006 + }, + "M=215040,N=128": { + "file": "silu_config_M215040_N128.json", + "M": 215040, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.71950000000001 + }, + "M=215040,N=160": { + "file": "silu_config_M215040_N160.json", + "M": 215040, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.32000000000005 + }, + "M=215040,N=192": { + "file": "silu_config_M215040_N192.json", + "M": 215040, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.87999999999994 + }, + "M=215040,N=256": { + "file": "silu_config_M215040_N256.json", + "M": 215040, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 244.84000000000003 + }, + "M=215040,N=320": { + "file": "silu_config_M215040_N320.json", + "M": 215040, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 519.0810000000001 + }, + "M=215040,N=384": { + "file": "silu_config_M215040_N384.json", + "M": 215040, + "N": 384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 538.6012499999999 + }, + "M=215040,N=480": { + "file": "silu_config_M215040_N480.json", + "M": 215040, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.761 + }, + "M=215040,N=512": { + "file": "silu_config_M215040_N512.json", + "M": 215040, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 422.32074999999963 + }, + "M=215040,N=576": { + "file": "silu_config_M215040_N576.json", + "M": 215040, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1024.48325 + }, + 
"M=215040,N=640": { + "file": "silu_config_M215040_N640.json", + "M": 215040, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 979.9230000000002 + }, + "M=215040,N=768": { + "file": "silu_config_M215040_N768.json", + "M": 215040, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 993.0029999999997 + }, + "M=215040,N=800": { + "file": "silu_config_M215040_N800.json", + "M": 215040, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.3229999999999 + }, + "M=215040,N=896": { + "file": "silu_config_M215040_N896.json", + "M": 215040, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.2829999999999 + }, + "M=215040,N=960": { + "file": "silu_config_M215040_N960.json", + "M": 215040, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1024.6832499999996 + }, + "M=215040,N=1024": { + "file": "silu_config_M215040_N1024.json", + "M": 215040, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 811.7622500000002 + }, + "M=215040,N=1120": { + "file": "silu_config_M215040_N1120.json", + "M": 215040, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.3272499999994 + }, + "M=215040,N=1152": { + "file": "silu_config_M215040_N1152.json", + "M": 215040, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1990.927250000001 + }, + "M=215040,N=1280": { + "file": "silu_config_M215040_N1280.json", + "M": 215040, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.0072499999997 + }, + "M=215040,N=1344": { + "file": "silu_config_M215040_N1344.json", + "M": 215040, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.64725 + }, + "M=215040,N=1408": { + "file": "silu_config_M215040_N1408.json", + "M": 215040, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.68725 + }, + "M=215040,N=1440": { + "file": "silu_config_M215040_N1440.json", + "M": 215040, + "N": 1440, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1988.1272499999995 + }, + "M=215040,N=1536": { + "file": "silu_config_M215040_N1536.json", + "M": 215040, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1987.0872499999996 + }, + "M=215040,N=1600": { + "file": "silu_config_M215040_N1600.json", + "M": 215040, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2009.32725 + }, + "M=215040,N=1664": { + "file": "silu_config_M215040_N1664.json", + "M": 215040, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.4872500000001 + }, + "M=215040,N=1728": { + "file": "silu_config_M215040_N1728.json", + "M": 215040, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.5672499999991 + }, + "M=215040,N=1760": { + "file": "silu_config_M215040_N1760.json", + "M": 215040, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.4872499999992 + }, + "M=215040,N=1792": { + "file": "silu_config_M215040_N1792.json", + "M": 215040, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.8872499999998 + }, + "M=215040,N=1920": { + "file": "silu_config_M215040_N1920.json", + "M": 215040, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.4072499999993 + }, + "M=215040,N=2048": { + "file": "silu_config_M215040_N2048.json", + "M": 215040, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1584.8454999999994 + }, + "M=215040,N=2080": { + "file": "silu_config_M215040_N2080.json", + "M": 215040, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2203.688 + }, + "M=215040,N=2240": { + "file": "silu_config_M215040_N2240.json", + "M": 215040, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2299.0085 + }, + "M=215040,N=2400": { + "file": "silu_config_M215040_N2400.json", + "M": 215040, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2436.6889999999994 + }, + "M=215040,N=2560": { + "file": "silu_config_M215040_N2560.json", + "M": 215040, + 
"N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.2495 + }, + "M=216064,N=128": { + "file": "silu_config_M216064_N128.json", + "M": 216064, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.3195 + }, + "M=216064,N=160": { + "file": "silu_config_M216064_N160.json", + "M": 216064, + "N": 160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 285.48025000000007 + }, + "M=216064,N=192": { + "file": "silu_config_M216064_N192.json", + "M": 216064, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.0000000000001 + }, + "M=216064,N=256": { + "file": "silu_config_M216064_N256.json", + "M": 216064, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.44000000000005 + }, + "M=216064,N=320": { + "file": "silu_config_M216064_N320.json", + "M": 216064, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 525.1210000000001 + }, + "M=216064,N=384": { + "file": "silu_config_M216064_N384.json", + "M": 216064, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 522.1209999999999 + }, + "M=216064,N=480": { + "file": "silu_config_M216064_N480.json", + "M": 216064, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.1612499999999 + }, + "M=216064,N=512": { + "file": "silu_config_M216064_N512.json", + "M": 216064, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 411.00075000000015 + }, + "M=216064,N=576": { + "file": "silu_config_M216064_N576.json", + "M": 216064, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.44325 + }, + "M=216064,N=640": { + "file": "silu_config_M216064_N640.json", + "M": 216064, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1011.0430000000001 + }, + "M=216064,N=768": { + "file": "silu_config_M216064_N768.json", + "M": 216064, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1024.0832499999997 + }, + "M=216064,N=800": { + "file": "silu_config_M216064_N800.json", + "M": 
216064, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1021.8032499999998 + }, + "M=216064,N=896": { + "file": "silu_config_M216064_N896.json", + "M": 216064, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1017.5232500000006 + }, + "M=216064,N=960": { + "file": "silu_config_M216064_N960.json", + "M": 216064, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1015.3632499999999 + }, + "M=216064,N=1024": { + "file": "silu_config_M216064_N1024.json", + "M": 216064, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 817.1222500000003 + }, + "M=216064,N=1120": { + "file": "silu_config_M216064_N1120.json", + "M": 216064, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2037.96725 + }, + "M=216064,N=1152": { + "file": "silu_config_M216064_N1152.json", + "M": 216064, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.6472499999998 + }, + "M=216064,N=1280": { + "file": "silu_config_M216064_N1280.json", + "M": 216064, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.8072499999998 + }, + "M=216064,N=1344": { + "file": "silu_config_M216064_N1344.json", + "M": 216064, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.1672500000004 + }, + "M=216064,N=1408": { + "file": "silu_config_M216064_N1408.json", + "M": 216064, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2006.0872499999998 + }, + "M=216064,N=1440": { + "file": "silu_config_M216064_N1440.json", + "M": 216064, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.8472499999998 + }, + "M=216064,N=1536": { + "file": "silu_config_M216064_N1536.json", + "M": 216064, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1998.68725 + }, + "M=216064,N=1600": { + "file": "silu_config_M216064_N1600.json", + "M": 216064, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2032.9672499999995 + }, + "M=216064,N=1664": { 
+ "file": "silu_config_M216064_N1664.json", + "M": 216064, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.3672499999993 + }, + "M=216064,N=1728": { + "file": "silu_config_M216064_N1728.json", + "M": 216064, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.96725 + }, + "M=216064,N=1760": { + "file": "silu_config_M216064_N1760.json", + "M": 216064, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.48725 + }, + "M=216064,N=1792": { + "file": "silu_config_M216064_N1792.json", + "M": 216064, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2006.6472500000007 + }, + "M=216064,N=1920": { + "file": "silu_config_M216064_N1920.json", + "M": 216064, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.0472499999994 + }, + "M=216064,N=2048": { + "file": "silu_config_M216064_N2048.json", + "M": 216064, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1598.8455000000004 + }, + "M=216064,N=2080": { + "file": "silu_config_M216064_N2080.json", + "M": 216064, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2236.6482499999993 + }, + "M=216064,N=2240": { + "file": "silu_config_M216064_N2240.json", + "M": 216064, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.3285000000005 + }, + "M=216064,N=2400": { + "file": "silu_config_M216064_N2400.json", + "M": 216064, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.729000000001 + }, + "M=216064,N=2560": { + "file": "silu_config_M216064_N2560.json", + "M": 216064, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2573.4894999999997 + }, + "M=217088,N=128": { + "file": "silu_config_M217088_N128.json", + "M": 217088, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.7595 + }, + "M=217088,N=160": { + "file": "silu_config_M217088_N160.json", + "M": 217088, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
260.6800000000001 + }, + "M=217088,N=192": { + "file": "silu_config_M217088_N192.json", + "M": 217088, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.0399999999999 + }, + "M=217088,N=256": { + "file": "silu_config_M217088_N256.json", + "M": 217088, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.60000000000002 + }, + "M=217088,N=320": { + "file": "silu_config_M217088_N320.json", + "M": 217088, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.3209999999997 + }, + "M=217088,N=384": { + "file": "silu_config_M217088_N384.json", + "M": 217088, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.4809999999998 + }, + "M=217088,N=480": { + "file": "silu_config_M217088_N480.json", + "M": 217088, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.08125 + }, + "M=217088,N=512": { + "file": "silu_config_M217088_N512.json", + "M": 217088, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 426.36075000000005 + }, + "M=217088,N=576": { + "file": "silu_config_M217088_N576.json", + "M": 217088, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.7632500000002 + }, + "M=217088,N=640": { + "file": "silu_config_M217088_N640.json", + "M": 217088, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1004.843 + }, + "M=217088,N=768": { + "file": "silu_config_M217088_N768.json", + "M": 217088, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.2832499999998 + }, + "M=217088,N=800": { + "file": "silu_config_M217088_N800.json", + "M": 217088, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1026.4832500000002 + }, + "M=217088,N=896": { + "file": "silu_config_M217088_N896.json", + "M": 217088, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1022.1632499999995 + }, + "M=217088,N=960": { + "file": "silu_config_M217088_N960.json", + "M": 217088, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 1031.0432500000002 + }, + "M=217088,N=1024": { + "file": "silu_config_M217088_N1024.json", + "M": 217088, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 820.6422499999999 + }, + "M=217088,N=1120": { + "file": "silu_config_M217088_N1120.json", + "M": 217088, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2016.3272500000003 + }, + "M=217088,N=1152": { + "file": "silu_config_M217088_N1152.json", + "M": 217088, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.64725 + }, + "M=217088,N=1280": { + "file": "silu_config_M217088_N1280.json", + "M": 217088, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2021.0072500000008 + }, + "M=217088,N=1344": { + "file": "silu_config_M217088_N1344.json", + "M": 217088, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.1274999999994 + }, + "M=217088,N=1408": { + "file": "silu_config_M217088_N1408.json", + "M": 217088, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2033.96725 + }, + "M=217088,N=1440": { + "file": "silu_config_M217088_N1440.json", + "M": 217088, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2031.2072500000004 + }, + "M=217088,N=1536": { + "file": "silu_config_M217088_N1536.json", + "M": 217088, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2018.2072499999997 + }, + "M=217088,N=1600": { + "file": "silu_config_M217088_N1600.json", + "M": 217088, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2021.727250000001 + }, + "M=217088,N=1664": { + "file": "silu_config_M217088_N1664.json", + "M": 217088, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2029.527250000001 + }, + "M=217088,N=1728": { + "file": "silu_config_M217088_N1728.json", + "M": 217088, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.647250000001 + }, + "M=217088,N=1760": { + "file": "silu_config_M217088_N1760.json", + "M": 217088, + 
"N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.9672499999997 + }, + "M=217088,N=1792": { + "file": "silu_config_M217088_N1792.json", + "M": 217088, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2026.4872499999992 + }, + "M=217088,N=1920": { + "file": "silu_config_M217088_N1920.json", + "M": 217088, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2032.36725 + }, + "M=217088,N=2048": { + "file": "silu_config_M217088_N2048.json", + "M": 217088, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1607.0055000000002 + }, + "M=217088,N=2080": { + "file": "silu_config_M217088_N2080.json", + "M": 217088, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.2082500000006 + }, + "M=217088,N=2240": { + "file": "silu_config_M217088_N2240.json", + "M": 217088, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2368.088749999999 + }, + "M=217088,N=2400": { + "file": "silu_config_M217088_N2400.json", + "M": 217088, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2498.6092499999986 + }, + "M=217088,N=2560": { + "file": "silu_config_M217088_N2560.json", + "M": 217088, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.889750000002 + }, + "M=218112,N=128": { + "file": "silu_config_M218112_N128.json", + "M": 218112, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.3195 + }, + "M=218112,N=160": { + "file": "silu_config_M218112_N160.json", + "M": 218112, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.84024999999997 + }, + "M=218112,N=192": { + "file": "silu_config_M218112_N192.json", + "M": 218112, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.4399999999999 + }, + "M=218112,N=256": { + "file": "silu_config_M218112_N256.json", + "M": 218112, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.68 + }, + "M=218112,N=320": { + "file": 
"silu_config_M218112_N320.json", + "M": 218112, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.3210000000004 + }, + "M=218112,N=384": { + "file": "silu_config_M218112_N384.json", + "M": 218112, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 526.8810000000001 + }, + "M=218112,N=480": { + "file": "silu_config_M218112_N480.json", + "M": 218112, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 535.6812500000001 + }, + "M=218112,N=512": { + "file": "silu_config_M218112_N512.json", + "M": 218112, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 428.16075000000023 + }, + "M=218112,N=576": { + "file": "silu_config_M218112_N576.json", + "M": 218112, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1026.08325 + }, + "M=218112,N=640": { + "file": "silu_config_M218112_N640.json", + "M": 218112, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1032.76325 + }, + "M=218112,N=768": { + "file": "silu_config_M218112_N768.json", + "M": 218112, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1023.6432500000001 + }, + "M=218112,N=800": { + "file": "silu_config_M218112_N800.json", + "M": 218112, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1020.20325 + }, + "M=218112,N=896": { + "file": "silu_config_M218112_N896.json", + "M": 218112, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1037.4432499999998 + }, + "M=218112,N=960": { + "file": "silu_config_M218112_N960.json", + "M": 218112, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.0832499999997 + }, + "M=218112,N=1024": { + "file": "silu_config_M218112_N1024.json", + "M": 218112, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 825.1624999999999 + }, + "M=218112,N=1120": { + "file": "silu_config_M218112_N1120.json", + "M": 218112, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2036.1672500000006 + }, + 
"M=218112,N=1152": { + "file": "silu_config_M218112_N1152.json", + "M": 218112, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2020.5672499999991 + }, + "M=218112,N=1280": { + "file": "silu_config_M218112_N1280.json", + "M": 218112, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2022.6872500000002 + }, + "M=218112,N=1344": { + "file": "silu_config_M218112_N1344.json", + "M": 218112, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.6475 + }, + "M=218112,N=1408": { + "file": "silu_config_M218112_N1408.json", + "M": 218112, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2043.2874999999995 + }, + "M=218112,N=1440": { + "file": "silu_config_M218112_N1440.json", + "M": 218112, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2051.0074999999997 + }, + "M=218112,N=1536": { + "file": "silu_config_M218112_N1536.json", + "M": 218112, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.4472500000008 + }, + "M=218112,N=1600": { + "file": "silu_config_M218112_N1600.json", + "M": 218112, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2052.2074999999995 + }, + "M=218112,N=1664": { + "file": "silu_config_M218112_N1664.json", + "M": 218112, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2049.8075 + }, + "M=218112,N=1728": { + "file": "silu_config_M218112_N1728.json", + "M": 218112, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2045.1274999999996 + }, + "M=218112,N=1760": { + "file": "silu_config_M218112_N1760.json", + "M": 218112, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.7675 + }, + "M=218112,N=1792": { + "file": "silu_config_M218112_N1792.json", + "M": 218112, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2046.3674999999992 + }, + "M=218112,N=1920": { + "file": "silu_config_M218112_N1920.json", + "M": 218112, + "N": 1920, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2062.807500000001 + }, + "M=218112,N=2048": { + "file": "silu_config_M218112_N2048.json", + "M": 218112, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1615.4054999999998 + }, + "M=218112,N=2080": { + "file": "silu_config_M218112_N2080.json", + "M": 218112, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2251.008249999999 + }, + "M=218112,N=2240": { + "file": "silu_config_M218112_N2240.json", + "M": 218112, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2359.6887500000003 + }, + "M=218112,N=2400": { + "file": "silu_config_M218112_N2400.json", + "M": 218112, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2490.8092499999993 + }, + "M=218112,N=2560": { + "file": "silu_config_M218112_N2560.json", + "M": 218112, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2620.76975 + }, + "M=219136,N=128": { + "file": "silu_config_M219136_N128.json", + "M": 219136, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.83950000000004 + }, + "M=219136,N=160": { + "file": "silu_config_M219136_N160.json", + "M": 219136, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 262.9200000000001 + }, + "M=219136,N=192": { + "file": "silu_config_M219136_N192.json", + "M": 219136, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.48025 + }, + "M=219136,N=256": { + "file": "silu_config_M219136_N256.json", + "M": 219136, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.83999999999992 + }, + "M=219136,N=320": { + "file": "silu_config_M219136_N320.json", + "M": 219136, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 532.68125 + }, + "M=219136,N=384": { + "file": "silu_config_M219136_N384.json", + "M": 219136, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.4412500000001 + }, + "M=219136,N=480": { + "file": "silu_config_M219136_N480.json", + "M": 219136, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 538.0812500000002 + }, + "M=219136,N=512": { + "file": "silu_config_M219136_N512.json", + "M": 219136, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 427.2007500000002 + }, + "M=219136,N=576": { + "file": "silu_config_M219136_N576.json", + "M": 219136, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.5632500000004 + }, + "M=219136,N=640": { + "file": "silu_config_M219136_N640.json", + "M": 219136, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1014.2032499999998 + }, + "M=219136,N=768": { + "file": "silu_config_M219136_N768.json", + "M": 219136, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1038.0832500000004 + }, + "M=219136,N=800": { + "file": "silu_config_M219136_N800.json", + "M": 219136, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.4432500000005 + }, + "M=219136,N=896": { + "file": "silu_config_M219136_N896.json", + "M": 219136, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1021.1232499999999 + }, + "M=219136,N=960": { + "file": "silu_config_M219136_N960.json", + "M": 219136, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.4832500000002 + }, + "M=219136,N=1024": { + "file": "silu_config_M219136_N1024.json", + "M": 219136, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 828.0825 + }, + "M=219136,N=1120": { + "file": "silu_config_M219136_N1120.json", + "M": 219136, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2063.4475 + }, + "M=219136,N=1152": { + "file": "silu_config_M219136_N1152.json", + "M": 219136, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.7272500000006 + }, + "M=219136,N=1280": { + "file": "silu_config_M219136_N1280.json", + "M": 219136, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2031.24725 + }, + "M=219136,N=1344": { + "file": "silu_config_M219136_N1344.json", + "M": 
219136, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.52725 + }, + "M=219136,N=1408": { + "file": "silu_config_M219136_N1408.json", + "M": 219136, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2044.1675000000005 + }, + "M=219136,N=1440": { + "file": "silu_config_M219136_N1440.json", + "M": 219136, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2060.2474999999995 + }, + "M=219136,N=1536": { + "file": "silu_config_M219136_N1536.json", + "M": 219136, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2055.5275 + }, + "M=219136,N=1600": { + "file": "silu_config_M219136_N1600.json", + "M": 219136, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2050.8875000000003 + }, + "M=219136,N=1664": { + "file": "silu_config_M219136_N1664.json", + "M": 219136, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2041.0072500000012 + }, + "M=219136,N=1728": { + "file": "silu_config_M219136_N1728.json", + "M": 219136, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2043.9675000000004 + }, + "M=219136,N=1760": { + "file": "silu_config_M219136_N1760.json", + "M": 219136, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2057.3274999999994 + }, + "M=219136,N=1792": { + "file": "silu_config_M219136_N1792.json", + "M": 219136, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2034.60725 + }, + "M=219136,N=1920": { + "file": "silu_config_M219136_N1920.json", + "M": 219136, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2060.8075000000003 + }, + "M=219136,N=2048": { + "file": "silu_config_M219136_N2048.json", + "M": 219136, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1630.5257500000002 + }, + "M=219136,N=2080": { + "file": "silu_config_M219136_N2080.json", + "M": 219136, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2247.4482500000004 + }, + "M=219136,N=2240": { + 
"file": "silu_config_M219136_N2240.json", + "M": 219136, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2378.5287499999995 + }, + "M=219136,N=2400": { + "file": "silu_config_M219136_N2400.json", + "M": 219136, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2499.52925 + }, + "M=219136,N=2560": { + "file": "silu_config_M219136_N2560.json", + "M": 219136, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.8900000000003 + }, + "M=220160,N=128": { + "file": "silu_config_M220160_N128.json", + "M": 220160, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.11950000000004 + }, + "M=220160,N=160": { + "file": "silu_config_M220160_N160.json", + "M": 220160, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 280.63999999999993 + }, + "M=220160,N=192": { + "file": "silu_config_M220160_N192.json", + "M": 220160, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 286.44024999999993 + }, + "M=220160,N=256": { + "file": "silu_config_M220160_N256.json", + "M": 220160, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.28 + }, + "M=220160,N=320": { + "file": "silu_config_M220160_N320.json", + "M": 220160, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.32125 + }, + "M=220160,N=384": { + "file": "silu_config_M220160_N384.json", + "M": 220160, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.8009999999999 + }, + "M=220160,N=480": { + "file": "silu_config_M220160_N480.json", + "M": 220160, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.6012499999999 + }, + "M=220160,N=512": { + "file": "silu_config_M220160_N512.json", + "M": 220160, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 417.5607500000003 + }, + "M=220160,N=576": { + "file": "silu_config_M220160_N576.json", + "M": 220160, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.7632499999999 + }, + 
"M=220160,N=640": { + "file": "silu_config_M220160_N640.json", + "M": 220160, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1023.6432499999996 + }, + "M=220160,N=768": { + "file": "silu_config_M220160_N768.json", + "M": 220160, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.80325 + }, + "M=220160,N=800": { + "file": "silu_config_M220160_N800.json", + "M": 220160, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1030.80325 + }, + "M=220160,N=896": { + "file": "silu_config_M220160_N896.json", + "M": 220160, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.80325 + }, + "M=220160,N=960": { + "file": "silu_config_M220160_N960.json", + "M": 220160, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1047.7232499999996 + }, + "M=220160,N=1024": { + "file": "silu_config_M220160_N1024.json", + "M": 220160, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 834.5224999999996 + }, + "M=220160,N=1120": { + "file": "silu_config_M220160_N1120.json", + "M": 220160, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2052.9275 + }, + "M=220160,N=1152": { + "file": "silu_config_M220160_N1152.json", + "M": 220160, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.0872500000007 + }, + "M=220160,N=1280": { + "file": "silu_config_M220160_N1280.json", + "M": 220160, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2019.8472499999998 + }, + "M=220160,N=1344": { + "file": "silu_config_M220160_N1344.json", + "M": 220160, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2055.0074999999997 + }, + "M=220160,N=1408": { + "file": "silu_config_M220160_N1408.json", + "M": 220160, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2042.7675000000002 + }, + "M=220160,N=1440": { + "file": "silu_config_M220160_N1440.json", + "M": 220160, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2046.1274999999994 + }, + "M=220160,N=1536": { + "file": "silu_config_M220160_N1536.json", + "M": 220160, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2044.8474999999999 + }, + "M=220160,N=1600": { + "file": "silu_config_M220160_N1600.json", + "M": 220160, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.3275 + }, + "M=220160,N=1664": { + "file": "silu_config_M220160_N1664.json", + "M": 220160, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2037.68725 + }, + "M=220160,N=1728": { + "file": "silu_config_M220160_N1728.json", + "M": 220160, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2062.8875 + }, + "M=220160,N=1760": { + "file": "silu_config_M220160_N1760.json", + "M": 220160, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2066.1275000000005 + }, + "M=220160,N=1792": { + "file": "silu_config_M220160_N1792.json", + "M": 220160, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.2472499999994 + }, + "M=220160,N=1920": { + "file": "silu_config_M220160_N1920.json", + "M": 220160, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.32725 + }, + "M=220160,N=2048": { + "file": "silu_config_M220160_N2048.json", + "M": 220160, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1639.7657500000005 + }, + "M=220160,N=2080": { + "file": "silu_config_M220160_N2080.json", + "M": 220160, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.728250000001 + }, + "M=220160,N=2240": { + "file": "silu_config_M220160_N2240.json", + "M": 220160, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.008749999999 + }, + "M=220160,N=2400": { + "file": "silu_config_M220160_N2400.json", + "M": 220160, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2503.6092499999986 + }, + "M=220160,N=2560": { + "file": "silu_config_M220160_N2560.json", + "M": 220160, + "N": 2560, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2632.249749999999 + }, + "M=221184,N=128": { + "file": "silu_config_M221184_N128.json", + "M": 221184, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.95949999999999 + }, + "M=221184,N=160": { + "file": "silu_config_M221184_N160.json", + "M": 221184, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 262.5199999999999 + }, + "M=221184,N=192": { + "file": "silu_config_M221184_N192.json", + "M": 221184, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.7202499999999 + }, + "M=221184,N=256": { + "file": "silu_config_M221184_N256.json", + "M": 221184, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.24024999999995 + }, + "M=221184,N=320": { + "file": "silu_config_M221184_N320.json", + "M": 221184, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 537.0412500000001 + }, + "M=221184,N=384": { + "file": "silu_config_M221184_N384.json", + "M": 221184, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.6412499999999 + }, + "M=221184,N=480": { + "file": "silu_config_M221184_N480.json", + "M": 221184, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.3212499999997 + }, + "M=221184,N=512": { + "file": "silu_config_M221184_N512.json", + "M": 221184, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 418.6007500000003 + }, + "M=221184,N=576": { + "file": "silu_config_M221184_N576.json", + "M": 221184, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1029.6832499999998 + }, + "M=221184,N=640": { + "file": "silu_config_M221184_N640.json", + "M": 221184, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632500000002 + }, + "M=221184,N=768": { + "file": "silu_config_M221184_N768.json", + "M": 221184, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1027.64325 + }, + "M=221184,N=800": { + "file": "silu_config_M221184_N800.json", + "M": 
221184, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1055.8832499999999 + }, + "M=221184,N=896": { + "file": "silu_config_M221184_N896.json", + "M": 221184, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1044.5632499999997 + }, + "M=221184,N=960": { + "file": "silu_config_M221184_N960.json", + "M": 221184, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.76325 + }, + "M=221184,N=1024": { + "file": "silu_config_M221184_N1024.json", + "M": 221184, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 838.4025000000001 + }, + "M=221184,N=1120": { + "file": "silu_config_M221184_N1120.json", + "M": 221184, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2056.2475000000004 + }, + "M=221184,N=1152": { + "file": "silu_config_M221184_N1152.json", + "M": 221184, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2049.7675 + }, + "M=221184,N=1280": { + "file": "silu_config_M221184_N1280.json", + "M": 221184, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2041.2075000000004 + }, + "M=221184,N=1344": { + "file": "silu_config_M221184_N1344.json", + "M": 221184, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.407500000001 + }, + "M=221184,N=1408": { + "file": "silu_config_M221184_N1408.json", + "M": 221184, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2062.1674999999996 + }, + "M=221184,N=1440": { + "file": "silu_config_M221184_N1440.json", + "M": 221184, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.0474999999997 + }, + "M=221184,N=1536": { + "file": "silu_config_M221184_N1536.json", + "M": 221184, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2046.0875000000005 + }, + "M=221184,N=1600": { + "file": "silu_config_M221184_N1600.json", + "M": 221184, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.6475 + }, + "M=221184,N=1664": { + "file": 
"silu_config_M221184_N1664.json", + "M": 221184, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.3675000000003 + }, + "M=221184,N=1728": { + "file": "silu_config_M221184_N1728.json", + "M": 221184, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2077.0874999999996 + }, + "M=221184,N=1760": { + "file": "silu_config_M221184_N1760.json", + "M": 221184, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2098.1275000000005 + }, + "M=221184,N=1792": { + "file": "silu_config_M221184_N1792.json", + "M": 221184, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2056.487500000001 + }, + "M=221184,N=1920": { + "file": "silu_config_M221184_N1920.json", + "M": 221184, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.8475 + }, + "M=221184,N=2048": { + "file": "silu_config_M221184_N2048.json", + "M": 221184, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1639.4457499999999 + }, + "M=221184,N=2080": { + "file": "silu_config_M221184_N2080.json", + "M": 221184, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2288.2484999999997 + }, + "M=221184,N=2240": { + "file": "silu_config_M221184_N2240.json", + "M": 221184, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2420.849 + }, + "M=221184,N=2400": { + "file": "silu_config_M221184_N2400.json", + "M": 221184, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.4494999999997 + }, + "M=221184,N=2560": { + "file": "silu_config_M221184_N2560.json", + "M": 221184, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.7700000000013 + }, + "M=222208,N=128": { + "file": "silu_config_M222208_N128.json", + "M": 222208, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.39950000000002 + }, + "M=222208,N=160": { + "file": "silu_config_M222208_N160.json", + "M": 222208, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
284.5599999999999 + }, + "M=222208,N=192": { + "file": "silu_config_M222208_N192.json", + "M": 222208, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 277.84024999999997 + }, + "M=222208,N=256": { + "file": "silu_config_M222208_N256.json", + "M": 222208, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 241.19999999999993 + }, + "M=222208,N=320": { + "file": "silu_config_M222208_N320.json", + "M": 222208, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.0812499999998 + }, + "M=222208,N=384": { + "file": "silu_config_M222208_N384.json", + "M": 222208, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.8412500000002 + }, + "M=222208,N=480": { + "file": "silu_config_M222208_N480.json", + "M": 222208, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 545.2012500000001 + }, + "M=222208,N=512": { + "file": "silu_config_M222208_N512.json", + "M": 222208, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 436.1207500000005 + }, + "M=222208,N=576": { + "file": "silu_config_M222208_N576.json", + "M": 222208, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632499999997 + }, + "M=222208,N=640": { + "file": "silu_config_M222208_N640.json", + "M": 222208, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1038.5632500000002 + }, + "M=222208,N=768": { + "file": "silu_config_M222208_N768.json", + "M": 222208, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.0832500000001 + }, + "M=222208,N=800": { + "file": "silu_config_M222208_N800.json", + "M": 222208, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1051.32325 + }, + "M=222208,N=896": { + "file": "silu_config_M222208_N896.json", + "M": 222208, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1058.12325 + }, + "M=222208,N=960": { + "file": "silu_config_M222208_N960.json", + "M": 222208, + "N": 960, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1067.32325 + }, + "M=222208,N=1024": { + "file": "silu_config_M222208_N1024.json", + "M": 222208, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 842.4424999999997 + }, + "M=222208,N=1120": { + "file": "silu_config_M222208_N1120.json", + "M": 222208, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2086.2874999999995 + }, + "M=222208,N=1152": { + "file": "silu_config_M222208_N1152.json", + "M": 222208, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.8475000000008 + }, + "M=222208,N=1280": { + "file": "silu_config_M222208_N1280.json", + "M": 222208, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.5675 + }, + "M=222208,N=1344": { + "file": "silu_config_M222208_N1344.json", + "M": 222208, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2077.6875 + }, + "M=222208,N=1408": { + "file": "silu_config_M222208_N1408.json", + "M": 222208, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2081.7275 + }, + "M=222208,N=1440": { + "file": "silu_config_M222208_N1440.json", + "M": 222208, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2085.8875000000007 + }, + "M=222208,N=1536": { + "file": "silu_config_M222208_N1536.json", + "M": 222208, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.3274999999994 + }, + "M=222208,N=1600": { + "file": "silu_config_M222208_N1600.json", + "M": 222208, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.9674999999997 + }, + "M=222208,N=1664": { + "file": "silu_config_M222208_N1664.json", + "M": 222208, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.9674999999997 + }, + "M=222208,N=1728": { + "file": "silu_config_M222208_N1728.json", + "M": 222208, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2097.0074999999997 + }, + "M=222208,N=1760": { + "file": "silu_config_M222208_N1760.json", + "M": 222208, + "N": 1760, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2100.6075 + }, + "M=222208,N=1792": { + "file": "silu_config_M222208_N1792.json", + "M": 222208, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.4474999999993 + }, + "M=222208,N=1920": { + "file": "silu_config_M222208_N1920.json", + "M": 222208, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.1275000000014 + }, + "M=222208,N=2048": { + "file": "silu_config_M222208_N2048.json", + "M": 222208, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1663.1257499999992 + }, + "M=222208,N=2080": { + "file": "silu_config_M222208_N2080.json", + "M": 222208, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2298.3684999999996 + }, + "M=222208,N=2240": { + "file": "silu_config_M222208_N2240.json", + "M": 222208, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.6489999999985 + }, + "M=222208,N=2400": { + "file": "silu_config_M222208_N2400.json", + "M": 222208, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.8094999999994 + }, + "M=222208,N=2560": { + "file": "silu_config_M222208_N2560.json", + "M": 222208, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.6899999999996 + }, + "M=223232,N=128": { + "file": "silu_config_M223232_N128.json", + "M": 223232, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.95950000000002 + }, + "M=223232,N=160": { + "file": "silu_config_M223232_N160.json", + "M": 223232, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 275.51999999999987 + }, + "M=223232,N=192": { + "file": "silu_config_M223232_N192.json", + "M": 223232, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.32000000000005 + }, + "M=223232,N=256": { + "file": "silu_config_M223232_N256.json", + "M": 223232, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.80000000000007 + }, + "M=223232,N=320": { + "file": 
"silu_config_M223232_N320.json", + "M": 223232, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 542.0812500000002 + }, + "M=223232,N=384": { + "file": "silu_config_M223232_N384.json", + "M": 223232, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 539.6812500000001 + }, + "M=223232,N=480": { + "file": "silu_config_M223232_N480.json", + "M": 223232, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 548.7212499999998 + }, + "M=223232,N=512": { + "file": "silu_config_M223232_N512.json", + "M": 223232, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 438.1607499999998 + }, + "M=223232,N=576": { + "file": "silu_config_M223232_N576.json", + "M": 223232, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.84325 + }, + "M=223232,N=640": { + "file": "silu_config_M223232_N640.json", + "M": 223232, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.4032500000003 + }, + "M=223232,N=768": { + "file": "silu_config_M223232_N768.json", + "M": 223232, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1067.7632500000004 + }, + "M=223232,N=800": { + "file": "silu_config_M223232_N800.json", + "M": 223232, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1065.5632499999997 + }, + "M=223232,N=896": { + "file": "silu_config_M223232_N896.json", + "M": 223232, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1053.76325 + }, + "M=223232,N=960": { + "file": "silu_config_M223232_N960.json", + "M": 223232, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1072.20325 + }, + "M=223232,N=1024": { + "file": "silu_config_M223232_N1024.json", + "M": 223232, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 846.2824999999998 + }, + "M=223232,N=1120": { + "file": "silu_config_M223232_N1120.json", + "M": 223232, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + 
"M=223232,N=1152": { + "file": "silu_config_M223232_N1152.json", + "M": 223232, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.6475 + }, + "M=223232,N=1280": { + "file": "silu_config_M223232_N1280.json", + "M": 223232, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.2074999999995 + }, + "M=223232,N=1344": { + "file": "silu_config_M223232_N1344.json", + "M": 223232, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2087.2874999999995 + }, + "M=223232,N=1408": { + "file": "silu_config_M223232_N1408.json", + "M": 223232, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2062.0075000000006 + }, + "M=223232,N=1440": { + "file": "silu_config_M223232_N1440.json", + "M": 223232, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2106.4877500000002 + }, + "M=223232,N=1536": { + "file": "silu_config_M223232_N1536.json", + "M": 223232, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.8475 + }, + "M=223232,N=1600": { + "file": "silu_config_M223232_N1600.json", + "M": 223232, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2096.9275 + }, + "M=223232,N=1664": { + "file": "silu_config_M223232_N1664.json", + "M": 223232, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.4075000000003 + }, + "M=223232,N=1728": { + "file": "silu_config_M223232_N1728.json", + "M": 223232, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2086.0875000000015 + }, + "M=223232,N=1760": { + "file": "silu_config_M223232_N1760.json", + "M": 223232, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.6875 + }, + "M=223232,N=1792": { + "file": "silu_config_M223232_N1792.json", + "M": 223232, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2105.0877499999997 + }, + "M=223232,N=1920": { + "file": "silu_config_M223232_N1920.json", + "M": 223232, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2102.72775 + }, + "M=223232,N=2048": { + "file": "silu_config_M223232_N2048.json", + "M": 223232, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1654.485749999999 + }, + "M=223232,N=2080": { + "file": "silu_config_M223232_N2080.json", + "M": 223232, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2299.008499999999 + }, + "M=223232,N=2240": { + "file": "silu_config_M223232_N2240.json", + "M": 223232, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2433.169 + }, + "M=223232,N=2400": { + "file": "silu_config_M223232_N2400.json", + "M": 223232, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.2895 + }, + "M=223232,N=2560": { + "file": "silu_config_M223232_N2560.json", + "M": 223232, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2676.289999999999 + }, + "M=224256,N=128": { + "file": "silu_config_M224256_N128.json", + "M": 224256, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.5995 + }, + "M=224256,N=160": { + "file": "silu_config_M224256_N160.json", + "M": 224256, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.24025 + }, + "M=224256,N=192": { + "file": "silu_config_M224256_N192.json", + "M": 224256, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.20025 + }, + "M=224256,N=256": { + "file": "silu_config_M224256_N256.json", + "M": 224256, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.19999999999993 + }, + "M=224256,N=320": { + "file": "silu_config_M224256_N320.json", + "M": 224256, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 544.0412500000001 + }, + "M=224256,N=384": { + "file": "silu_config_M224256_N384.json", + "M": 224256, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.2012499999998 + }, + "M=224256,N=480": { + "file": "silu_config_M224256_N480.json", + "M": 224256, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 550.6012500000002 + }, + "M=224256,N=512": { + "file": "silu_config_M224256_N512.json", + "M": 224256, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 424.7607499999997 + }, + "M=224256,N=576": { + "file": "silu_config_M224256_N576.json", + "M": 224256, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.72325 + }, + "M=224256,N=640": { + "file": "silu_config_M224256_N640.json", + "M": 224256, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.1232499999996 + }, + "M=224256,N=768": { + "file": "silu_config_M224256_N768.json", + "M": 224256, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1041.20325 + }, + "M=224256,N=800": { + "file": "silu_config_M224256_N800.json", + "M": 224256, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.5632500000006 + }, + "M=224256,N=896": { + "file": "silu_config_M224256_N896.json", + "M": 224256, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1057.32325 + }, + "M=224256,N=960": { + "file": "silu_config_M224256_N960.json", + "M": 224256, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1056.0032500000007 + }, + "M=224256,N=1024": { + "file": "silu_config_M224256_N1024.json", + "M": 224256, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 850.2424999999998 + }, + "M=224256,N=1120": { + "file": "silu_config_M224256_N1120.json", + "M": 224256, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.2077500000005 + }, + "M=224256,N=1152": { + "file": "silu_config_M224256_N1152.json", + "M": 224256, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2086.2474999999995 + }, + "M=224256,N=1280": { + "file": "silu_config_M224256_N1280.json", + "M": 224256, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.6875 + }, + "M=224256,N=1344": { + "file": "silu_config_M224256_N1344.json", + "M": 224256, + "N": 1344, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 2103.52775 + }, + "M=224256,N=1408": { + "file": "silu_config_M224256_N1408.json", + "M": 224256, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2092.0075000000006 + }, + "M=224256,N=1440": { + "file": "silu_config_M224256_N1440.json", + "M": 224256, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2088.4474999999993 + }, + "M=224256,N=1536": { + "file": "silu_config_M224256_N1536.json", + "M": 224256, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2095.0074999999997 + }, + "M=224256,N=1600": { + "file": "silu_config_M224256_N1600.json", + "M": 224256, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.0475000000015 + }, + "M=224256,N=1664": { + "file": "silu_config_M224256_N1664.json", + "M": 224256, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.7275 + }, + "M=224256,N=1728": { + "file": "silu_config_M224256_N1728.json", + "M": 224256, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2116.0877499999997 + }, + "M=224256,N=1760": { + "file": "silu_config_M224256_N1760.json", + "M": 224256, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2109.3677499999994 + }, + "M=224256,N=1792": { + "file": "silu_config_M224256_N1792.json", + "M": 224256, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2103.4477500000003 + }, + "M=224256,N=1920": { + "file": "silu_config_M224256_N1920.json", + "M": 224256, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2111.88775 + }, + "M=224256,N=2048": { + "file": "silu_config_M224256_N2048.json", + "M": 224256, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1669.2457499999991 + }, + "M=224256,N=2080": { + "file": "silu_config_M224256_N2080.json", + "M": 224256, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.4085000000005 + }, + "M=224256,N=2240": { + "file": "silu_config_M224256_N2240.json", + 
"M": 224256, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2453.849000000001 + }, + "M=224256,N=2400": { + "file": "silu_config_M224256_N2400.json", + "M": 224256, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.409499999997 + }, + "M=224256,N=2560": { + "file": "silu_config_M224256_N2560.json", + "M": 224256, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.8500000000004 + }, + "M=225280,N=128": { + "file": "silu_config_M225280_N128.json", + "M": 225280, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.91949999999999 + }, + "M=225280,N=160": { + "file": "silu_config_M225280_N160.json", + "M": 225280, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.20000000000016 + }, + "M=225280,N=192": { + "file": "silu_config_M225280_N192.json", + "M": 225280, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.0402500000001 + }, + "M=225280,N=256": { + "file": "silu_config_M225280_N256.json", + "M": 225280, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 244.12 + }, + "M=225280,N=320": { + "file": "silu_config_M225280_N320.json", + "M": 225280, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.3212500000001 + }, + "M=225280,N=384": { + "file": "silu_config_M225280_N384.json", + "M": 225280, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.6012499999999 + }, + "M=225280,N=480": { + "file": "silu_config_M225280_N480.json", + "M": 225280, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.4812499999998 + }, + "M=225280,N=512": { + "file": "silu_config_M225280_N512.json", + "M": 225280, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 438.5607500000001 + }, + "M=225280,N=576": { + "file": "silu_config_M225280_N576.json", + "M": 225280, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.68325 + }, + "M=225280,N=640": { + "file": 
"silu_config_M225280_N640.json", + "M": 225280, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.64325 + }, + "M=225280,N=768": { + "file": "silu_config_M225280_N768.json", + "M": 225280, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1029.16325 + }, + "M=225280,N=800": { + "file": "silu_config_M225280_N800.json", + "M": 225280, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.3632499999999 + }, + "M=225280,N=896": { + "file": "silu_config_M225280_N896.json", + "M": 225280, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1034.6032500000006 + }, + "M=225280,N=960": { + "file": "silu_config_M225280_N960.json", + "M": 225280, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.8832499999999 + }, + "M=225280,N=1024": { + "file": "silu_config_M225280_N1024.json", + "M": 225280, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 852.8827500000002 + }, + "M=225280,N=1120": { + "file": "silu_config_M225280_N1120.json", + "M": 225280, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.7675 + }, + "M=225280,N=1152": { + "file": "silu_config_M225280_N1152.json", + "M": 225280, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2053.7275 + }, + "M=225280,N=1280": { + "file": "silu_config_M225280_N1280.json", + "M": 225280, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2055.4875 + }, + "M=225280,N=1344": { + "file": "silu_config_M225280_N1344.json", + "M": 225280, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2080.9674999999997 + }, + "M=225280,N=1408": { + "file": "silu_config_M225280_N1408.json", + "M": 225280, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2057.3675000000003 + }, + "M=225280,N=1440": { + "file": "silu_config_M225280_N1440.json", + "M": 225280, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2072.3675000000003 + }, + 
"M=225280,N=1536": { + "file": "silu_config_M225280_N1536.json", + "M": 225280, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.9275000000007 + }, + "M=225280,N=1600": { + "file": "silu_config_M225280_N1600.json", + "M": 225280, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.8475 + }, + "M=225280,N=1664": { + "file": "silu_config_M225280_N1664.json", + "M": 225280, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2065.6475 + }, + "M=225280,N=1728": { + "file": "silu_config_M225280_N1728.json", + "M": 225280, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.3675000000003 + }, + "M=225280,N=1760": { + "file": "silu_config_M225280_N1760.json", + "M": 225280, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2092.8075 + }, + "M=225280,N=1792": { + "file": "silu_config_M225280_N1792.json", + "M": 225280, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.3275000000003 + }, + "M=225280,N=1920": { + "file": "silu_config_M225280_N1920.json", + "M": 225280, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.3274999999994 + }, + "M=225280,N=2048": { + "file": "silu_config_M225280_N2048.json", + "M": 225280, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1649.6057499999997 + }, + "M=225280,N=2080": { + "file": "silu_config_M225280_N2080.json", + "M": 225280, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.5684999999994 + }, + "M=225280,N=2240": { + "file": "silu_config_M225280_N2240.json", + "M": 225280, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.048999999999 + }, + "M=225280,N=2400": { + "file": "silu_config_M225280_N2400.json", + "M": 225280, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2591.8897499999975 + }, + "M=225280,N=2560": { + "file": "silu_config_M225280_N2560.json", + "M": 225280, + "N": 2560, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2661.890000000002 + }, + "M=226304,N=128": { + "file": "silu_config_M226304_N128.json", + "M": 226304, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 126.67950000000003 + }, + "M=226304,N=160": { + "file": "silu_config_M226304_N160.json", + "M": 226304, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.51999999999987 + }, + "M=226304,N=192": { + "file": "silu_config_M226304_N192.json", + "M": 226304, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.7602499999998 + }, + "M=226304,N=256": { + "file": "silu_config_M226304_N256.json", + "M": 226304, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 245.36000000000013 + }, + "M=226304,N=320": { + "file": "silu_config_M226304_N320.json", + "M": 226304, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.6412499999999 + }, + "M=226304,N=384": { + "file": "silu_config_M226304_N384.json", + "M": 226304, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.7212500000003 + }, + "M=226304,N=480": { + "file": "silu_config_M226304_N480.json", + "M": 226304, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 555.72125 + }, + "M=226304,N=512": { + "file": "silu_config_M226304_N512.json", + "M": 226304, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 438.40075 + }, + "M=226304,N=576": { + "file": "silu_config_M226304_N576.json", + "M": 226304, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.5632499999997 + }, + "M=226304,N=640": { + "file": "silu_config_M226304_N640.json", + "M": 226304, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1048.32325 + }, + "M=226304,N=768": { + "file": "silu_config_M226304_N768.json", + "M": 226304, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.0032500000002 + }, + "M=226304,N=800": { + "file": "silu_config_M226304_N800.json", + "M": 226304, + "N": 800, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 1070.4832499999998 + }, + "M=226304,N=896": { + "file": "silu_config_M226304_N896.json", + "M": 226304, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.6032499999997 + }, + "M=226304,N=960": { + "file": "silu_config_M226304_N960.json", + "M": 226304, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1065.5232499999997 + }, + "M=226304,N=1024": { + "file": "silu_config_M226304_N1024.json", + "M": 226304, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 858.4025000000001 + }, + "M=226304,N=1120": { + "file": "silu_config_M226304_N1120.json", + "M": 226304, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.2477500000005 + }, + "M=226304,N=1152": { + "file": "silu_config_M226304_N1152.json", + "M": 226304, + "N": 1152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2116.1677500000005 + }, + "M=226304,N=1280": { + "file": "silu_config_M226304_N1280.json", + "M": 226304, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.567499999999 + }, + "M=226304,N=1344": { + "file": "silu_config_M226304_N1344.json", + "M": 226304, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.5277499999993 + }, + "M=226304,N=1408": { + "file": "silu_config_M226304_N1408.json", + "M": 226304, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.8075 + }, + "M=226304,N=1440": { + "file": "silu_config_M226304_N1440.json", + "M": 226304, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2128.2877500000004 + }, + "M=226304,N=1536": { + "file": "silu_config_M226304_N1536.json", + "M": 226304, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2114.0077499999998 + }, + "M=226304,N=1600": { + "file": "silu_config_M226304_N1600.json", + "M": 226304, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2108.2477499999995 + }, + "M=226304,N=1664": { + "file": 
"silu_config_M226304_N1664.json", + "M": 226304, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2088.4875 + }, + "M=226304,N=1728": { + "file": "silu_config_M226304_N1728.json", + "M": 226304, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2114.3677499999994 + }, + "M=226304,N=1760": { + "file": "silu_config_M226304_N1760.json", + "M": 226304, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2128.60775 + }, + "M=226304,N=1792": { + "file": "silu_config_M226304_N1792.json", + "M": 226304, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2104.0477499999997 + }, + "M=226304,N=1920": { + "file": "silu_config_M226304_N1920.json", + "M": 226304, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2120.68775 + }, + "M=226304,N=2048": { + "file": "silu_config_M226304_N2048.json", + "M": 226304, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1683.286 + }, + "M=226304,N=2080": { + "file": "silu_config_M226304_N2080.json", + "M": 226304, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2330.768499999999 + }, + "M=226304,N=2240": { + "file": "silu_config_M226304_N2240.json", + "M": 226304, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.089250000001 + }, + "M=226304,N=2400": { + "file": "silu_config_M226304_N2400.json", + "M": 226304, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2592.969750000001 + }, + "M=226304,N=2560": { + "file": "silu_config_M226304_N2560.json", + "M": 226304, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2714.2902499999973 + }, + "M=227328,N=128": { + "file": "silu_config_M227328_N128.json", + "M": 227328, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.15974999999996 + }, + "M=227328,N=160": { + "file": "silu_config_M227328_N160.json", + "M": 227328, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.64025000000004 + }, + 
"M=227328,N=192": { + "file": "silu_config_M227328_N192.json", + "M": 227328, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.92024999999995 + }, + "M=227328,N=256": { + "file": "silu_config_M227328_N256.json", + "M": 227328, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 246.36 + }, + "M=227328,N=320": { + "file": "silu_config_M227328_N320.json", + "M": 227328, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 551.7612500000001 + }, + "M=227328,N=384": { + "file": "silu_config_M227328_N384.json", + "M": 227328, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.04125 + }, + "M=227328,N=480": { + "file": "silu_config_M227328_N480.json", + "M": 227328, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 558.0412500000002 + }, + "M=227328,N=512": { + "file": "silu_config_M227328_N512.json", + "M": 227328, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 429.8407500000003 + }, + "M=227328,N=576": { + "file": "silu_config_M227328_N576.json", + "M": 227328, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1068.1232500000006 + }, + "M=227328,N=640": { + "file": "silu_config_M227328_N640.json", + "M": 227328, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.8032500000004 + }, + "M=227328,N=768": { + "file": "silu_config_M227328_N768.json", + "M": 227328, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.1635 + }, + "M=227328,N=800": { + "file": "silu_config_M227328_N800.json", + "M": 227328, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1084.2835 + }, + "M=227328,N=896": { + "file": "silu_config_M227328_N896.json", + "M": 227328, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1071.4032499999998 + }, + "M=227328,N=960": { + "file": "silu_config_M227328_N960.json", + "M": 227328, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1090.9634999999998 + }, + 
"M=227328,N=1024": { + "file": "silu_config_M227328_N1024.json", + "M": 227328, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 861.64275 + }, + "M=227328,N=1120": { + "file": "silu_config_M227328_N1120.json", + "M": 227328, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2123.0077499999998 + }, + "M=227328,N=1152": { + "file": "silu_config_M227328_N1152.json", + "M": 227328, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.887750000001 + }, + "M=227328,N=1280": { + "file": "silu_config_M227328_N1280.json", + "M": 227328, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2107.807750000001 + }, + "M=227328,N=1344": { + "file": "silu_config_M227328_N1344.json", + "M": 227328, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.80775 + }, + "M=227328,N=1408": { + "file": "silu_config_M227328_N1408.json", + "M": 227328, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2099.5275 + }, + "M=227328,N=1440": { + "file": "silu_config_M227328_N1440.json", + "M": 227328, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.92775 + }, + "M=227328,N=1536": { + "file": "silu_config_M227328_N1536.json", + "M": 227328, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2102.4877500000002 + }, + "M=227328,N=1600": { + "file": "silu_config_M227328_N1600.json", + "M": 227328, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2117.4077500000003 + }, + "M=227328,N=1664": { + "file": "silu_config_M227328_N1664.json", + "M": 227328, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.3674999999994 + }, + "M=227328,N=1728": { + "file": "silu_config_M227328_N1728.json", + "M": 227328, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2144.447750000001 + }, + "M=227328,N=1760": { + "file": "silu_config_M227328_N1760.json", + "M": 227328, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 2137.96775 + }, + "M=227328,N=1792": { + "file": "silu_config_M227328_N1792.json", + "M": 227328, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2123.4877499999993 + }, + "M=227328,N=1920": { + "file": "silu_config_M227328_N1920.json", + "M": 227328, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2119.847749999999 + }, + "M=227328,N=2048": { + "file": "silu_config_M227328_N2048.json", + "M": 227328, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1693.326 + }, + "M=227328,N=2080": { + "file": "silu_config_M227328_N2080.json", + "M": 227328, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2340.968499999999 + }, + "M=227328,N=2240": { + "file": "silu_config_M227328_N2240.json", + "M": 227328, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2476.6892500000013 + }, + "M=227328,N=2400": { + "file": "silu_config_M227328_N2400.json", + "M": 227328, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.4497500000016 + }, + "M=227328,N=2560": { + "file": "silu_config_M227328_N2560.json", + "M": 227328, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2715.9302499999976 + }, + "M=228352,N=128": { + "file": "silu_config_M228352_N128.json", + "M": 228352, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.79974999999999 + }, + "M=228352,N=160": { + "file": "silu_config_M228352_N160.json", + "M": 228352, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.2 + }, + "M=228352,N=192": { + "file": "silu_config_M228352_N192.json", + "M": 228352, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.04025000000007 + }, + "M=228352,N=256": { + "file": "silu_config_M228352_N256.json", + "M": 228352, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.24 + }, + "M=228352,N=320": { + "file": "silu_config_M228352_N320.json", + "M": 228352, + "N": 320, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 554.3212499999998 + }, + "M=228352,N=384": { + "file": "silu_config_M228352_N384.json", + "M": 228352, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 550.7612499999998 + }, + "M=228352,N=480": { + "file": "silu_config_M228352_N480.json", + "M": 228352, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.6012499999997 + }, + "M=228352,N=512": { + "file": "silu_config_M228352_N512.json", + "M": 228352, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 447.8810000000003 + }, + "M=228352,N=576": { + "file": "silu_config_M228352_N576.json", + "M": 228352, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.2832500000004 + }, + "M=228352,N=640": { + "file": "silu_config_M228352_N640.json", + "M": 228352, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.84325 + }, + "M=228352,N=768": { + "file": "silu_config_M228352_N768.json", + "M": 228352, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1070.2832500000004 + }, + "M=228352,N=800": { + "file": "silu_config_M228352_N800.json", + "M": 228352, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1078.4434999999999 + }, + "M=228352,N=896": { + "file": "silu_config_M228352_N896.json", + "M": 228352, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1077.5234999999998 + }, + "M=228352,N=960": { + "file": "silu_config_M228352_N960.json", + "M": 228352, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.4834999999998 + }, + "M=228352,N=1024": { + "file": "silu_config_M228352_N1024.json", + "M": 228352, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 864.9625000000001 + }, + "M=228352,N=1120": { + "file": "silu_config_M228352_N1120.json", + "M": 228352, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.848 + }, + "M=228352,N=1152": { + "file": "silu_config_M228352_N1152.json", + "M": 228352, + "N": 1152, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.2077500000005 + }, + "M=228352,N=1280": { + "file": "silu_config_M228352_N1280.json", + "M": 228352, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2096.5274999999992 + }, + "M=228352,N=1344": { + "file": "silu_config_M228352_N1344.json", + "M": 228352, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.0477499999997 + }, + "M=228352,N=1408": { + "file": "silu_config_M228352_N1408.json", + "M": 228352, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2140.1677499999996 + }, + "M=228352,N=1440": { + "file": "silu_config_M228352_N1440.json", + "M": 228352, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2126.4477499999994 + }, + "M=228352,N=1536": { + "file": "silu_config_M228352_N1536.json", + "M": 228352, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.80775 + }, + "M=228352,N=1600": { + "file": "silu_config_M228352_N1600.json", + "M": 228352, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.2077499999996 + }, + "M=228352,N=1664": { + "file": "silu_config_M228352_N1664.json", + "M": 228352, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.88775 + }, + "M=228352,N=1728": { + "file": "silu_config_M228352_N1728.json", + "M": 228352, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2143.76775 + }, + "M=228352,N=1760": { + "file": "silu_config_M228352_N1760.json", + "M": 228352, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.4477500000003 + }, + "M=228352,N=1792": { + "file": "silu_config_M228352_N1792.json", + "M": 228352, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2153.9277500000007 + }, + "M=228352,N=1920": { + "file": "silu_config_M228352_N1920.json", + "M": 228352, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2139.60775 + }, + "M=228352,N=2048": { + "file": 
"silu_config_M228352_N2048.json", + "M": 228352, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1699.2859999999991 + }, + "M=228352,N=2080": { + "file": "silu_config_M228352_N2080.json", + "M": 228352, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.968750000001 + }, + "M=228352,N=2240": { + "file": "silu_config_M228352_N2240.json", + "M": 228352, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2487.969250000001 + }, + "M=228352,N=2400": { + "file": "silu_config_M228352_N2400.json", + "M": 228352, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.7297499999977 + }, + "M=228352,N=2560": { + "file": "silu_config_M228352_N2560.json", + "M": 228352, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.8502499999986 + }, + "M=229376,N=128": { + "file": "silu_config_M229376_N128.json", + "M": 229376, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 128.2795 + }, + "M=229376,N=160": { + "file": "silu_config_M229376_N160.json", + "M": 229376, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.84000000000003 + }, + "M=229376,N=192": { + "file": "silu_config_M229376_N192.json", + "M": 229376, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.28024999999997 + }, + "M=229376,N=256": { + "file": "silu_config_M229376_N256.json", + "M": 229376, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.56000000000006 + }, + "M=229376,N=320": { + "file": "silu_config_M229376_N320.json", + "M": 229376, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 557.00125 + }, + "M=229376,N=384": { + "file": "silu_config_M229376_N384.json", + "M": 229376, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 553.4412499999999 + }, + "M=229376,N=480": { + "file": "silu_config_M229376_N480.json", + "M": 229376, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.6012499999997 + }, + 
"M=229376,N=512": { + "file": "silu_config_M229376_N512.json", + "M": 229376, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 424.2807499999999 + }, + "M=229376,N=576": { + "file": "silu_config_M229376_N576.json", + "M": 229376, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1068.3632500000003 + }, + "M=229376,N=640": { + "file": "silu_config_M229376_N640.json", + "M": 229376, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1062.4432499999998 + }, + "M=229376,N=768": { + "file": "silu_config_M229376_N768.json", + "M": 229376, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1076.2835 + }, + "M=229376,N=800": { + "file": "silu_config_M229376_N800.json", + "M": 229376, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1073.8432500000004 + }, + "M=229376,N=896": { + "file": "silu_config_M229376_N896.json", + "M": 229376, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1080.5634999999997 + }, + "M=229376,N=960": { + "file": "silu_config_M229376_N960.json", + "M": 229376, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1079.5634999999997 + }, + "M=229376,N=1024": { + "file": "silu_config_M229376_N1024.json", + "M": 229376, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 869.60275 + }, + "M=229376,N=1120": { + "file": "silu_config_M229376_N1120.json", + "M": 229376, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.84775 + }, + "M=229376,N=1152": { + "file": "silu_config_M229376_N1152.json", + "M": 229376, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.1277500000006 + }, + "M=229376,N=1280": { + "file": "silu_config_M229376_N1280.json", + "M": 229376, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2126.84775 + }, + "M=229376,N=1344": { + "file": "silu_config_M229376_N1344.json", + "M": 229376, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2154.1277500000006 + }, + "M=229376,N=1408": { + "file": "silu_config_M229376_N1408.json", + "M": 229376, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2108.0077499999998 + }, + "M=229376,N=1440": { + "file": "silu_config_M229376_N1440.json", + "M": 229376, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.3277500000004 + }, + "M=229376,N=1536": { + "file": "silu_config_M229376_N1536.json", + "M": 229376, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.76775 + }, + "M=229376,N=1600": { + "file": "silu_config_M229376_N1600.json", + "M": 229376, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.72775 + }, + "M=229376,N=1664": { + "file": "silu_config_M229376_N1664.json", + "M": 229376, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.2477500000005 + }, + "M=229376,N=1728": { + "file": "silu_config_M229376_N1728.json", + "M": 229376, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.64775 + }, + "M=229376,N=1760": { + "file": "silu_config_M229376_N1760.json", + "M": 229376, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2164.2479999999996 + }, + "M=229376,N=1792": { + "file": "silu_config_M229376_N1792.json", + "M": 229376, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.5277499999993 + }, + "M=229376,N=1920": { + "file": "silu_config_M229376_N1920.json", + "M": 229376, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2138.56775 + }, + "M=229376,N=2048": { + "file": "silu_config_M229376_N2048.json", + "M": 229376, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1690.406 + }, + "M=229376,N=2080": { + "file": "silu_config_M229376_N2080.json", + "M": 229376, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.96875 + }, + "M=229376,N=2240": { + "file": "silu_config_M229376_N2240.json", + "M": 229376, + "N": 2240, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2492.8492499999993 + }, + "M=229376,N=2400": { + "file": "silu_config_M229376_N2400.json", + "M": 229376, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2611.6897500000005 + }, + "M=229376,N=2560": { + "file": "silu_config_M229376_N2560.json", + "M": 229376, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2749.970250000003 + }, + "M=230400,N=128": { + "file": "silu_config_M230400_N128.json", + "M": 230400, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.07950000000002 + }, + "M=230400,N=160": { + "file": "silu_config_M230400_N160.json", + "M": 230400, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.0802499999999 + }, + "M=230400,N=192": { + "file": "silu_config_M230400_N192.json", + "M": 230400, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.48025000000007 + }, + "M=230400,N=256": { + "file": "silu_config_M230400_N256.json", + "M": 230400, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 261.12 + }, + "M=230400,N=320": { + "file": "silu_config_M230400_N320.json", + "M": 230400, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 555.80125 + }, + "M=230400,N=384": { + "file": "silu_config_M230400_N384.json", + "M": 230400, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 552.56125 + }, + "M=230400,N=480": { + "file": "silu_config_M230400_N480.json", + "M": 230400, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.3612500000004 + }, + "M=230400,N=512": { + "file": "silu_config_M230400_N512.json", + "M": 230400, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 451.80099999999993 + }, + "M=230400,N=576": { + "file": "silu_config_M230400_N576.json", + "M": 230400, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1086.2034999999996 + }, + "M=230400,N=640": { + "file": "silu_config_M230400_N640.json", + "M": 230400, + "N": 640, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1089.2835 + }, + "M=230400,N=768": { + "file": "silu_config_M230400_N768.json", + "M": 230400, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1073.0432500000002 + }, + "M=230400,N=800": { + "file": "silu_config_M230400_N800.json", + "M": 230400, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1079.4435000000003 + }, + "M=230400,N=896": { + "file": "silu_config_M230400_N896.json", + "M": 230400, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.7635 + }, + "M=230400,N=960": { + "file": "silu_config_M230400_N960.json", + "M": 230400, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.4035 + }, + "M=230400,N=1024": { + "file": "silu_config_M230400_N1024.json", + "M": 230400, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 868.4024999999997 + }, + "M=230400,N=1120": { + "file": "silu_config_M230400_N1120.json", + "M": 230400, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2154.2077499999996 + }, + "M=230400,N=1152": { + "file": "silu_config_M230400_N1152.json", + "M": 230400, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2119.72775 + }, + "M=230400,N=1280": { + "file": "silu_config_M230400_N1280.json", + "M": 230400, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.0477499999997 + }, + "M=230400,N=1344": { + "file": "silu_config_M230400_N1344.json", + "M": 230400, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2135.60775 + }, + "M=230400,N=1408": { + "file": "silu_config_M230400_N1408.json", + "M": 230400, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.0877500000006 + }, + "M=230400,N=1440": { + "file": "silu_config_M230400_N1440.json", + "M": 230400, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2138.76775 + }, + "M=230400,N=1536": { + "file": "silu_config_M230400_N1536.json", + "M": 230400, + 
"N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2116.2877499999995 + }, + "M=230400,N=1600": { + "file": "silu_config_M230400_N1600.json", + "M": 230400, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2149.88775 + }, + "M=230400,N=1664": { + "file": "silu_config_M230400_N1664.json", + "M": 230400, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2130.64775 + }, + "M=230400,N=1728": { + "file": "silu_config_M230400_N1728.json", + "M": 230400, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2133.1277499999997 + }, + "M=230400,N=1760": { + "file": "silu_config_M230400_N1760.json", + "M": 230400, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.1677499999996 + }, + "M=230400,N=1792": { + "file": "silu_config_M230400_N1792.json", + "M": 230400, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2135.3277499999995 + }, + "M=230400,N=1920": { + "file": "silu_config_M230400_N1920.json", + "M": 230400, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2140.92775 + }, + "M=230400,N=2048": { + "file": "silu_config_M230400_N2048.json", + "M": 230400, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1703.7259999999997 + }, + "M=230400,N=2080": { + "file": "silu_config_M230400_N2080.json", + "M": 230400, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2367.2887499999997 + }, + "M=230400,N=2240": { + "file": "silu_config_M230400_N2240.json", + "M": 230400, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2474.28925 + }, + "M=230400,N=2400": { + "file": "silu_config_M230400_N2400.json", + "M": 230400, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2600.289749999997 + }, + "M=230400,N=2560": { + "file": "silu_config_M230400_N2560.json", + "M": 230400, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.450249999999 + }, + "M=231424,N=128": { + "file": 
"silu_config_M231424_N128.json", + "M": 231424, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.27949999999998 + }, + "M=231424,N=160": { + "file": "silu_config_M231424_N160.json", + "M": 231424, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 295.36025000000006 + }, + "M=231424,N=192": { + "file": "silu_config_M231424_N192.json", + "M": 231424, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.6402500000001 + }, + "M=231424,N=256": { + "file": "silu_config_M231424_N256.json", + "M": 231424, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.67999999999995 + }, + "M=231424,N=320": { + "file": "silu_config_M231424_N320.json", + "M": 231424, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 562.0012499999998 + }, + "M=231424,N=384": { + "file": "silu_config_M231424_N384.json", + "M": 231424, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 559.08125 + }, + "M=231424,N=480": { + "file": "silu_config_M231424_N480.json", + "M": 231424, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.00125 + }, + "M=231424,N=512": { + "file": "silu_config_M231424_N512.json", + "M": 231424, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 453.76075000000014 + }, + "M=231424,N=576": { + "file": "silu_config_M231424_N576.json", + "M": 231424, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.3235000000004 + }, + "M=231424,N=640": { + "file": "silu_config_M231424_N640.json", + "M": 231424, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1083.1235000000001 + }, + "M=231424,N=768": { + "file": "silu_config_M231424_N768.json", + "M": 231424, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1104.6035000000002 + }, + "M=231424,N=800": { + "file": "silu_config_M231424_N800.json", + "M": 231424, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.8034999999995 + }, + 
"M=231424,N=896": { + "file": "silu_config_M231424_N896.json", + "M": 231424, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2435 + }, + "M=231424,N=960": { + "file": "silu_config_M231424_N960.json", + "M": 231424, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1087.5634999999997 + }, + "M=231424,N=1024": { + "file": "silu_config_M231424_N1024.json", + "M": 231424, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 873.7225000000003 + }, + "M=231424,N=1120": { + "file": "silu_config_M231424_N1120.json", + "M": 231424, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2178.4879999999994 + }, + "M=231424,N=1152": { + "file": "silu_config_M231424_N1152.json", + "M": 231424, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.0877499999997 + }, + "M=231424,N=1280": { + "file": "silu_config_M231424_N1280.json", + "M": 231424, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2144.0077500000007 + }, + "M=231424,N=1344": { + "file": "silu_config_M231424_N1344.json", + "M": 231424, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2149.3677499999994 + }, + "M=231424,N=1408": { + "file": "silu_config_M231424_N1408.json", + "M": 231424, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2156.88775 + }, + "M=231424,N=1440": { + "file": "silu_config_M231424_N1440.json", + "M": 231424, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2153.2877500000004 + }, + "M=231424,N=1536": { + "file": "silu_config_M231424_N1536.json", + "M": 231424, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.1679999999997 + }, + "M=231424,N=1600": { + "file": "silu_config_M231424_N1600.json", + "M": 231424, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2164.568 + }, + "M=231424,N=1664": { + "file": "silu_config_M231424_N1664.json", + "M": 231424, + "N": 1664, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 2143.68775 + }, + "M=231424,N=1728": { + "file": "silu_config_M231424_N1728.json", + "M": 231424, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.2077499999996 + }, + "M=231424,N=1760": { + "file": "silu_config_M231424_N1760.json", + "M": 231424, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2161.688 + }, + "M=231424,N=1792": { + "file": "silu_config_M231424_N1792.json", + "M": 231424, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.76775 + }, + "M=231424,N=1920": { + "file": "silu_config_M231424_N1920.json", + "M": 231424, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2164.687999999999 + }, + "M=231424,N=2048": { + "file": "silu_config_M231424_N2048.json", + "M": 231424, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1721.0060000000003 + }, + "M=231424,N=2080": { + "file": "silu_config_M231424_N2080.json", + "M": 231424, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2365.4487499999996 + }, + "M=231424,N=2240": { + "file": "silu_config_M231424_N2240.json", + "M": 231424, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2493.32925 + }, + "M=231424,N=2400": { + "file": "silu_config_M231424_N2400.json", + "M": 231424, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.9700000000003 + }, + "M=231424,N=2560": { + "file": "silu_config_M231424_N2560.json", + "M": 231424, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2753.370249999998 + }, + "M=232448,N=128": { + "file": "silu_config_M232448_N128.json", + "M": 232448, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.8795 + }, + "M=232448,N=160": { + "file": "silu_config_M232448_N160.json", + "M": 232448, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 278.52000000000004 + }, + "M=232448,N=192": { + "file": "silu_config_M232448_N192.json", + "M": 232448, + "N": 192, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 289.68025000000006 + }, + "M=232448,N=256": { + "file": "silu_config_M232448_N256.json", + "M": 232448, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.24 + }, + "M=232448,N=320": { + "file": "silu_config_M232448_N320.json", + "M": 232448, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.9612500000001 + }, + "M=232448,N=384": { + "file": "silu_config_M232448_N384.json", + "M": 232448, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.84125 + }, + "M=232448,N=480": { + "file": "silu_config_M232448_N480.json", + "M": 232448, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.6812499999999 + }, + "M=232448,N=512": { + "file": "silu_config_M232448_N512.json", + "M": 232448, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 455.68074999999976 + }, + "M=232448,N=576": { + "file": "silu_config_M232448_N576.json", + "M": 232448, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.3635 + }, + "M=232448,N=640": { + "file": "silu_config_M232448_N640.json", + "M": 232448, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.6834999999996 + }, + "M=232448,N=768": { + "file": "silu_config_M232448_N768.json", + "M": 232448, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1088.4835000000003 + }, + "M=232448,N=800": { + "file": "silu_config_M232448_N800.json", + "M": 232448, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.6834999999996 + }, + "M=232448,N=896": { + "file": "silu_config_M232448_N896.json", + "M": 232448, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1092.6435000000001 + }, + "M=232448,N=960": { + "file": "silu_config_M232448_N960.json", + "M": 232448, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.3635 + }, + "M=232448,N=1024": { + "file": "silu_config_M232448_N1024.json", + "M": 232448, + "N": 1024, + "rows_per_block": 
8, + "vec_size": 4, + "time_us": 877.2427500000008 + }, + "M=232448,N=1120": { + "file": "silu_config_M232448_N1120.json", + "M": 232448, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2177.648 + }, + "M=232448,N=1152": { + "file": "silu_config_M232448_N1152.json", + "M": 232448, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2151.3277499999995 + }, + "M=232448,N=1280": { + "file": "silu_config_M232448_N1280.json", + "M": 232448, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.888000000001 + }, + "M=232448,N=1344": { + "file": "silu_config_M232448_N1344.json", + "M": 232448, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2179.5279999999993 + }, + "M=232448,N=1408": { + "file": "silu_config_M232448_N1408.json", + "M": 232448, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2156.0077499999998 + }, + "M=232448,N=1440": { + "file": "silu_config_M232448_N1440.json", + "M": 232448, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2162.608 + }, + "M=232448,N=1536": { + "file": "silu_config_M232448_N1536.json", + "M": 232448, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.96775 + }, + "M=232448,N=1600": { + "file": "silu_config_M232448_N1600.json", + "M": 232448, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2184.008 + }, + "M=232448,N=1664": { + "file": "silu_config_M232448_N1664.json", + "M": 232448, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2171.888 + }, + "M=232448,N=1728": { + "file": "silu_config_M232448_N1728.json", + "M": 232448, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.3679999999995 + }, + "M=232448,N=1760": { + "file": "silu_config_M232448_N1760.json", + "M": 232448, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2181.3680000000004 + }, + "M=232448,N=1792": { + "file": "silu_config_M232448_N1792.json", + "M": 232448, + "N": 
1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2178.3680000000004 + }, + "M=232448,N=1920": { + "file": "silu_config_M232448_N1920.json", + "M": 232448, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2184.4880000000003 + }, + "M=232448,N=2048": { + "file": "silu_config_M232448_N2048.json", + "M": 232448, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1718.7259999999997 + }, + "M=232448,N=2080": { + "file": "silu_config_M232448_N2080.json", + "M": 232448, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2395.6887500000003 + }, + "M=232448,N=2240": { + "file": "silu_config_M232448_N2240.json", + "M": 232448, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2514.2492500000017 + }, + "M=232448,N=2400": { + "file": "silu_config_M232448_N2400.json", + "M": 232448, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.6900000000005 + }, + "M=232448,N=2560": { + "file": "silu_config_M232448_N2560.json", + "M": 232448, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2797.330499999999 + }, + "M=233472,N=128": { + "file": "silu_config_M233472_N128.json", + "M": 233472, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.7195 + }, + "M=233472,N=160": { + "file": "silu_config_M233472_N160.json", + "M": 233472, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.4800000000001 + }, + "M=233472,N=192": { + "file": "silu_config_M233472_N192.json", + "M": 233472, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 290.96000000000015 + }, + "M=233472,N=256": { + "file": "silu_config_M233472_N256.json", + "M": 233472, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.24 + }, + "M=233472,N=320": { + "file": "silu_config_M233472_N320.json", + "M": 233472, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.6412500000001 + }, + "M=233472,N=384": { + "file": 
"silu_config_M233472_N384.json", + "M": 233472, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.4412499999999 + }, + "M=233472,N=480": { + "file": "silu_config_M233472_N480.json", + "M": 233472, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 573.2412499999998 + }, + "M=233472,N=512": { + "file": "silu_config_M233472_N512.json", + "M": 233472, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 451.68074999999976 + }, + "M=233472,N=576": { + "file": "silu_config_M233472_N576.json", + "M": 233472, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.7635 + }, + "M=233472,N=640": { + "file": "silu_config_M233472_N640.json", + "M": 233472, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1090.0434999999998 + }, + "M=233472,N=768": { + "file": "silu_config_M233472_N768.json", + "M": 233472, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1093.2035000000005 + }, + "M=233472,N=800": { + "file": "silu_config_M233472_N800.json", + "M": 233472, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1092.0834999999997 + }, + "M=233472,N=896": { + "file": "silu_config_M233472_N896.json", + "M": 233472, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1107.9635000000003 + }, + "M=233472,N=960": { + "file": "silu_config_M233472_N960.json", + "M": 233472, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1116.8035 + }, + "M=233472,N=1024": { + "file": "silu_config_M233472_N1024.json", + "M": 233472, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 882.2424999999998 + }, + "M=233472,N=1120": { + "file": "silu_config_M233472_N1120.json", + "M": 233472, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2176.968 + }, + "M=233472,N=1152": { + "file": "silu_config_M233472_N1152.json", + "M": 233472, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2160.4077500000003 + }, + 
"M=233472,N=1280": { + "file": "silu_config_M233472_N1280.json", + "M": 233472, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2152.60775 + }, + "M=233472,N=1344": { + "file": "silu_config_M233472_N1344.json", + "M": 233472, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.1279999999997 + }, + "M=233472,N=1408": { + "file": "silu_config_M233472_N1408.json", + "M": 233472, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2165.2080000000005 + }, + "M=233472,N=1440": { + "file": "silu_config_M233472_N1440.json", + "M": 233472, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2172.1279999999997 + }, + "M=233472,N=1536": { + "file": "silu_config_M233472_N1536.json", + "M": 233472, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.4080000000004 + }, + "M=233472,N=1600": { + "file": "silu_config_M233472_N1600.json", + "M": 233472, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2172.968 + }, + "M=233472,N=1664": { + "file": "silu_config_M233472_N1664.json", + "M": 233472, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2183.2479999999996 + }, + "M=233472,N=1728": { + "file": "silu_config_M233472_N1728.json", + "M": 233472, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2187.2479999999996 + }, + "M=233472,N=1760": { + "file": "silu_config_M233472_N1760.json", + "M": 233472, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2190.808 + }, + "M=233472,N=1792": { + "file": "silu_config_M233472_N1792.json", + "M": 233472, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2198.048 + }, + "M=233472,N=1920": { + "file": "silu_config_M233472_N1920.json", + "M": 233472, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2204.1279999999997 + }, + "M=233472,N=2048": { + "file": "silu_config_M233472_N2048.json", + "M": 233472, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + 
"time_us": 1729.0460000000003 + }, + "M=233472,N=2080": { + "file": "silu_config_M233472_N2080.json", + "M": 233472, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2406.7689999999993 + }, + "M=233472,N=2240": { + "file": "silu_config_M233472_N2240.json", + "M": 233472, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.4495000000015 + }, + "M=233472,N=2400": { + "file": "silu_config_M233472_N2400.json", + "M": 233472, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.5300000000016 + }, + "M=233472,N=2560": { + "file": "silu_config_M233472_N2560.json", + "M": 233472, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2790.8504999999986 + }, + "M=234496,N=128": { + "file": "silu_config_M234496_N128.json", + "M": 234496, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.91949999999994 + }, + "M=234496,N=160": { + "file": "silu_config_M234496_N160.json", + "M": 234496, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.76025 + }, + "M=234496,N=192": { + "file": "silu_config_M234496_N192.json", + "M": 234496, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.2002499999999 + }, + "M=234496,N=256": { + "file": "silu_config_M234496_N256.json", + "M": 234496, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.88000000000022 + }, + "M=234496,N=320": { + "file": "silu_config_M234496_N320.json", + "M": 234496, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.5212500000002 + }, + "M=234496,N=384": { + "file": "silu_config_M234496_N384.json", + "M": 234496, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.9612499999998 + }, + "M=234496,N=480": { + "file": "silu_config_M234496_N480.json", + "M": 234496, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.4812499999998 + }, + "M=234496,N=512": { + "file": "silu_config_M234496_N512.json", + "M": 234496, + "N": 512, + 
"rows_per_block": 6, + "vec_size": 4, + "time_us": 445.44075 + }, + "M=234496,N=576": { + "file": "silu_config_M234496_N576.json", + "M": 234496, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1103.4035000000003 + }, + "M=234496,N=640": { + "file": "silu_config_M234496_N640.json", + "M": 234496, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1105.0435000000002 + }, + "M=234496,N=768": { + "file": "silu_config_M234496_N768.json", + "M": 234496, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1099.4034999999994 + }, + "M=234496,N=800": { + "file": "silu_config_M234496_N800.json", + "M": 234496, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1094.6835000000005 + }, + "M=234496,N=896": { + "file": "silu_config_M234496_N896.json", + "M": 234496, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.9235000000003 + }, + "M=234496,N=960": { + "file": "silu_config_M234496_N960.json", + "M": 234496, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.8435000000004 + }, + "M=234496,N=1024": { + "file": "silu_config_M234496_N1024.json", + "M": 234496, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 885.6424999999999 + }, + "M=234496,N=1120": { + "file": "silu_config_M234496_N1120.json", + "M": 234496, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2186.2080000000005 + }, + "M=234496,N=1152": { + "file": "silu_config_M234496_N1152.json", + "M": 234496, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2169.5280000000002 + }, + "M=234496,N=1280": { + "file": "silu_config_M234496_N1280.json", + "M": 234496, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.5280000000002 + }, + "M=234496,N=1344": { + "file": "silu_config_M234496_N1344.json", + "M": 234496, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.0879999999997 + }, + "M=234496,N=1408": { + "file": 
"silu_config_M234496_N1408.json", + "M": 234496, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2174.6079999999993 + }, + "M=234496,N=1440": { + "file": "silu_config_M234496_N1440.json", + "M": 234496, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2181.648 + }, + "M=234496,N=1536": { + "file": "silu_config_M234496_N1536.json", + "M": 234496, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2177.768000000001 + }, + "M=234496,N=1600": { + "file": "silu_config_M234496_N1600.json", + "M": 234496, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.4079999999994 + }, + "M=234496,N=1664": { + "file": "silu_config_M234496_N1664.json", + "M": 234496, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.2079999999996 + }, + "M=234496,N=1728": { + "file": "silu_config_M234496_N1728.json", + "M": 234496, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.6479999999992 + }, + "M=234496,N=1760": { + "file": "silu_config_M234496_N1760.json", + "M": 234496, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2210.3279999999995 + }, + "M=234496,N=1792": { + "file": "silu_config_M234496_N1792.json", + "M": 234496, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.2079999999996 + }, + "M=234496,N=1920": { + "file": "silu_config_M234496_N1920.json", + "M": 234496, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.4080000000004 + }, + "M=234496,N=2048": { + "file": "silu_config_M234496_N2048.json", + "M": 234496, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1743.0462499999994 + }, + "M=234496,N=2080": { + "file": "silu_config_M234496_N2080.json", + "M": 234496, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2406.6489999999994 + }, + "M=234496,N=2240": { + "file": "silu_config_M234496_N2240.json", + "M": 234496, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2547.009500000001 + }, + "M=234496,N=2400": { + "file": "silu_config_M234496_N2400.json", + "M": 234496, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2698.290000000001 + }, + "M=234496,N=2560": { + "file": "silu_config_M234496_N2560.json", + "M": 234496, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2819.050500000001 + }, + "M=235520,N=128": { + "file": "silu_config_M235520_N128.json", + "M": 235520, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.43974999999995 + }, + "M=235520,N=160": { + "file": "silu_config_M235520_N160.json", + "M": 235520, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.32025000000004 + }, + "M=235520,N=192": { + "file": "silu_config_M235520_N192.json", + "M": 235520, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.5202500000001 + }, + "M=235520,N=256": { + "file": "silu_config_M235520_N256.json", + "M": 235520, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.9200000000001 + }, + "M=235520,N=320": { + "file": "silu_config_M235520_N320.json", + "M": 235520, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.9212500000001 + }, + "M=235520,N=384": { + "file": "silu_config_M235520_N384.json", + "M": 235520, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.04125 + }, + "M=235520,N=480": { + "file": "silu_config_M235520_N480.json", + "M": 235520, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.04125 + }, + "M=235520,N=512": { + "file": "silu_config_M235520_N512.json", + "M": 235520, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 461.721 + }, + "M=235520,N=576": { + "file": "silu_config_M235520_N576.json", + "M": 235520, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.9635000000003 + }, + "M=235520,N=640": { + "file": "silu_config_M235520_N640.json", + "M": 235520, + "N": 640, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1082.7635 + }, + "M=235520,N=768": { + "file": "silu_config_M235520_N768.json", + "M": 235520, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1096.2034999999996 + }, + "M=235520,N=800": { + "file": "silu_config_M235520_N800.json", + "M": 235520, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1102.8035 + }, + "M=235520,N=896": { + "file": "silu_config_M235520_N896.json", + "M": 235520, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1089.9634999999998 + }, + "M=235520,N=960": { + "file": "silu_config_M235520_N960.json", + "M": 235520, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1108.0835000000006 + }, + "M=235520,N=1024": { + "file": "silu_config_M235520_N1024.json", + "M": 235520, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 887.042750000001 + }, + "M=235520,N=1120": { + "file": "silu_config_M235520_N1120.json", + "M": 235520, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2180.2080000000005 + }, + "M=235520,N=1152": { + "file": "silu_config_M235520_N1152.json", + "M": 235520, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2155.567750000001 + }, + "M=235520,N=1280": { + "file": "silu_config_M235520_N1280.json", + "M": 235520, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.648000000001 + }, + "M=235520,N=1344": { + "file": "silu_config_M235520_N1344.json", + "M": 235520, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2171.848 + }, + "M=235520,N=1408": { + "file": "silu_config_M235520_N1408.json", + "M": 235520, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.568 + }, + "M=235520,N=1440": { + "file": "silu_config_M235520_N1440.json", + "M": 235520, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2206.2879999999996 + }, + "M=235520,N=1536": { + "file": "silu_config_M235520_N1536.json", + "M": 235520, + "N": 1536, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2173.6479999999992 + }, + "M=235520,N=1600": { + "file": "silu_config_M235520_N1600.json", + "M": 235520, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.1680000000006 + }, + "M=235520,N=1664": { + "file": "silu_config_M235520_N1664.json", + "M": 235520, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2157.2077500000005 + }, + "M=235520,N=1728": { + "file": "silu_config_M235520_N1728.json", + "M": 235520, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2169.728 + }, + "M=235520,N=1760": { + "file": "silu_config_M235520_N1760.json", + "M": 235520, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2193.1280000000006 + }, + "M=235520,N=1792": { + "file": "silu_config_M235520_N1792.json", + "M": 235520, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2171.688 + }, + "M=235520,N=1920": { + "file": "silu_config_M235520_N1920.json", + "M": 235520, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2177.1679999999997 + }, + "M=235520,N=2048": { + "file": "silu_config_M235520_N2048.json", + "M": 235520, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1741.1662500000002 + }, + "M=235520,N=2080": { + "file": "silu_config_M235520_N2080.json", + "M": 235520, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2409.6090000000013 + }, + "M=235520,N=2240": { + "file": "silu_config_M235520_N2240.json", + "M": 235520, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2539.4894999999997 + }, + "M=235520,N=2400": { + "file": "silu_config_M235520_N2400.json", + "M": 235520, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2689.850000000002 + }, + "M=235520,N=2560": { + "file": "silu_config_M235520_N2560.json", + "M": 235520, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2804.5705000000007 + }, + "M=236544,N=128": { + "file": 
"silu_config_M236544_N128.json", + "M": 236544, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.4795 + }, + "M=236544,N=160": { + "file": "silu_config_M236544_N160.json", + "M": 236544, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 305.36025000000006 + }, + "M=236544,N=192": { + "file": "silu_config_M236544_N192.json", + "M": 236544, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.56000000000006 + }, + "M=236544,N=256": { + "file": "silu_config_M236544_N256.json", + "M": 236544, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.1200000000001 + }, + "M=236544,N=320": { + "file": "silu_config_M236544_N320.json", + "M": 236544, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.0412499999998 + }, + "M=236544,N=384": { + "file": "silu_config_M236544_N384.json", + "M": 236544, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.04125 + }, + "M=236544,N=480": { + "file": "silu_config_M236544_N480.json", + "M": 236544, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 580.84125 + }, + "M=236544,N=512": { + "file": "silu_config_M236544_N512.json", + "M": 236544, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 456.92074999999977 + }, + "M=236544,N=576": { + "file": "silu_config_M236544_N576.json", + "M": 236544, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.3634999999995 + }, + "M=236544,N=640": { + "file": "silu_config_M236544_N640.json", + "M": 236544, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1106.6835 + }, + "M=236544,N=768": { + "file": "silu_config_M236544_N768.json", + "M": 236544, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1096.6835000000005 + }, + "M=236544,N=800": { + "file": "silu_config_M236544_N800.json", + "M": 236544, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1135.40375 + }, + "M=236544,N=896": { + "file": 
"silu_config_M236544_N896.json", + "M": 236544, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.0834999999997 + }, + "M=236544,N=960": { + "file": "silu_config_M236544_N960.json", + "M": 236544, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.9634999999998 + }, + "M=236544,N=1024": { + "file": "silu_config_M236544_N1024.json", + "M": 236544, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 894.2027499999999 + }, + "M=236544,N=1120": { + "file": "silu_config_M236544_N1120.json", + "M": 236544, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2215.4880000000003 + }, + "M=236544,N=1152": { + "file": "silu_config_M236544_N1152.json", + "M": 236544, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.1680000000015 + }, + "M=236544,N=1280": { + "file": "silu_config_M236544_N1280.json", + "M": 236544, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.4479999999994 + }, + "M=236544,N=1344": { + "file": "silu_config_M236544_N1344.json", + "M": 236544, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2217.2879999999996 + }, + "M=236544,N=1408": { + "file": "silu_config_M236544_N1408.json", + "M": 236544, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2193.648 + }, + "M=236544,N=1440": { + "file": "silu_config_M236544_N1440.json", + "M": 236544, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2221.2479999999996 + }, + "M=236544,N=1536": { + "file": "silu_config_M236544_N1536.json", + "M": 236544, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.728000000001 + }, + "M=236544,N=1600": { + "file": "silu_config_M236544_N1600.json", + "M": 236544, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2211.928 + }, + "M=236544,N=1664": { + "file": "silu_config_M236544_N1664.json", + "M": 236544, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2211.687999999999 + }, + "M=236544,N=1728": { + "file": "silu_config_M236544_N1728.json", + "M": 236544, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2226.1282500000007 + }, + "M=236544,N=1760": { + "file": "silu_config_M236544_N1760.json", + "M": 236544, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2238.4882499999994 + }, + "M=236544,N=1792": { + "file": "silu_config_M236544_N1792.json", + "M": 236544, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.728 + }, + "M=236544,N=1920": { + "file": "silu_config_M236544_N1920.json", + "M": 236544, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2222.2082500000006 + }, + "M=236544,N=2048": { + "file": "silu_config_M236544_N2048.json", + "M": 236544, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1748.32625 + }, + "M=236544,N=2080": { + "file": "silu_config_M236544_N2080.json", + "M": 236544, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2417.208999999998 + }, + "M=236544,N=2240": { + "file": "silu_config_M236544_N2240.json", + "M": 236544, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2578.8094999999994 + }, + "M=236544,N=2400": { + "file": "silu_config_M236544_N2400.json", + "M": 236544, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.210250000001 + }, + "M=236544,N=2560": { + "file": "silu_config_M236544_N2560.json", + "M": 236544, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2847.9707499999995 + }, + "M=237568,N=128": { + "file": "silu_config_M237568_N128.json", + "M": 237568, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.99975000000003 + }, + "M=237568,N=160": { + "file": "silu_config_M237568_N160.json", + "M": 237568, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.24025 + }, + "M=237568,N=192": { + "file": "silu_config_M237568_N192.json", + "M": 237568, + "N": 192, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 295.92025 + }, + "M=237568,N=256": { + "file": "silu_config_M237568_N256.json", + "M": 237568, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 257.12025000000006 + }, + "M=237568,N=320": { + "file": "silu_config_M237568_N320.json", + "M": 237568, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.2012499999998 + }, + "M=237568,N=384": { + "file": "silu_config_M237568_N384.json", + "M": 237568, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 573.6412499999999 + }, + "M=237568,N=480": { + "file": "silu_config_M237568_N480.json", + "M": 237568, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.9612499999998 + }, + "M=237568,N=512": { + "file": "silu_config_M237568_N512.json", + "M": 237568, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 458.64099999999985 + }, + "M=237568,N=576": { + "file": "silu_config_M237568_N576.json", + "M": 237568, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1106.5635000000002 + }, + "M=237568,N=640": { + "file": "silu_config_M237568_N640.json", + "M": 237568, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1100.1635 + }, + "M=237568,N=768": { + "file": "silu_config_M237568_N768.json", + "M": 237568, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1102.5234999999998 + }, + "M=237568,N=800": { + "file": "silu_config_M237568_N800.json", + "M": 237568, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1122.0035000000003 + }, + "M=237568,N=896": { + "file": "silu_config_M237568_N896.json", + "M": 237568, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1118.5234999999998 + }, + "M=237568,N=960": { + "file": "silu_config_M237568_N960.json", + "M": 237568, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.2435 + }, + "M=237568,N=1024": { + "file": "silu_config_M237568_N1024.json", + "M": 237568, + "N": 1024, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 897.8827500000002 + }, + "M=237568,N=1120": { + "file": "silu_config_M237568_N1120.json", + "M": 237568, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2224.2880000000005 + }, + "M=237568,N=1152": { + "file": "silu_config_M237568_N1152.json", + "M": 237568, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.968 + }, + "M=237568,N=1280": { + "file": "silu_config_M237568_N1280.json", + "M": 237568, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2210.928000000001 + }, + "M=237568,N=1344": { + "file": "silu_config_M237568_N1344.json", + "M": 237568, + "N": 1344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2225.72825 + }, + "M=237568,N=1408": { + "file": "silu_config_M237568_N1408.json", + "M": 237568, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2192.6479999999992 + }, + "M=237568,N=1440": { + "file": "silu_config_M237568_N1440.json", + "M": 237568, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2229.1282500000007 + }, + "M=237568,N=1536": { + "file": "silu_config_M237568_N1536.json", + "M": 237568, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2216.3680000000004 + }, + "M=237568,N=1600": { + "file": "silu_config_M237568_N1600.json", + "M": 237568, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2241.2882500000005 + }, + "M=237568,N=1664": { + "file": "silu_config_M237568_N1664.json", + "M": 237568, + "N": 1664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2220.048 + }, + "M=237568,N=1728": { + "file": "silu_config_M237568_N1728.json", + "M": 237568, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2214.848 + }, + "M=237568,N=1760": { + "file": "silu_config_M237568_N1760.json", + "M": 237568, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2218.4480000000003 + }, + "M=237568,N=1792": { + "file": "silu_config_M237568_N1792.json", 
+ "M": 237568, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2225.5682500000003 + }, + "M=237568,N=1920": { + "file": "silu_config_M237568_N1920.json", + "M": 237568, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2231.5682499999994 + }, + "M=237568,N=2048": { + "file": "silu_config_M237568_N2048.json", + "M": 237568, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1757.1262500000003 + }, + "M=237568,N=2080": { + "file": "silu_config_M237568_N2080.json", + "M": 237568, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.5290000000014 + }, + "M=237568,N=2240": { + "file": "silu_config_M237568_N2240.json", + "M": 237568, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.1694999999972 + }, + "M=237568,N=2400": { + "file": "silu_config_M237568_N2400.json", + "M": 237568, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.130250000003 + }, + "M=237568,N=2560": { + "file": "silu_config_M237568_N2560.json", + "M": 237568, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2824.7304999999988 + }, + "M=238592,N=128": { + "file": "silu_config_M238592_N128.json", + "M": 238592, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 133.11975 + }, + "M=238592,N=160": { + "file": "silu_config_M238592_N160.json", + "M": 238592, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 307.80025 + }, + "M=238592,N=192": { + "file": "silu_config_M238592_N192.json", + "M": 238592, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.60025000000013 + }, + "M=238592,N=256": { + "file": "silu_config_M238592_N256.json", + "M": 238592, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.9602500000002 + }, + "M=238592,N=320": { + "file": "silu_config_M238592_N320.json", + "M": 238592, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 578.9212499999999 + }, + "M=238592,N=384": { + 
"file": "silu_config_M238592_N384.json", + "M": 238592, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.8012500000002 + }, + "M=238592,N=480": { + "file": "silu_config_M238592_N480.json", + "M": 238592, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 585.6412500000001 + }, + "M=238592,N=512": { + "file": "silu_config_M238592_N512.json", + "M": 238592, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 450.84074999999984 + }, + "M=238592,N=576": { + "file": "silu_config_M238592_N576.json", + "M": 238592, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6835000000005 + }, + "M=238592,N=640": { + "file": "silu_config_M238592_N640.json", + "M": 238592, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1127.0835000000002 + }, + "M=238592,N=768": { + "file": "silu_config_M238592_N768.json", + "M": 238592, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1107.2835 + }, + "M=238592,N=800": { + "file": "silu_config_M238592_N800.json", + "M": 238592, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.1234999999997 + }, + "M=238592,N=896": { + "file": "silu_config_M238592_N896.json", + "M": 238592, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1131.5634999999997 + }, + "M=238592,N=960": { + "file": "silu_config_M238592_N960.json", + "M": 238592, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.9234999999999 + }, + "M=238592,N=1024": { + "file": "silu_config_M238592_N1024.json", + "M": 238592, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 901.2427499999999 + }, + "M=238592,N=1120": { + "file": "silu_config_M238592_N1120.json", + "M": 238592, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2224.00825 + }, + "M=238592,N=1152": { + "file": "silu_config_M238592_N1152.json", + "M": 238592, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2207.0880000000006 + 
}, + "M=238592,N=1280": { + "file": "silu_config_M238592_N1280.json", + "M": 238592, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.608 + }, + "M=238592,N=1344": { + "file": "silu_config_M238592_N1344.json", + "M": 238592, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2235.96825 + }, + "M=238592,N=1408": { + "file": "silu_config_M238592_N1408.json", + "M": 238592, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2202.048 + }, + "M=238592,N=1440": { + "file": "silu_config_M238592_N1440.json", + "M": 238592, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2250.4882500000012 + }, + "M=238592,N=1536": { + "file": "silu_config_M238592_N1536.json", + "M": 238592, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2225.848250000001 + }, + "M=238592,N=1600": { + "file": "silu_config_M238592_N1600.json", + "M": 238592, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2220.4079999999994 + }, + "M=238592,N=1664": { + "file": "silu_config_M238592_N1664.json", + "M": 238592, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2230.5282499999994 + }, + "M=238592,N=1728": { + "file": "silu_config_M238592_N1728.json", + "M": 238592, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.968250000001 + }, + "M=238592,N=1760": { + "file": "silu_config_M238592_N1760.json", + "M": 238592, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.88825 + }, + "M=238592,N=1792": { + "file": "silu_config_M238592_N1792.json", + "M": 238592, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2203.848000000001 + }, + "M=238592,N=1920": { + "file": "silu_config_M238592_N1920.json", + "M": 238592, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2241.2082500000006 + }, + "M=238592,N=2048": { + "file": "silu_config_M238592_N2048.json", + "M": 238592, + "N": 2048, + "rows_per_block": 10, + "vec_size": 
4, + "time_us": 1772.52625 + }, + "M=238592,N=2080": { + "file": "silu_config_M238592_N2080.json", + "M": 238592, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.2889999999998 + }, + "M=238592,N=2240": { + "file": "silu_config_M238592_N2240.json", + "M": 238592, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2570.3695000000007 + }, + "M=238592,N=2400": { + "file": "silu_config_M238592_N2400.json", + "M": 238592, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2745.570249999998 + }, + "M=238592,N=2560": { + "file": "silu_config_M238592_N2560.json", + "M": 238592, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.61075 + }, + "M=239616,N=128": { + "file": "silu_config_M239616_N128.json", + "M": 239616, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 134.11975000000004 + }, + "M=239616,N=160": { + "file": "silu_config_M239616_N160.json", + "M": 239616, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.16000000000014 + }, + "M=239616,N=192": { + "file": "silu_config_M239616_N192.json", + "M": 239616, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.80025 + }, + "M=239616,N=256": { + "file": "silu_config_M239616_N256.json", + "M": 239616, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.5600000000002 + }, + "M=239616,N=320": { + "file": "silu_config_M239616_N320.json", + "M": 239616, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 581.4012499999999 + }, + "M=239616,N=384": { + "file": "silu_config_M239616_N384.json", + "M": 239616, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 578.1612500000001 + }, + "M=239616,N=480": { + "file": "silu_config_M239616_N480.json", + "M": 239616, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 587.48125 + }, + "M=239616,N=512": { + "file": "silu_config_M239616_N512.json", + "M": 239616, + "N": 512, + "rows_per_block": 8, + 
"vec_size": 4, + "time_us": 462.721 + }, + "M=239616,N=576": { + "file": "silu_config_M239616_N576.json", + "M": 239616, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.6035000000004 + }, + "M=239616,N=640": { + "file": "silu_config_M239616_N640.json", + "M": 239616, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.5235 + }, + "M=239616,N=768": { + "file": "silu_config_M239616_N768.json", + "M": 239616, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.1235 + }, + "M=239616,N=800": { + "file": "silu_config_M239616_N800.json", + "M": 239616, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1118.6035000000002 + }, + "M=239616,N=896": { + "file": "silu_config_M239616_N896.json", + "M": 239616, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.5637499999996 + }, + "M=239616,N=960": { + "file": "silu_config_M239616_N960.json", + "M": 239616, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1136.92375 + }, + "M=239616,N=1024": { + "file": "silu_config_M239616_N1024.json", + "M": 239616, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 905.4827500000004 + }, + "M=239616,N=1120": { + "file": "silu_config_M239616_N1120.json", + "M": 239616, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.24825 + }, + "M=239616,N=1152": { + "file": "silu_config_M239616_N1152.json", + "M": 239616, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.0482500000016 + }, + "M=239616,N=1280": { + "file": "silu_config_M239616_N1280.json", + "M": 239616, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2229.04825 + }, + "M=239616,N=1344": { + "file": "silu_config_M239616_N1344.json", + "M": 239616, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.96825 + }, + "M=239616,N=1408": { + "file": "silu_config_M239616_N1408.json", + "M": 239616, + "N": 1408, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2210.9279999999994 + }, + "M=239616,N=1440": { + "file": "silu_config_M239616_N1440.json", + "M": 239616, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.96825 + }, + "M=239616,N=1536": { + "file": "silu_config_M239616_N1536.json", + "M": 239616, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2224.448 + }, + "M=239616,N=1600": { + "file": "silu_config_M239616_N1600.json", + "M": 239616, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.92825 + }, + "M=239616,N=1664": { + "file": "silu_config_M239616_N1664.json", + "M": 239616, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2228.8482500000005 + }, + "M=239616,N=1728": { + "file": "silu_config_M239616_N1728.json", + "M": 239616, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2264.2482499999996 + }, + "M=239616,N=1760": { + "file": "silu_config_M239616_N1760.json", + "M": 239616, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.7682499999996 + }, + "M=239616,N=1792": { + "file": "silu_config_M239616_N1792.json", + "M": 239616, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2223.168 + }, + "M=239616,N=1920": { + "file": "silu_config_M239616_N1920.json", + "M": 239616, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.488250000001 + }, + "M=239616,N=2048": { + "file": "silu_config_M239616_N2048.json", + "M": 239616, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1769.40625 + }, + "M=239616,N=2080": { + "file": "silu_config_M239616_N2080.json", + "M": 239616, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2444.8489999999997 + }, + "M=239616,N=2240": { + "file": "silu_config_M239616_N2240.json", + "M": 239616, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.5695 + }, + "M=239616,N=2400": { + "file": "silu_config_M239616_N2400.json", + "M": 239616, + "N": 2400, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2751.25025 + }, + "M=239616,N=2560": { + "file": "silu_config_M239616_N2560.json", + "M": 239616, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2857.130750000003 + }, + "M=240640,N=128": { + "file": "silu_config_M240640_N128.json", + "M": 240640, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.79974999999996 + }, + "M=240640,N=160": { + "file": "silu_config_M240640_N160.json", + "M": 240640, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.0802500000001 + }, + "M=240640,N=192": { + "file": "silu_config_M240640_N192.json", + "M": 240640, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 304.28024999999997 + }, + "M=240640,N=256": { + "file": "silu_config_M240640_N256.json", + "M": 240640, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 260.6 + }, + "M=240640,N=320": { + "file": "silu_config_M240640_N320.json", + "M": 240640, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.8812499999999 + }, + "M=240640,N=384": { + "file": "silu_config_M240640_N384.json", + "M": 240640, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.7612500000005 + }, + "M=240640,N=480": { + "file": "silu_config_M240640_N480.json", + "M": 240640, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 586.4812499999998 + }, + "M=240640,N=512": { + "file": "silu_config_M240640_N512.json", + "M": 240640, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 468.20100000000025 + }, + "M=240640,N=576": { + "file": "silu_config_M240640_N576.json", + "M": 240640, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1133.44375 + }, + "M=240640,N=640": { + "file": "silu_config_M240640_N640.json", + "M": 240640, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1114.6435000000001 + }, + "M=240640,N=768": { + "file": "silu_config_M240640_N768.json", + "M": 240640, + "N": 
768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6834999999996 + }, + "M=240640,N=800": { + "file": "silu_config_M240640_N800.json", + "M": 240640, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1126.6035000000002 + }, + "M=240640,N=896": { + "file": "silu_config_M240640_N896.json", + "M": 240640, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.5234999999998 + }, + "M=240640,N=960": { + "file": "silu_config_M240640_N960.json", + "M": 240640, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1132.0434999999998 + }, + "M=240640,N=1024": { + "file": "silu_config_M240640_N1024.json", + "M": 240640, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 906.3627500000002 + }, + "M=240640,N=1120": { + "file": "silu_config_M240640_N1120.json", + "M": 240640, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.92825 + }, + "M=240640,N=1152": { + "file": "silu_config_M240640_N1152.json", + "M": 240640, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2213.2080000000014 + }, + "M=240640,N=1280": { + "file": "silu_config_M240640_N1280.json", + "M": 240640, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.888 + }, + "M=240640,N=1344": { + "file": "silu_config_M240640_N1344.json", + "M": 240640, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.6082499999993 + }, + "M=240640,N=1408": { + "file": "silu_config_M240640_N1408.json", + "M": 240640, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2209.728 + }, + "M=240640,N=1440": { + "file": "silu_config_M240640_N1440.json", + "M": 240640, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.4482499999995 + }, + "M=240640,N=1536": { + "file": "silu_config_M240640_N1536.json", + "M": 240640, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2223.0879999999997 + }, + "M=240640,N=1600": { + "file": 
"silu_config_M240640_N1600.json", + "M": 240640, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.4482499999995 + }, + "M=240640,N=1664": { + "file": "silu_config_M240640_N1664.json", + "M": 240640, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.4880000000003 + }, + "M=240640,N=1728": { + "file": "silu_config_M240640_N1728.json", + "M": 240640, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2241.2082499999997 + }, + "M=240640,N=1760": { + "file": "silu_config_M240640_N1760.json", + "M": 240640, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2255.2482499999996 + }, + "M=240640,N=1792": { + "file": "silu_config_M240640_N1792.json", + "M": 240640, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2231.4882500000003 + }, + "M=240640,N=1920": { + "file": "silu_config_M240640_N1920.json", + "M": 240640, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.048249999999 + }, + "M=240640,N=2048": { + "file": "silu_config_M240640_N2048.json", + "M": 240640, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1787.2462499999992 + }, + "M=240640,N=2080": { + "file": "silu_config_M240640_N2080.json", + "M": 240640, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2469.4092500000015 + }, + "M=240640,N=2240": { + "file": "silu_config_M240640_N2240.json", + "M": 240640, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2610.6497499999978 + }, + "M=240640,N=2400": { + "file": "silu_config_M240640_N2400.json", + "M": 240640, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.0502499999993 + }, + "M=240640,N=2560": { + "file": "silu_config_M240640_N2560.json", + "M": 240640, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2873.0509999999977 + }, + "M=241664,N=128": { + "file": "silu_config_M241664_N128.json", + "M": 241664, + "N": 128, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 134.5995 + }, + "M=241664,N=160": { + "file": "silu_config_M241664_N160.json", + "M": 241664, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.00025 + }, + "M=241664,N=192": { + "file": "silu_config_M241664_N192.json", + "M": 241664, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 297.00025 + }, + "M=241664,N=256": { + "file": "silu_config_M241664_N256.json", + "M": 241664, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 261.4800000000001 + }, + "M=241664,N=320": { + "file": "silu_config_M241664_N320.json", + "M": 241664, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 586.4412500000004 + }, + "M=241664,N=384": { + "file": "silu_config_M241664_N384.json", + "M": 241664, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.8412500000002 + }, + "M=241664,N=480": { + "file": "silu_config_M241664_N480.json", + "M": 241664, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.2415000000003 + }, + "M=241664,N=512": { + "file": "silu_config_M241664_N512.json", + "M": 241664, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 466.5609999999999 + }, + "M=241664,N=576": { + "file": "silu_config_M241664_N576.json", + "M": 241664, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1137.0434999999998 + }, + "M=241664,N=640": { + "file": "silu_config_M241664_N640.json", + "M": 241664, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1116.8835 + }, + "M=241664,N=768": { + "file": "silu_config_M241664_N768.json", + "M": 241664, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1121.6034999999997 + }, + "M=241664,N=800": { + "file": "silu_config_M241664_N800.json", + "M": 241664, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1141.72375 + }, + "M=241664,N=896": { + "file": "silu_config_M241664_N896.json", + "M": 241664, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1145.7237499999997 + }, + "M=241664,N=960": { + "file": "silu_config_M241664_N960.json", + "M": 241664, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1157.6037500000007 + }, + "M=241664,N=1024": { + "file": "silu_config_M241664_N1024.json", + "M": 241664, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 913.5627499999996 + }, + "M=241664,N=1120": { + "file": "silu_config_M241664_N1120.json", + "M": 241664, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2242.84825 + }, + "M=241664,N=1152": { + "file": "silu_config_M241664_N1152.json", + "M": 241664, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2235.4082499999995 + }, + "M=241664,N=1280": { + "file": "silu_config_M241664_N1280.json", + "M": 241664, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.5682499999994 + }, + "M=241664,N=1344": { + "file": "silu_config_M241664_N1344.json", + "M": 241664, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.92825 + }, + "M=241664,N=1408": { + "file": "silu_config_M241664_N1408.json", + "M": 241664, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.5282500000003 + }, + "M=241664,N=1440": { + "file": "silu_config_M241664_N1440.json", + "M": 241664, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.2882499999996 + }, + "M=241664,N=1536": { + "file": "silu_config_M241664_N1536.json", + "M": 241664, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.2082499999997 + }, + "M=241664,N=1600": { + "file": "silu_config_M241664_N1600.json", + "M": 241664, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2274.3682500000004 + }, + "M=241664,N=1664": { + "file": "silu_config_M241664_N1664.json", + "M": 241664, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2260.2482500000006 + }, + "M=241664,N=1728": { + "file": "silu_config_M241664_N1728.json", + "M": 241664, + 
"N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.728249999999 + }, + "M=241664,N=1760": { + "file": "silu_config_M241664_N1760.json", + "M": 241664, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2271.6482499999993 + }, + "M=241664,N=1792": { + "file": "silu_config_M241664_N1792.json", + "M": 241664, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.08825 + }, + "M=241664,N=1920": { + "file": "silu_config_M241664_N1920.json", + "M": 241664, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.60825 + }, + "M=241664,N=2048": { + "file": "silu_config_M241664_N2048.json", + "M": 241664, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1799.7664999999997 + }, + "M=241664,N=2080": { + "file": "silu_config_M241664_N2080.json", + "M": 241664, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2516.969249999999 + }, + "M=241664,N=2240": { + "file": "silu_config_M241664_N2240.json", + "M": 241664, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2630.7297499999995 + }, + "M=241664,N=2400": { + "file": "silu_config_M241664_N2400.json", + "M": 241664, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.410499999998 + }, + "M=241664,N=2560": { + "file": "silu_config_M241664_N2560.json", + "M": 241664, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.131000000002 + }, + "M=242688,N=128": { + "file": "silu_config_M242688_N128.json", + "M": 242688, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.2794999999999 + }, + "M=242688,N=160": { + "file": "silu_config_M242688_N160.json", + "M": 242688, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.4002500000002 + }, + "M=242688,N=192": { + "file": "silu_config_M242688_N192.json", + "M": 242688, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 301.72024999999996 + }, + "M=242688,N=256": { + "file": 
"silu_config_M242688_N256.json", + "M": 242688, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 285.19999999999993 + }, + "M=242688,N=320": { + "file": "silu_config_M242688_N320.json", + "M": 242688, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 588.84125 + }, + "M=242688,N=384": { + "file": "silu_config_M242688_N384.json", + "M": 242688, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 584.5612500000004 + }, + "M=242688,N=480": { + "file": "silu_config_M242688_N480.json", + "M": 242688, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 595.3215000000002 + }, + "M=242688,N=512": { + "file": "silu_config_M242688_N512.json", + "M": 242688, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 469.6009999999999 + }, + "M=242688,N=576": { + "file": "silu_config_M242688_N576.json", + "M": 242688, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.7635000000005 + }, + "M=242688,N=640": { + "file": "silu_config_M242688_N640.json", + "M": 242688, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1135.0037499999999 + }, + "M=242688,N=768": { + "file": "silu_config_M242688_N768.json", + "M": 242688, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.0435000000002 + }, + "M=242688,N=800": { + "file": "silu_config_M242688_N800.json", + "M": 242688, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1143.4837499999999 + }, + "M=242688,N=896": { + "file": "silu_config_M242688_N896.json", + "M": 242688, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.3635 + }, + "M=242688,N=960": { + "file": "silu_config_M242688_N960.json", + "M": 242688, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1149.2837500000005 + }, + "M=242688,N=1024": { + "file": "silu_config_M242688_N1024.json", + "M": 242688, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 917.16275 + }, + "M=242688,N=1120": { 
+ "file": "silu_config_M242688_N1120.json", + "M": 242688, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2283.2885000000006 + }, + "M=242688,N=1152": { + "file": "silu_config_M242688_N1152.json", + "M": 242688, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.608250000001 + }, + "M=242688,N=1280": { + "file": "silu_config_M242688_N1280.json", + "M": 242688, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.4882500000003 + }, + "M=242688,N=1344": { + "file": "silu_config_M242688_N1344.json", + "M": 242688, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.80825 + }, + "M=242688,N=1408": { + "file": "silu_config_M242688_N1408.json", + "M": 242688, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.848250000001 + }, + "M=242688,N=1440": { + "file": "silu_config_M242688_N1440.json", + "M": 242688, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.72825 + }, + "M=242688,N=1536": { + "file": "silu_config_M242688_N1536.json", + "M": 242688, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.0882500000007 + }, + "M=242688,N=1600": { + "file": "silu_config_M242688_N1600.json", + "M": 242688, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2263.1282499999998 + }, + "M=242688,N=1664": { + "file": "silu_config_M242688_N1664.json", + "M": 242688, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2279.96825 + }, + "M=242688,N=1728": { + "file": "silu_config_M242688_N1728.json", + "M": 242688, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2287.008500000001 + }, + "M=242688,N=1760": { + "file": "silu_config_M242688_N1760.json", + "M": 242688, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2260.2482500000006 + }, + "M=242688,N=1792": { + "file": "silu_config_M242688_N1792.json", + "M": 242688, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2285.8485 + }, + "M=242688,N=1920": { + "file": "silu_config_M242688_N1920.json", + "M": 242688, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2280.4882500000003 + }, + "M=242688,N=2048": { + "file": "silu_config_M242688_N2048.json", + "M": 242688, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1811.0065000000004 + }, + "M=242688,N=2080": { + "file": "silu_config_M242688_N2080.json", + "M": 242688, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.4492500000006 + }, + "M=242688,N=2240": { + "file": "silu_config_M242688_N2240.json", + "M": 242688, + "N": 2240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2655.769999999997 + }, + "M=242688,N=2400": { + "file": "silu_config_M242688_N2400.json", + "M": 242688, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.490500000001 + }, + "M=242688,N=2560": { + "file": "silu_config_M242688_N2560.json", + "M": 242688, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.371000000001 + }, + "M=243712,N=128": { + "file": "silu_config_M243712_N128.json", + "M": 243712, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.95975 + }, + "M=243712,N=160": { + "file": "silu_config_M243712_N160.json", + "M": 243712, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.16025000000013 + }, + "M=243712,N=192": { + "file": "silu_config_M243712_N192.json", + "M": 243712, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.08025 + }, + "M=243712,N=256": { + "file": "silu_config_M243712_N256.json", + "M": 243712, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.55999999999995 + }, + "M=243712,N=320": { + "file": "silu_config_M243712_N320.json", + "M": 243712, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.8415 + }, + "M=243712,N=384": { + "file": "silu_config_M243712_N384.json", + "M": 243712, + "N": 384, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 587.6412499999997 + }, + "M=243712,N=480": { + "file": "silu_config_M243712_N480.json", + "M": 243712, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2415000000001 + }, + "M=243712,N=512": { + "file": "silu_config_M243712_N512.json", + "M": 243712, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 477.44075 + }, + "M=243712,N=576": { + "file": "silu_config_M243712_N576.json", + "M": 243712, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1146.1637499999997 + }, + "M=243712,N=640": { + "file": "silu_config_M243712_N640.json", + "M": 243712, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1139.76375 + }, + "M=243712,N=768": { + "file": "silu_config_M243712_N768.json", + "M": 243712, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.44375 + }, + "M=243712,N=800": { + "file": "silu_config_M243712_N800.json", + "M": 243712, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1148.32375 + }, + "M=243712,N=896": { + "file": "silu_config_M243712_N896.json", + "M": 243712, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.88375 + }, + "M=243712,N=960": { + "file": "silu_config_M243712_N960.json", + "M": 243712, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1153.9237499999995 + }, + "M=243712,N=1024": { + "file": "silu_config_M243712_N1024.json", + "M": 243712, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 920.7227499999999 + }, + "M=243712,N=1120": { + "file": "silu_config_M243712_N1120.json", + "M": 243712, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.60825 + }, + "M=243712,N=1152": { + "file": "silu_config_M243712_N1152.json", + "M": 243712, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2254.2482499999996 + }, + "M=243712,N=1280": { + "file": "silu_config_M243712_N1280.json", + "M": 243712, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2258.3282499999996 + }, + "M=243712,N=1344": { + "file": "silu_config_M243712_N1344.json", + "M": 243712, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2277.4482500000004 + }, + "M=243712,N=1408": { + "file": "silu_config_M243712_N1408.json", + "M": 243712, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2262.2482500000006 + }, + "M=243712,N=1440": { + "file": "silu_config_M243712_N1440.json", + "M": 243712, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.0085 + }, + "M=243712,N=1536": { + "file": "silu_config_M243712_N1536.json", + "M": 243712, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2265.4082499999995 + }, + "M=243712,N=1600": { + "file": "silu_config_M243712_N1600.json", + "M": 243712, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.3284999999996 + }, + "M=243712,N=1664": { + "file": "silu_config_M243712_N1664.json", + "M": 243712, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.88825 + }, + "M=243712,N=1728": { + "file": "silu_config_M243712_N1728.json", + "M": 243712, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2286.1285 + }, + "M=243712,N=1760": { + "file": "silu_config_M243712_N1760.json", + "M": 243712, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2280.00825 + }, + "M=243712,N=1792": { + "file": "silu_config_M243712_N1792.json", + "M": 243712, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2285.4885000000004 + }, + "M=243712,N=1920": { + "file": "silu_config_M243712_N1920.json", + "M": 243712, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2270.92825 + }, + "M=243712,N=2048": { + "file": "silu_config_M243712_N2048.json", + "M": 243712, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1800.5265 + }, + "M=243712,N=2080": { + "file": "silu_config_M243712_N2080.json", + "M": 243712, + "N": 2080, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2518.8092499999993 + }, + "M=243712,N=2240": { + "file": "silu_config_M243712_N2240.json", + "M": 243712, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2653.0499999999993 + }, + "M=243712,N=2400": { + "file": "silu_config_M243712_N2400.json", + "M": 243712, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.650499999999 + }, + "M=243712,N=2560": { + "file": "silu_config_M243712_N2560.json", + "M": 243712, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2912.8110000000006 + }, + "M=244736,N=128": { + "file": "silu_config_M244736_N128.json", + "M": 244736, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 136.31950000000003 + }, + "M=244736,N=160": { + "file": "silu_config_M244736_N160.json", + "M": 244736, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.72025 + }, + "M=244736,N=192": { + "file": "silu_config_M244736_N192.json", + "M": 244736, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 290.1599999999998 + }, + "M=244736,N=256": { + "file": "silu_config_M244736_N256.json", + "M": 244736, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.55999999999995 + }, + "M=244736,N=320": { + "file": "silu_config_M244736_N320.json", + "M": 244736, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.7214999999999 + }, + "M=244736,N=384": { + "file": "silu_config_M244736_N384.json", + "M": 244736, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.5612500000002 + }, + "M=244736,N=480": { + "file": "silu_config_M244736_N480.json", + "M": 244736, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.4415000000004 + }, + "M=244736,N=512": { + "file": "silu_config_M244736_N512.json", + "M": 244736, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 472.2810000000004 + }, + "M=244736,N=576": { + "file": 
"silu_config_M244736_N576.json", + "M": 244736, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1148.2837499999996 + }, + "M=244736,N=640": { + "file": "silu_config_M244736_N640.json", + "M": 244736, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1151.84375 + }, + "M=244736,N=768": { + "file": "silu_config_M244736_N768.json", + "M": 244736, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.6037499999998 + }, + "M=244736,N=800": { + "file": "silu_config_M244736_N800.json", + "M": 244736, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1152.9637500000003 + }, + "M=244736,N=896": { + "file": "silu_config_M244736_N896.json", + "M": 244736, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1151.72375 + }, + "M=244736,N=960": { + "file": "silu_config_M244736_N960.json", + "M": 244736, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1161.0037500000003 + }, + "M=244736,N=1024": { + "file": "silu_config_M244736_N1024.json", + "M": 244736, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 924.9227499999997 + }, + "M=244736,N=1120": { + "file": "silu_config_M244736_N1120.json", + "M": 244736, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2291.8085 + }, + "M=244736,N=1152": { + "file": "silu_config_M244736_N1152.json", + "M": 244736, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2253.4082500000004 + }, + "M=244736,N=1280": { + "file": "silu_config_M244736_N1280.json", + "M": 244736, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2288.2884999999997 + }, + "M=244736,N=1344": { + "file": "silu_config_M244736_N1344.json", + "M": 244736, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2297.2884999999997 + }, + "M=244736,N=1408": { + "file": "silu_config_M244736_N1408.json", + "M": 244736, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.68825 + }, + 
"M=244736,N=1440": { + "file": "silu_config_M244736_N1440.json", + "M": 244736, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2292.2884999999997 + }, + "M=244736,N=1536": { + "file": "silu_config_M244736_N1536.json", + "M": 244736, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2264.2082499999997 + }, + "M=244736,N=1600": { + "file": "silu_config_M244736_N1600.json", + "M": 244736, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.00825 + }, + "M=244736,N=1664": { + "file": "silu_config_M244736_N1664.json", + "M": 244736, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.728249999999 + }, + "M=244736,N=1728": { + "file": "silu_config_M244736_N1728.json", + "M": 244736, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2295.768500000001 + }, + "M=244736,N=1760": { + "file": "silu_config_M244736_N1760.json", + "M": 244736, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2310.3684999999996 + }, + "M=244736,N=1792": { + "file": "silu_config_M244736_N1792.json", + "M": 244736, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2284.048500000001 + }, + "M=244736,N=1920": { + "file": "silu_config_M244736_N1920.json", + "M": 244736, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2310.6484999999993 + }, + "M=244736,N=2048": { + "file": "silu_config_M244736_N2048.json", + "M": 244736, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1816.5265 + }, + "M=244736,N=2080": { + "file": "silu_config_M244736_N2080.json", + "M": 244736, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2518.6492499999995 + }, + "M=244736,N=2240": { + "file": "silu_config_M244736_N2240.json", + "M": 244736, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.3700000000017 + }, + "M=244736,N=2400": { + "file": "silu_config_M244736_N2400.json", + "M": 244736, + "N": 2400, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 2795.130500000003 + }, + "M=244736,N=2560": { + "file": "silu_config_M244736_N2560.json", + "M": 244736, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2946.4509999999973 + }, + "M=245760,N=128": { + "file": "silu_config_M245760_N128.json", + "M": 245760, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 136.47950000000003 + }, + "M=245760,N=160": { + "file": "silu_config_M245760_N160.json", + "M": 245760, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.44025 + }, + "M=245760,N=192": { + "file": "silu_config_M245760_N192.json", + "M": 245760, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.52025 + }, + "M=245760,N=256": { + "file": "silu_config_M245760_N256.json", + "M": 245760, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.6800000000002 + }, + "M=245760,N=320": { + "file": "silu_config_M245760_N320.json", + "M": 245760, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 592.0415000000003 + }, + "M=245760,N=384": { + "file": "silu_config_M245760_N384.json", + "M": 245760, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 589.2414999999999 + }, + "M=245760,N=480": { + "file": "silu_config_M245760_N480.json", + "M": 245760, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2415000000001 + }, + "M=245760,N=512": { + "file": "silu_config_M245760_N512.json", + "M": 245760, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 454.24075000000016 + }, + "M=245760,N=576": { + "file": "silu_config_M245760_N576.json", + "M": 245760, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.3235 + }, + "M=245760,N=640": { + "file": "silu_config_M245760_N640.json", + "M": 245760, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.2034999999996 + }, + "M=245760,N=768": { + "file": "silu_config_M245760_N768.json", + "M": 245760, + "N": 768, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1122.0034999999998 + }, + "M=245760,N=800": { + "file": "silu_config_M245760_N800.json", + "M": 245760, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.0435000000002 + }, + "M=245760,N=896": { + "file": "silu_config_M245760_N896.json", + "M": 245760, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1126.4435000000003 + }, + "M=245760,N=960": { + "file": "silu_config_M245760_N960.json", + "M": 245760, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.6837499999997 + }, + "M=245760,N=1024": { + "file": "silu_config_M245760_N1024.json", + "M": 245760, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 925.6429999999996 + }, + "M=245760,N=1120": { + "file": "silu_config_M245760_N1120.json", + "M": 245760, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2254.2082499999997 + }, + "M=245760,N=1152": { + "file": "silu_config_M245760_N1152.json", + "M": 245760, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.728249999999 + }, + "M=245760,N=1280": { + "file": "silu_config_M245760_N1280.json", + "M": 245760, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2242.4482500000004 + }, + "M=245760,N=1344": { + "file": "silu_config_M245760_N1344.json", + "M": 245760, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2259.4882500000012 + }, + "M=245760,N=1408": { + "file": "silu_config_M245760_N1408.json", + "M": 245760, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.3682500000004 + }, + "M=245760,N=1440": { + "file": "silu_config_M245760_N1440.json", + "M": 245760, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2284.96825 + }, + "M=245760,N=1536": { + "file": "silu_config_M245760_N1536.json", + "M": 245760, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.92825 + }, + "M=245760,N=1600": { + "file": 
"silu_config_M245760_N1600.json", + "M": 245760, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2264.6082499999993 + }, + "M=245760,N=1664": { + "file": "silu_config_M245760_N1664.json", + "M": 245760, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2272.76825 + }, + "M=245760,N=1728": { + "file": "silu_config_M245760_N1728.json", + "M": 245760, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.72825 + }, + "M=245760,N=1760": { + "file": "silu_config_M245760_N1760.json", + "M": 245760, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2271.3282499999996 + }, + "M=245760,N=1792": { + "file": "silu_config_M245760_N1792.json", + "M": 245760, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.5682500000003 + }, + "M=245760,N=1920": { + "file": "silu_config_M245760_N1920.json", + "M": 245760, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2274.6082499999993 + }, + "M=245760,N=2048": { + "file": "silu_config_M245760_N2048.json", + "M": 245760, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1806.7664999999997 + }, + "M=245760,N=2080": { + "file": "silu_config_M245760_N2080.json", + "M": 245760, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.4092500000006 + }, + "M=245760,N=2240": { + "file": "silu_config_M245760_N2240.json", + "M": 245760, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.4497499999998 + }, + "M=245760,N=2400": { + "file": "silu_config_M245760_N2400.json", + "M": 245760, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2794.4905 + }, + "M=245760,N=2560": { + "file": "silu_config_M245760_N2560.json", + "M": 245760, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2906.1709999999985 + }, + "M=246784,N=128": { + "file": "silu_config_M246784_N128.json", + "M": 246784, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.8395 
+ }, + "M=246784,N=160": { + "file": "silu_config_M246784_N160.json", + "M": 246784, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.32025 + }, + "M=246784,N=192": { + "file": "silu_config_M246784_N192.json", + "M": 246784, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 302.96025000000014 + }, + "M=246784,N=256": { + "file": "silu_config_M246784_N256.json", + "M": 246784, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 267.0 + }, + "M=246784,N=320": { + "file": "silu_config_M246784_N320.json", + "M": 246784, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.3615 + }, + "M=246784,N=384": { + "file": "silu_config_M246784_N384.json", + "M": 246784, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 594.7615000000003 + }, + "M=246784,N=480": { + "file": "silu_config_M246784_N480.json", + "M": 246784, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 605.1614999999999 + }, + "M=246784,N=512": { + "file": "silu_config_M246784_N512.json", + "M": 246784, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 475.60075000000006 + }, + "M=246784,N=576": { + "file": "silu_config_M246784_N576.json", + "M": 246784, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1168.1637500000002 + }, + "M=246784,N=640": { + "file": "silu_config_M246784_N640.json", + "M": 246784, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.6837500000001 + }, + "M=246784,N=768": { + "file": "silu_config_M246784_N768.json", + "M": 246784, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1164.4837500000003 + }, + "M=246784,N=800": { + "file": "silu_config_M246784_N800.json", + "M": 246784, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.5237499999998 + }, + "M=246784,N=896": { + "file": "silu_config_M246784_N896.json", + "M": 246784, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1169.1637499999997 + }, + "M=246784,N=960": { + "file": "silu_config_M246784_N960.json", + "M": 246784, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.4837499999999 + }, + "M=246784,N=1024": { + "file": "silu_config_M246784_N1024.json", + "M": 246784, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 932.0827500000005 + }, + "M=246784,N=1120": { + "file": "silu_config_M246784_N1120.json", + "M": 246784, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2300.1285000000007 + }, + "M=246784,N=1152": { + "file": "silu_config_M246784_N1152.json", + "M": 246784, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2292.7285 + }, + "M=246784,N=1280": { + "file": "silu_config_M246784_N1280.json", + "M": 246784, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2296.6085000000003 + }, + "M=246784,N=1344": { + "file": "silu_config_M246784_N1344.json", + "M": 246784, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.4084999999995 + }, + "M=246784,N=1408": { + "file": "silu_config_M246784_N1408.json", + "M": 246784, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2290.1285 + }, + "M=246784,N=1440": { + "file": "silu_config_M246784_N1440.json", + "M": 246784, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2321.6085000000003 + }, + "M=246784,N=1536": { + "file": "silu_config_M246784_N1536.json", + "M": 246784, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2283.2082500000006 + }, + "M=246784,N=1600": { + "file": "silu_config_M246784_N1600.json", + "M": 246784, + "N": 1600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2331.2884999999997 + }, + "M=246784,N=1664": { + "file": "silu_config_M246784_N1664.json", + "M": 246784, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2287.5282500000003 + }, + "M=246784,N=1728": { + "file": "silu_config_M246784_N1728.json", + "M": 246784, + "N": 1728, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.6485000000002 + }, + "M=246784,N=1760": { + "file": "silu_config_M246784_N1760.json", + "M": 246784, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.5285000000003 + }, + "M=246784,N=1792": { + "file": "silu_config_M246784_N1792.json", + "M": 246784, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.0485 + }, + "M=246784,N=1920": { + "file": "silu_config_M246784_N1920.json", + "M": 246784, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2309.888500000001 + }, + "M=246784,N=2048": { + "file": "silu_config_M246784_N2048.json", + "M": 246784, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1822.2864999999993 + }, + "M=246784,N=2080": { + "file": "silu_config_M246784_N2080.json", + "M": 246784, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.8094999999985 + }, + "M=246784,N=2240": { + "file": "silu_config_M246784_N2240.json", + "M": 246784, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.9699999999993 + }, + "M=246784,N=2400": { + "file": "silu_config_M246784_N2400.json", + "M": 246784, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2848.6907500000016 + }, + "M=246784,N=2560": { + "file": "silu_config_M246784_N2560.json", + "M": 246784, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2950.0510000000013 + }, + "M=247808,N=128": { + "file": "silu_config_M247808_N128.json", + "M": 247808, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.95974999999999 + }, + "M=247808,N=160": { + "file": "silu_config_M247808_N160.json", + "M": 247808, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 303.60025000000024 + }, + "M=247808,N=192": { + "file": "silu_config_M247808_N192.json", + "M": 247808, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 314.68025 + }, + "M=247808,N=256": { + "file": 
"silu_config_M247808_N256.json", + "M": 247808, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 290.6402499999998 + }, + "M=247808,N=320": { + "file": "silu_config_M247808_N320.json", + "M": 247808, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.8015000000003 + }, + "M=247808,N=384": { + "file": "silu_config_M247808_N384.json", + "M": 247808, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 597.6815000000001 + }, + "M=247808,N=480": { + "file": "silu_config_M247808_N480.json", + "M": 247808, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.4415000000004 + }, + "M=247808,N=512": { + "file": "silu_config_M247808_N512.json", + "M": 247808, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 481.24099999999976 + }, + "M=247808,N=576": { + "file": "silu_config_M247808_N576.json", + "M": 247808, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.28375 + }, + "M=247808,N=640": { + "file": "silu_config_M247808_N640.json", + "M": 247808, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.0834999999997 + }, + "M=247808,N=768": { + "file": "silu_config_M247808_N768.json", + "M": 247808, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1148.2437499999996 + }, + "M=247808,N=800": { + "file": "silu_config_M247808_N800.json", + "M": 247808, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1157.2437499999996 + }, + "M=247808,N=896": { + "file": "silu_config_M247808_N896.json", + "M": 247808, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.2837499999996 + }, + "M=247808,N=960": { + "file": "silu_config_M247808_N960.json", + "M": 247808, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.24375 + }, + "M=247808,N=1024": { + "file": "silu_config_M247808_N1024.json", + "M": 247808, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 935.6429999999996 + }, + 
"M=247808,N=1120": { + "file": "silu_config_M247808_N1120.json", + "M": 247808, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.768499999999 + }, + "M=247808,N=1152": { + "file": "silu_config_M247808_N1152.json", + "M": 247808, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2291.768499999999 + }, + "M=247808,N=1280": { + "file": "silu_config_M247808_N1280.json", + "M": 247808, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.3685000000005 + }, + "M=247808,N=1344": { + "file": "silu_config_M247808_N1344.json", + "M": 247808, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2305.0085 + }, + "M=247808,N=1408": { + "file": "silu_config_M247808_N1408.json", + "M": 247808, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2289.0485 + }, + "M=247808,N=1440": { + "file": "silu_config_M247808_N1440.json", + "M": 247808, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.6885 + }, + "M=247808,N=1536": { + "file": "silu_config_M247808_N1536.json", + "M": 247808, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.0085 + }, + "M=247808,N=1600": { + "file": "silu_config_M247808_N1600.json", + "M": 247808, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.6085000000003 + }, + "M=247808,N=1664": { + "file": "silu_config_M247808_N1664.json", + "M": 247808, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2306.9685 + }, + "M=247808,N=1728": { + "file": "silu_config_M247808_N1728.json", + "M": 247808, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2334.4084999999995 + }, + "M=247808,N=1760": { + "file": "silu_config_M247808_N1760.json", + "M": 247808, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2318.1684999999998 + }, + "M=247808,N=1792": { + "file": "silu_config_M247808_N1792.json", + "M": 247808, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2301.968499999999 + }, + "M=247808,N=1920": { + "file": "silu_config_M247808_N1920.json", + "M": 247808, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.2485000000015 + }, + "M=247808,N=2048": { + "file": "silu_config_M247808_N2048.json", + "M": 247808, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1839.5265 + }, + "M=247808,N=2080": { + "file": "silu_config_M247808_N2080.json", + "M": 247808, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.449499999999 + }, + "M=247808,N=2240": { + "file": "silu_config_M247808_N2240.json", + "M": 247808, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2706.7299999999977 + }, + "M=247808,N=2400": { + "file": "silu_config_M247808_N2400.json", + "M": 247808, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.1707499999984 + }, + "M=247808,N=2560": { + "file": "silu_config_M247808_N2560.json", + "M": 247808, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2975.53125 + }, + "M=248832,N=128": { + "file": "silu_config_M248832_N128.json", + "M": 248832, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.59975 + }, + "M=248832,N=160": { + "file": "silu_config_M248832_N160.json", + "M": 248832, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 319.6805 + }, + "M=248832,N=192": { + "file": "silu_config_M248832_N192.json", + "M": 248832, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 320.00025000000005 + }, + "M=248832,N=256": { + "file": "silu_config_M248832_N256.json", + "M": 248832, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.44000000000005 + }, + "M=248832,N=320": { + "file": "silu_config_M248832_N320.json", + "M": 248832, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.8414999999998 + }, + "M=248832,N=384": { + "file": "silu_config_M248832_N384.json", + "M": 248832, + "N": 384, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 600.2015000000001 + }, + "M=248832,N=480": { + "file": "silu_config_M248832_N480.json", + "M": 248832, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 609.8415 + }, + "M=248832,N=512": { + "file": "silu_config_M248832_N512.json", + "M": 248832, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 487.4010000000005 + }, + "M=248832,N=576": { + "file": "silu_config_M248832_N576.json", + "M": 248832, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1166.8837500000004 + }, + "M=248832,N=640": { + "file": "silu_config_M248832_N640.json", + "M": 248832, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1163.4437500000004 + }, + "M=248832,N=768": { + "file": "silu_config_M248832_N768.json", + "M": 248832, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1166.0437499999998 + }, + "M=248832,N=800": { + "file": "silu_config_M248832_N800.json", + "M": 248832, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1164.6037499999998 + }, + "M=248832,N=896": { + "file": "silu_config_M248832_N896.json", + "M": 248832, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.3237499999996 + }, + "M=248832,N=960": { + "file": "silu_config_M248832_N960.json", + "M": 248832, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1156.76375 + }, + "M=248832,N=1024": { + "file": "silu_config_M248832_N1024.json", + "M": 248832, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 940.16275 + }, + "M=248832,N=1120": { + "file": "silu_config_M248832_N1120.json", + "M": 248832, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.5285000000003 + }, + "M=248832,N=1152": { + "file": "silu_config_M248832_N1152.json", + "M": 248832, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2291.0885 + }, + "M=248832,N=1280": { + "file": "silu_config_M248832_N1280.json", + "M": 248832, + "N": 1280, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2305.1685000000007 + }, + "M=248832,N=1344": { + "file": "silu_config_M248832_N1344.json", + "M": 248832, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.6085000000003 + }, + "M=248832,N=1408": { + "file": "silu_config_M248832_N1408.json", + "M": 248832, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2298.7685 + }, + "M=248832,N=1440": { + "file": "silu_config_M248832_N1440.json", + "M": 248832, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.1285 + }, + "M=248832,N=1536": { + "file": "silu_config_M248832_N1536.json", + "M": 248832, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2312.0885 + }, + "M=248832,N=1600": { + "file": "silu_config_M248832_N1600.json", + "M": 248832, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2350.6887500000003 + }, + "M=248832,N=1664": { + "file": "silu_config_M248832_N1664.json", + "M": 248832, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2306.0885000000007 + }, + "M=248832,N=1728": { + "file": "silu_config_M248832_N1728.json", + "M": 248832, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.6084999999994 + }, + "M=248832,N=1760": { + "file": "silu_config_M248832_N1760.json", + "M": 248832, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.2884999999987 + }, + "M=248832,N=1792": { + "file": "silu_config_M248832_N1792.json", + "M": 248832, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2321.8885 + }, + "M=248832,N=1920": { + "file": "silu_config_M248832_N1920.json", + "M": 248832, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2328.848500000001 + }, + "M=248832,N=2048": { + "file": "silu_config_M248832_N2048.json", + "M": 248832, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1852.7664999999997 + }, + "M=248832,N=2080": { + "file": 
"silu_config_M248832_N2080.json", + "M": 248832, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2550.8895 + }, + "M=248832,N=2240": { + "file": "silu_config_M248832_N2240.json", + "M": 248832, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.250000000001 + }, + "M=248832,N=2400": { + "file": "silu_config_M248832_N2400.json", + "M": 248832, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2866.0507500000012 + }, + "M=248832,N=2560": { + "file": "silu_config_M248832_N2560.json", + "M": 248832, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.1712500000012 + }, + "M=249856,N=128": { + "file": "silu_config_M249856_N128.json", + "M": 249856, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.3995 + }, + "M=249856,N=160": { + "file": "silu_config_M249856_N160.json", + "M": 249856, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 295.6802500000002 + }, + "M=249856,N=192": { + "file": "silu_config_M249856_N192.json", + "M": 249856, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.52 + }, + "M=249856,N=256": { + "file": "silu_config_M249856_N256.json", + "M": 249856, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 282.1199999999999 + }, + "M=249856,N=320": { + "file": "silu_config_M249856_N320.json", + "M": 249856, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 606.6814999999999 + }, + "M=249856,N=384": { + "file": "silu_config_M249856_N384.json", + "M": 249856, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.9214999999999 + }, + "M=249856,N=480": { + "file": "silu_config_M249856_N480.json", + "M": 249856, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 613.4015000000002 + }, + "M=249856,N=512": { + "file": "silu_config_M249856_N512.json", + "M": 249856, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 485.001 + }, + "M=249856,N=576": { + "file": 
"silu_config_M249856_N576.json", + "M": 249856, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1163.5237499999998 + }, + "M=249856,N=640": { + "file": "silu_config_M249856_N640.json", + "M": 249856, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.36375 + }, + "M=249856,N=768": { + "file": "silu_config_M249856_N768.json", + "M": 249856, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1168.2437499999996 + }, + "M=249856,N=800": { + "file": "silu_config_M249856_N800.json", + "M": 249856, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1169.40375 + }, + "M=249856,N=896": { + "file": "silu_config_M249856_N896.json", + "M": 249856, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.6037500000002 + }, + "M=249856,N=960": { + "file": "silu_config_M249856_N960.json", + "M": 249856, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1196.32375 + }, + "M=249856,N=1024": { + "file": "silu_config_M249856_N1024.json", + "M": 249856, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 944.9229999999998 + }, + "M=249856,N=1120": { + "file": "silu_config_M249856_N1120.json", + "M": 249856, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2339.2884999999997 + }, + "M=249856,N=1152": { + "file": "silu_config_M249856_N1152.json", + "M": 249856, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2311.0485 + }, + "M=249856,N=1280": { + "file": "silu_config_M249856_N1280.json", + "M": 249856, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2312.2484999999997 + }, + "M=249856,N=1344": { + "file": "silu_config_M249856_N1344.json", + "M": 249856, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.0485 + }, + "M=249856,N=1408": { + "file": "silu_config_M249856_N1408.json", + "M": 249856, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.888499999999 + }, + "M=249856,N=1440": 
{ + "file": "silu_config_M249856_N1440.json", + "M": 249856, + "N": 1440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2343.2084999999997 + }, + "M=249856,N=1536": { + "file": "silu_config_M249856_N1536.json", + "M": 249856, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2308.2084999999997 + }, + "M=249856,N=1600": { + "file": "silu_config_M249856_N1600.json", + "M": 249856, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2344.6484999999993 + }, + "M=249856,N=1664": { + "file": "silu_config_M249856_N1664.json", + "M": 249856, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.4885000000004 + }, + "M=249856,N=1728": { + "file": "silu_config_M249856_N1728.json", + "M": 249856, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.7285 + }, + "M=249856,N=1760": { + "file": "silu_config_M249856_N1760.json", + "M": 249856, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2321.4085000000005 + }, + "M=249856,N=1792": { + "file": "silu_config_M249856_N1792.json", + "M": 249856, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2328.1685000000007 + }, + "M=249856,N=1920": { + "file": "silu_config_M249856_N1920.json", + "M": 249856, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2324.5685000000003 + }, + "M=249856,N=2048": { + "file": "silu_config_M249856_N2048.json", + "M": 249856, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1854.2064999999993 + }, + "M=249856,N=2080": { + "file": "silu_config_M249856_N2080.json", + "M": 249856, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.889500000002 + }, + "M=249856,N=2240": { + "file": "silu_config_M249856_N2240.json", + "M": 249856, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.5700000000006 + }, + "M=249856,N=2400": { + "file": "silu_config_M249856_N2400.json", + "M": 249856, + "N": 2400, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2873.410749999998 + }, + "M=249856,N=2560": { + "file": "silu_config_M249856_N2560.json", + "M": 249856, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3008.531250000002 + }, + "M=250880,N=128": { + "file": "silu_config_M250880_N128.json", + "M": 250880, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.95950000000002 + }, + "M=250880,N=160": { + "file": "silu_config_M250880_N160.json", + "M": 250880, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 311.2004999999999 + }, + "M=250880,N=192": { + "file": "silu_config_M250880_N192.json", + "M": 250880, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 300.84024999999986 + }, + "M=250880,N=256": { + "file": "silu_config_M250880_N256.json", + "M": 250880, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.9200000000001 + }, + "M=250880,N=320": { + "file": "silu_config_M250880_N320.json", + "M": 250880, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 604.8415 + }, + "M=250880,N=384": { + "file": "silu_config_M250880_N384.json", + "M": 250880, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 601.7614999999998 + }, + "M=250880,N=480": { + "file": "silu_config_M250880_N480.json", + "M": 250880, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.4815000000003 + }, + "M=250880,N=512": { + "file": "silu_config_M250880_N512.json", + "M": 250880, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.32100000000037 + }, + "M=250880,N=576": { + "file": "silu_config_M250880_N576.json", + "M": 250880, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1159.0837500000002 + }, + "M=250880,N=640": { + "file": "silu_config_M250880_N640.json", + "M": 250880, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.1637499999997 + }, + "M=250880,N=768": { + "file": "silu_config_M250880_N768.json", + "M": 250880, + "N": 768, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1176.9237499999995 + }, + "M=250880,N=800": { + "file": "silu_config_M250880_N800.json", + "M": 250880, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.0037499999999 + }, + "M=250880,N=896": { + "file": "silu_config_M250880_N896.json", + "M": 250880, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.6837499999997 + }, + "M=250880,N=960": { + "file": "silu_config_M250880_N960.json", + "M": 250880, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.7637500000005 + }, + "M=250880,N=1024": { + "file": "silu_config_M250880_N1024.json", + "M": 250880, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 946.2427500000008 + }, + "M=250880,N=1120": { + "file": "silu_config_M250880_N1120.json", + "M": 250880, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2311.4885000000004 + }, + "M=250880,N=1152": { + "file": "silu_config_M250880_N1152.json", + "M": 250880, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2296.3685000000005 + }, + "M=250880,N=1280": { + "file": "silu_config_M250880_N1280.json", + "M": 250880, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2318.1285 + }, + "M=250880,N=1344": { + "file": "silu_config_M250880_N1344.json", + "M": 250880, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2322.4085000000005 + }, + "M=250880,N=1408": { + "file": "silu_config_M250880_N1408.json", + "M": 250880, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2310.0085 + }, + "M=250880,N=1440": { + "file": "silu_config_M250880_N1440.json", + "M": 250880, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2325.6485000000002 + }, + "M=250880,N=1536": { + "file": "silu_config_M250880_N1536.json", + "M": 250880, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.848500000001 + }, + "M=250880,N=1600": { + "file": 
"silu_config_M250880_N1600.json", + "M": 250880, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.8085 + }, + "M=250880,N=1664": { + "file": "silu_config_M250880_N1664.json", + "M": 250880, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.7685 + }, + "M=250880,N=1728": { + "file": "silu_config_M250880_N1728.json", + "M": 250880, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.7685 + }, + "M=250880,N=1760": { + "file": "silu_config_M250880_N1760.json", + "M": 250880, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2344.3685000000005 + }, + "M=250880,N=1792": { + "file": "silu_config_M250880_N1792.json", + "M": 250880, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2322.7284999999993 + }, + "M=250880,N=1920": { + "file": "silu_config_M250880_N1920.json", + "M": 250880, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2339.1684999999998 + }, + "M=250880,N=2048": { + "file": "silu_config_M250880_N2048.json", + "M": 250880, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1853.0465000000004 + }, + "M=250880,N=2080": { + "file": "silu_config_M250880_N2080.json", + "M": 250880, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2575.3695 + }, + "M=250880,N=2240": { + "file": "silu_config_M250880_N2240.json", + "M": 250880, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.409999999998 + }, + "M=250880,N=2400": { + "file": "silu_config_M250880_N2400.json", + "M": 250880, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2872.8107500000033 + }, + "M=250880,N=2560": { + "file": "silu_config_M250880_N2560.json", + "M": 250880, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.7312500000007 + }, + "M=251904,N=128": { + "file": "silu_config_M251904_N128.json", + "M": 251904, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.63950000000003 + 
}, + "M=251904,N=160": { + "file": "silu_config_M251904_N160.json", + "M": 251904, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.04025000000007 + }, + "M=251904,N=192": { + "file": "silu_config_M251904_N192.json", + "M": 251904, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.8002499999999 + }, + "M=251904,N=256": { + "file": "silu_config_M251904_N256.json", + "M": 251904, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.51999999999987 + }, + "M=251904,N=320": { + "file": "silu_config_M251904_N320.json", + "M": 251904, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.1614999999999 + }, + "M=251904,N=384": { + "file": "silu_config_M251904_N384.json", + "M": 251904, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.6815000000001 + }, + "M=251904,N=480": { + "file": "silu_config_M251904_N480.json", + "M": 251904, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.2415000000001 + }, + "M=251904,N=512": { + "file": "silu_config_M251904_N512.json", + "M": 251904, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 476.1210000000001 + }, + "M=251904,N=576": { + "file": "silu_config_M251904_N576.json", + "M": 251904, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1181.24375 + }, + "M=251904,N=640": { + "file": "silu_config_M251904_N640.json", + "M": 251904, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.80375 + }, + "M=251904,N=768": { + "file": "silu_config_M251904_N768.json", + "M": 251904, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1177.6037499999998 + }, + "M=251904,N=800": { + "file": "silu_config_M251904_N800.json", + "M": 251904, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1176.163750000001 + }, + "M=251904,N=896": { + "file": "silu_config_M251904_N896.json", + "M": 251904, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1185.0837499999998 + }, + "M=251904,N=960": { + "file": "silu_config_M251904_N960.json", + "M": 251904, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1192.2037499999997 + }, + "M=251904,N=1024": { + "file": "silu_config_M251904_N1024.json", + "M": 251904, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 951.643 + }, + "M=251904,N=1120": { + "file": "silu_config_M251904_N1120.json", + "M": 251904, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2368.20875 + }, + "M=251904,N=1152": { + "file": "silu_config_M251904_N1152.json", + "M": 251904, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2350.2887500000006 + }, + "M=251904,N=1280": { + "file": "silu_config_M251904_N1280.json", + "M": 251904, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2351.76875 + }, + "M=251904,N=1344": { + "file": "silu_config_M251904_N1344.json", + "M": 251904, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.88875 + }, + "M=251904,N=1408": { + "file": "silu_config_M251904_N1408.json", + "M": 251904, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2333.8885 + }, + "M=251904,N=1440": { + "file": "silu_config_M251904_N1440.json", + "M": 251904, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2352.04875 + }, + "M=251904,N=1536": { + "file": "silu_config_M251904_N1536.json", + "M": 251904, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.7284999999993 + }, + "M=251904,N=1600": { + "file": "silu_config_M251904_N1600.json", + "M": 251904, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.00875 + }, + "M=251904,N=1664": { + "file": "silu_config_M251904_N1664.json", + "M": 251904, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2341.9685 + }, + "M=251904,N=1728": { + "file": "silu_config_M251904_N1728.json", + "M": 251904, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2357.72875 + }, + "M=251904,N=1760": { + "file": "silu_config_M251904_N1760.json", + "M": 251904, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2371.4087499999996 + }, + "M=251904,N=1792": { + "file": "silu_config_M251904_N1792.json", + "M": 251904, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.20875 + }, + "M=251904,N=1920": { + "file": "silu_config_M251904_N1920.json", + "M": 251904, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2353.84875 + }, + "M=251904,N=2048": { + "file": "silu_config_M251904_N2048.json", + "M": 251904, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1867.366750000001 + }, + "M=251904,N=2080": { + "file": "silu_config_M251904_N2080.json", + "M": 251904, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.8097500000003 + }, + "M=251904,N=2240": { + "file": "silu_config_M251904_N2240.json", + "M": 251904, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2735.250249999999 + }, + "M=251904,N=2400": { + "file": "silu_config_M251904_N2400.json", + "M": 251904, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2876.8107500000015 + }, + "M=251904,N=2560": { + "file": "silu_config_M251904_N2560.json", + "M": 251904, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3039.611499999999 + }, + "M=252928,N=128": { + "file": "silu_config_M252928_N128.json", + "M": 252928, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.55975 + }, + "M=252928,N=160": { + "file": "silu_config_M252928_N160.json", + "M": 252928, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.8802499999999 + }, + "M=252928,N=192": { + "file": "silu_config_M252928_N192.json", + "M": 252928, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.28049999999985 + }, + "M=252928,N=256": { + "file": "silu_config_M252928_N256.json", + "M": 252928, + "N": 256, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 273.9599999999999 + }, + "M=252928,N=320": { + "file": "silu_config_M252928_N320.json", + "M": 252928, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 614.0815 + }, + "M=252928,N=384": { + "file": "silu_config_M252928_N384.json", + "M": 252928, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.0815000000002 + }, + "M=252928,N=480": { + "file": "silu_config_M252928_N480.json", + "M": 252928, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 620.6814999999999 + }, + "M=252928,N=512": { + "file": "silu_config_M252928_N512.json", + "M": 252928, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 495.4409999999998 + }, + "M=252928,N=576": { + "file": "silu_config_M252928_N576.json", + "M": 252928, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1177.84375 + }, + "M=252928,N=640": { + "file": "silu_config_M252928_N640.json", + "M": 252928, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1182.36375 + }, + "M=252928,N=768": { + "file": "silu_config_M252928_N768.json", + "M": 252928, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1182.24375 + }, + "M=252928,N=800": { + "file": "silu_config_M252928_N800.json", + "M": 252928, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1205.724000000001 + }, + "M=252928,N=896": { + "file": "silu_config_M252928_N896.json", + "M": 252928, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1179.2037499999997 + }, + "M=252928,N=960": { + "file": "silu_config_M252928_N960.json", + "M": 252928, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4440000000004 + }, + "M=252928,N=1024": { + "file": "silu_config_M252928_N1024.json", + "M": 252928, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 955.9629999999997 + }, + "M=252928,N=1120": { + "file": "silu_config_M252928_N1120.json", + "M": 252928, + "N": 1120, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.3287499999997 + }, + "M=252928,N=1152": { + "file": "silu_config_M252928_N1152.json", + "M": 252928, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.9685 + }, + "M=252928,N=1280": { + "file": "silu_config_M252928_N1280.json", + "M": 252928, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.96875 + }, + "M=252928,N=1344": { + "file": "silu_config_M252928_N1344.json", + "M": 252928, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.2487499999997 + }, + "M=252928,N=1408": { + "file": "silu_config_M252928_N1408.json", + "M": 252928, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2353.2487499999997 + }, + "M=252928,N=1440": { + "file": "silu_config_M252928_N1440.json", + "M": 252928, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.16875 + }, + "M=252928,N=1536": { + "file": "silu_config_M252928_N1536.json", + "M": 252928, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2346.80875 + }, + "M=252928,N=1600": { + "file": "silu_config_M252928_N1600.json", + "M": 252928, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2363.20875 + }, + "M=252928,N=1664": { + "file": "silu_config_M252928_N1664.json", + "M": 252928, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.04875 + }, + "M=252928,N=1728": { + "file": "silu_config_M252928_N1728.json", + "M": 252928, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.5687500000004 + }, + "M=252928,N=1760": { + "file": "silu_config_M252928_N1760.json", + "M": 252928, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2370.4087499999996 + }, + "M=252928,N=1792": { + "file": "silu_config_M252928_N1792.json", + "M": 252928, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.4887499999995 + }, + "M=252928,N=1920": { + "file": "silu_config_M252928_N1920.json", + 
"M": 252928, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2363.3287499999997 + }, + "M=252928,N=2048": { + "file": "silu_config_M252928_N2048.json", + "M": 252928, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1867.52675 + }, + "M=252928,N=2080": { + "file": "silu_config_M252928_N2080.json", + "M": 252928, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2595.089750000001 + }, + "M=252928,N=2240": { + "file": "silu_config_M252928_N2240.json", + "M": 252928, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.0502500000002 + }, + "M=252928,N=2400": { + "file": "silu_config_M252928_N2400.json", + "M": 252928, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.4507499999963 + }, + "M=252928,N=2560": { + "file": "silu_config_M252928_N2560.json", + "M": 252928, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3031.3314999999984 + }, + "M=253952,N=128": { + "file": "silu_config_M253952_N128.json", + "M": 253952, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.7195 + }, + "M=253952,N=160": { + "file": "silu_config_M253952_N160.json", + "M": 253952, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.68024999999983 + }, + "M=253952,N=192": { + "file": "silu_config_M253952_N192.json", + "M": 253952, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 301.4402499999999 + }, + "M=253952,N=256": { + "file": "silu_config_M253952_N256.json", + "M": 253952, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.9599999999999 + }, + "M=253952,N=320": { + "file": "silu_config_M253952_N320.json", + "M": 253952, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 616.8015000000003 + }, + "M=253952,N=384": { + "file": "silu_config_M253952_N384.json", + "M": 253952, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.6814999999997 + }, + "M=253952,N=480": { + "file": 
"silu_config_M253952_N480.json", + "M": 253952, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.2014999999999 + }, + "M=253952,N=512": { + "file": "silu_config_M253952_N512.json", + "M": 253952, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 489.001 + }, + "M=253952,N=576": { + "file": "silu_config_M253952_N576.json", + "M": 253952, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.4837499999999 + }, + "M=253952,N=640": { + "file": "silu_config_M253952_N640.json", + "M": 253952, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1187.1237499999997 + }, + "M=253952,N=768": { + "file": "silu_config_M253952_N768.json", + "M": 253952, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1189.8037499999996 + }, + "M=253952,N=800": { + "file": "silu_config_M253952_N800.json", + "M": 253952, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1199.484 + }, + "M=253952,N=896": { + "file": "silu_config_M253952_N896.json", + "M": 253952, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1183.76375 + }, + "M=253952,N=960": { + "file": "silu_config_M253952_N960.json", + "M": 253952, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.5239999999994 + }, + "M=253952,N=1024": { + "file": "silu_config_M253952_N1024.json", + "M": 253952, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 959.5629999999996 + }, + "M=253952,N=1120": { + "file": "silu_config_M253952_N1120.json", + "M": 253952, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2367.04875 + }, + "M=253952,N=1152": { + "file": "silu_config_M253952_N1152.json", + "M": 253952, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2359.04875 + }, + "M=253952,N=1280": { + "file": "silu_config_M253952_N1280.json", + "M": 253952, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.16875 + }, + "M=253952,N=1344": { + "file": 
"silu_config_M253952_N1344.json", + "M": 253952, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.7287499999993 + }, + "M=253952,N=1408": { + "file": "silu_config_M253952_N1408.json", + "M": 253952, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.96875 + }, + "M=253952,N=1440": { + "file": "silu_config_M253952_N1440.json", + "M": 253952, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2380.968749999999 + }, + "M=253952,N=1536": { + "file": "silu_config_M253952_N1536.json", + "M": 253952, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2345.9285 + }, + "M=253952,N=1600": { + "file": "silu_config_M253952_N1600.json", + "M": 253952, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2383.08875 + }, + "M=253952,N=1664": { + "file": "silu_config_M253952_N1664.json", + "M": 253952, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2371.4487499999996 + }, + "M=253952,N=1728": { + "file": "silu_config_M253952_N1728.json", + "M": 253952, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.92875 + }, + "M=253952,N=1760": { + "file": "silu_config_M253952_N1760.json", + "M": 253952, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2400.6887500000003 + }, + "M=253952,N=1792": { + "file": "silu_config_M253952_N1792.json", + "M": 253952, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2366.088749999999 + }, + "M=253952,N=1920": { + "file": "silu_config_M253952_N1920.json", + "M": 253952, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2382.76875 + }, + "M=253952,N=2048": { + "file": "silu_config_M253952_N2048.json", + "M": 253952, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1883.1267499999994 + }, + "M=253952,N=2080": { + "file": "silu_config_M253952_N2080.json", + "M": 253952, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.089749999999 + }, + 
"M=253952,N=2240": { + "file": "silu_config_M253952_N2240.json", + "M": 253952, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2757.530249999996 + }, + "M=253952,N=2400": { + "file": "silu_config_M253952_N2400.json", + "M": 253952, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.411000000001 + }, + "M=253952,N=2560": { + "file": "silu_config_M253952_N2560.json", + "M": 253952, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3051.1714999999986 + }, + "M=254976,N=128": { + "file": "silu_config_M254976_N128.json", + "M": 254976, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 143.03950000000003 + }, + "M=254976,N=160": { + "file": "silu_config_M254976_N160.json", + "M": 254976, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 304.8402499999999 + }, + "M=254976,N=192": { + "file": "silu_config_M254976_N192.json", + "M": 254976, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 305.48025000000007 + }, + "M=254976,N=256": { + "file": "silu_config_M254976_N256.json", + "M": 254976, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 276.12 + }, + "M=254976,N=320": { + "file": "silu_config_M254976_N320.json", + "M": 254976, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.5615000000003 + }, + "M=254976,N=384": { + "file": "silu_config_M254976_N384.json", + "M": 254976, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.0014999999999 + }, + "M=254976,N=480": { + "file": "silu_config_M254976_N480.json", + "M": 254976, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 625.4815000000001 + }, + "M=254976,N=512": { + "file": "silu_config_M254976_N512.json", + "M": 254976, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 499.12099999999987 + }, + "M=254976,N=576": { + "file": "silu_config_M254976_N576.json", + "M": 254976, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1184.8437500000005 + }, + "M=254976,N=640": { + "file": "silu_config_M254976_N640.json", + "M": 254976, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.2040000000002 + }, + "M=254976,N=768": { + "file": "silu_config_M254976_N768.json", + "M": 254976, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1183.6437499999997 + }, + "M=254976,N=800": { + "file": "silu_config_M254976_N800.json", + "M": 254976, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1204.364 + }, + "M=254976,N=896": { + "file": "silu_config_M254976_N896.json", + "M": 254976, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1186.0437500000003 + }, + "M=254976,N=960": { + "file": "silu_config_M254976_N960.json", + "M": 254976, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.6840000000007 + }, + "M=254976,N=1024": { + "file": "silu_config_M254976_N1024.json", + "M": 254976, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 963.2027499999995 + }, + "M=254976,N=1120": { + "file": "silu_config_M254976_N1120.json", + "M": 254976, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.80875 + }, + "M=254976,N=1152": { + "file": "silu_config_M254976_N1152.json", + "M": 254976, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.5687500000004 + }, + "M=254976,N=1280": { + "file": "silu_config_M254976_N1280.json", + "M": 254976, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.4884999999995 + }, + "M=254976,N=1344": { + "file": "silu_config_M254976_N1344.json", + "M": 254976, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2355.2487500000007 + }, + "M=254976,N=1408": { + "file": "silu_config_M254976_N1408.json", + "M": 254976, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2372.2487499999997 + }, + "M=254976,N=1440": { + "file": "silu_config_M254976_N1440.json", + "M": 254976, + "N": 1440, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2401.04875 + }, + "M=254976,N=1536": { + "file": "silu_config_M254976_N1536.json", + "M": 254976, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2396.16875 + }, + "M=254976,N=1600": { + "file": "silu_config_M254976_N1600.json", + "M": 254976, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.2887499999997 + }, + "M=254976,N=1664": { + "file": "silu_config_M254976_N1664.json", + "M": 254976, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2380.648750000001 + }, + "M=254976,N=1728": { + "file": "silu_config_M254976_N1728.json", + "M": 254976, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.04875 + }, + "M=254976,N=1760": { + "file": "silu_config_M254976_N1760.json", + "M": 254976, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2389.5687499999995 + }, + "M=254976,N=1792": { + "file": "silu_config_M254976_N1792.json", + "M": 254976, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.928750000001 + }, + "M=254976,N=1920": { + "file": "silu_config_M254976_N1920.json", + "M": 254976, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2372.04875 + }, + "M=254976,N=2048": { + "file": "silu_config_M254976_N2048.json", + "M": 254976, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1873.6467499999999 + }, + "M=254976,N=2080": { + "file": "silu_config_M254976_N2080.json", + "M": 254976, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2616.129750000001 + }, + "M=254976,N=2240": { + "file": "silu_config_M254976_N2240.json", + "M": 254976, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.9302499999985 + }, + "M=254976,N=2400": { + "file": "silu_config_M254976_N2400.json", + "M": 254976, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2922.0109999999977 + }, + "M=254976,N=2560": { + "file": 
"silu_config_M254976_N2560.json", + "M": 254976, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.4915 + }, + "M=256000,N=128": { + "file": "silu_config_M256000_N128.json", + "M": 256000, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.75975 + }, + "M=256000,N=160": { + "file": "silu_config_M256000_N160.json", + "M": 256000, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.80024999999983 + }, + "M=256000,N=192": { + "file": "silu_config_M256000_N192.json", + "M": 256000, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.8002499999999 + }, + "M=256000,N=256": { + "file": "silu_config_M256000_N256.json", + "M": 256000, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.52025000000003 + }, + "M=256000,N=320": { + "file": "silu_config_M256000_N320.json", + "M": 256000, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 617.0015000000003 + }, + "M=256000,N=384": { + "file": "silu_config_M256000_N384.json", + "M": 256000, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 614.3614999999998 + }, + "M=256000,N=480": { + "file": "silu_config_M256000_N480.json", + "M": 256000, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.5615000000005 + }, + "M=256000,N=512": { + "file": "silu_config_M256000_N512.json", + "M": 256000, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 492.721 + }, + "M=256000,N=576": { + "file": "silu_config_M256000_N576.json", + "M": 256000, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.80375 + }, + "M=256000,N=640": { + "file": "silu_config_M256000_N640.json", + "M": 256000, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4039999999995 + }, + "M=256000,N=768": { + "file": "silu_config_M256000_N768.json", + "M": 256000, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.7637500000005 + }, + "M=256000,N=800": { + "file": 
"silu_config_M256000_N800.json", + "M": 256000, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.364 + }, + "M=256000,N=896": { + "file": "silu_config_M256000_N896.json", + "M": 256000, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1204.524 + }, + "M=256000,N=960": { + "file": "silu_config_M256000_N960.json", + "M": 256000, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1202.6840000000002 + }, + "M=256000,N=1024": { + "file": "silu_config_M256000_N1024.json", + "M": 256000, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 965.683 + }, + "M=256000,N=1120": { + "file": "silu_config_M256000_N1120.json", + "M": 256000, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2369.00875 + }, + "M=256000,N=1152": { + "file": "silu_config_M256000_N1152.json", + "M": 256000, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2352.84875 + }, + "M=256000,N=1280": { + "file": "silu_config_M256000_N1280.json", + "M": 256000, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2333.2485000000006 + }, + "M=256000,N=1344": { + "file": "silu_config_M256000_N1344.json", + "M": 256000, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2369.04875 + }, + "M=256000,N=1408": { + "file": "silu_config_M256000_N1408.json", + "M": 256000, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2366.88875 + }, + "M=256000,N=1440": { + "file": "silu_config_M256000_N1440.json", + "M": 256000, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.2887499999997 + }, + "M=256000,N=1536": { + "file": "silu_config_M256000_N1536.json", + "M": 256000, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.2487499999997 + }, + "M=256000,N=1600": { + "file": "silu_config_M256000_N1600.json", + "M": 256000, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2374.2087500000007 + }, + "M=256000,N=1664": { + 
"file": "silu_config_M256000_N1664.json", + "M": 256000, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.96875 + }, + "M=256000,N=1728": { + "file": "silu_config_M256000_N1728.json", + "M": 256000, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.16875 + }, + "M=256000,N=1760": { + "file": "silu_config_M256000_N1760.json", + "M": 256000, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2381.4487499999996 + }, + "M=256000,N=1792": { + "file": "silu_config_M256000_N1792.json", + "M": 256000, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2389.76875 + }, + "M=256000,N=1920": { + "file": "silu_config_M256000_N1920.json", + "M": 256000, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2354.928749999999 + }, + "M=256000,N=2048": { + "file": "silu_config_M256000_N2048.json", + "M": 256000, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1889.6867499999998 + }, + "M=256000,N=2080": { + "file": "silu_config_M256000_N2080.json", + "M": 256000, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2617.449749999997 + }, + "M=256000,N=2240": { + "file": "silu_config_M256000_N2240.json", + "M": 256000, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2738.9702499999994 + }, + "M=256000,N=2400": { + "file": "silu_config_M256000_N2400.json", + "M": 256000, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.4510000000037 + }, + "M=256000,N=2560": { + "file": "silu_config_M256000_N2560.json", + "M": 256000, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.3315000000002 + }, + "M=257024,N=128": { + "file": "silu_config_M257024_N128.json", + "M": 257024, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.1995 + }, + "M=257024,N=160": { + "file": "silu_config_M257024_N160.json", + "M": 257024, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
314.4002499999999 + }, + "M=257024,N=192": { + "file": "silu_config_M257024_N192.json", + "M": 257024, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 315.44025 + }, + "M=257024,N=256": { + "file": "silu_config_M257024_N256.json", + "M": 257024, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 278.27999999999986 + }, + "M=257024,N=320": { + "file": "silu_config_M257024_N320.json", + "M": 257024, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.7214999999999 + }, + "M=257024,N=384": { + "file": "silu_config_M257024_N384.json", + "M": 257024, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.8815000000002 + }, + "M=257024,N=480": { + "file": "silu_config_M257024_N480.json", + "M": 257024, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.4415000000001 + }, + "M=257024,N=512": { + "file": "silu_config_M257024_N512.json", + "M": 257024, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 484.4010000000003 + }, + "M=257024,N=576": { + "file": "silu_config_M257024_N576.json", + "M": 257024, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.6437500000002 + }, + "M=257024,N=640": { + "file": "silu_config_M257024_N640.json", + "M": 257024, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.5637499999998 + }, + "M=257024,N=768": { + "file": "silu_config_M257024_N768.json", + "M": 257024, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.1640000000002 + }, + "M=257024,N=800": { + "file": "silu_config_M257024_N800.json", + "M": 257024, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1220.524 + }, + "M=257024,N=896": { + "file": "silu_config_M257024_N896.json", + "M": 257024, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.444 + }, + "M=257024,N=960": { + "file": "silu_config_M257024_N960.json", + "M": 257024, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1218.324 + }, + "M=257024,N=1024": { + "file": "silu_config_M257024_N1024.json", + "M": 257024, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 970.723 + }, + "M=257024,N=1120": { + "file": "silu_config_M257024_N1120.json", + "M": 257024, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.5287499999995 + }, + "M=257024,N=1152": { + "file": "silu_config_M257024_N1152.json", + "M": 257024, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.5687500000013 + }, + "M=257024,N=1280": { + "file": "silu_config_M257024_N1280.json", + "M": 257024, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2378.16875 + }, + "M=257024,N=1344": { + "file": "silu_config_M257024_N1344.json", + "M": 257024, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2384.72875 + }, + "M=257024,N=1408": { + "file": "silu_config_M257024_N1408.json", + "M": 257024, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2390.96875 + }, + "M=257024,N=1440": { + "file": "silu_config_M257024_N1440.json", + "M": 257024, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.889 + }, + "M=257024,N=1536": { + "file": "silu_config_M257024_N1536.json", + "M": 257024, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.2487499999997 + }, + "M=257024,N=1600": { + "file": "silu_config_M257024_N1600.json", + "M": 257024, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2400.5287499999995 + }, + "M=257024,N=1664": { + "file": "silu_config_M257024_N1664.json", + "M": 257024, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2379.3687500000005 + }, + "M=257024,N=1728": { + "file": "silu_config_M257024_N1728.json", + "M": 257024, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.76875 + }, + "M=257024,N=1760": { + "file": "silu_config_M257024_N1760.json", + "M": 257024, + "N": 1760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2398.3287499999997 + }, + "M=257024,N=1792": { + "file": "silu_config_M257024_N1792.json", + "M": 257024, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.5687500000004 + }, + "M=257024,N=1920": { + "file": "silu_config_M257024_N1920.json", + "M": 257024, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.6090000000013 + }, + "M=257024,N=2048": { + "file": "silu_config_M257024_N2048.json", + "M": 257024, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1906.2067499999994 + }, + "M=257024,N=2080": { + "file": "silu_config_M257024_N2080.json", + "M": 257024, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2646.6497500000005 + }, + "M=257024,N=2240": { + "file": "silu_config_M257024_N2240.json", + "M": 257024, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2790.4505000000026 + }, + "M=257024,N=2400": { + "file": "silu_config_M257024_N2400.json", + "M": 257024, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2934.8909999999996 + }, + "M=257024,N=2560": { + "file": "silu_config_M257024_N2560.json", + "M": 257024, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3081.4917499999992 + }, + "M=258048,N=128": { + "file": "silu_config_M258048_N128.json", + "M": 258048, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.71950000000004 + }, + "M=258048,N=160": { + "file": "silu_config_M258048_N160.json", + "M": 258048, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.7202499999999 + }, + "M=258048,N=192": { + "file": "silu_config_M258048_N192.json", + "M": 258048, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 309.2004999999999 + }, + "M=258048,N=256": { + "file": "silu_config_M258048_N256.json", + "M": 258048, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.28 + }, + "M=258048,N=320": { + "file": "silu_config_M258048_N320.json", + "M": 
258048, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.4415000000001 + }, + "M=258048,N=384": { + "file": "silu_config_M258048_N384.json", + "M": 258048, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.0414999999998 + }, + "M=258048,N=480": { + "file": "silu_config_M258048_N480.json", + "M": 258048, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.2414999999996 + }, + "M=258048,N=512": { + "file": "silu_config_M258048_N512.json", + "M": 258048, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 498.1610000000003 + }, + "M=258048,N=576": { + "file": "silu_config_M258048_N576.json", + "M": 258048, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1224.004 + }, + "M=258048,N=640": { + "file": "silu_config_M258048_N640.json", + "M": 258048, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1191.8837499999995 + }, + "M=258048,N=768": { + "file": "silu_config_M258048_N768.json", + "M": 258048, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.364 + }, + "M=258048,N=800": { + "file": "silu_config_M258048_N800.json", + "M": 258048, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1225.2440000000001 + }, + "M=258048,N=896": { + "file": "silu_config_M258048_N896.json", + "M": 258048, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1221.0040000000004 + }, + "M=258048,N=960": { + "file": "silu_config_M258048_N960.json", + "M": 258048, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1234.444 + }, + "M=258048,N=1024": { + "file": "silu_config_M258048_N1024.json", + "M": 258048, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 974.6829999999995 + }, + "M=258048,N=1120": { + "file": "silu_config_M258048_N1120.json", + "M": 258048, + "N": 1120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2416.8089999999997 + }, + "M=258048,N=1152": { + "file": 
"silu_config_M258048_N1152.json", + "M": 258048, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2375.0887500000003 + }, + "M=258048,N=1280": { + "file": "silu_config_M258048_N1280.json", + "M": 258048, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2377.2887499999997 + }, + "M=258048,N=1344": { + "file": "silu_config_M258048_N1344.json", + "M": 258048, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.3289999999997 + }, + "M=258048,N=1408": { + "file": "silu_config_M258048_N1408.json", + "M": 258048, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2403.3287499999997 + }, + "M=258048,N=1440": { + "file": "silu_config_M258048_N1440.json", + "M": 258048, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2398.76875 + }, + "M=258048,N=1536": { + "file": "silu_config_M258048_N1536.json", + "M": 258048, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2414.249 + }, + "M=258048,N=1600": { + "file": "silu_config_M258048_N1600.json", + "M": 258048, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.089 + }, + "M=258048,N=1664": { + "file": "silu_config_M258048_N1664.json", + "M": 258048, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2387.8087499999992 + }, + "M=258048,N=1728": { + "file": "silu_config_M258048_N1728.json", + "M": 258048, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2424.169000000001 + }, + "M=258048,N=1760": { + "file": "silu_config_M258048_N1760.json", + "M": 258048, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.929 + }, + "M=258048,N=1792": { + "file": "silu_config_M258048_N1792.json", + "M": 258048, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2402.7287499999998 + }, + "M=258048,N=1920": { + "file": "silu_config_M258048_N1920.json", + "M": 258048, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.089 + }, + 
"M=258048,N=2048": { + "file": "silu_config_M258048_N2048.json", + "M": 258048, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1904.6867500000003 + }, + "M=258048,N=2080": { + "file": "silu_config_M258048_N2080.json", + "M": 258048, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.00975 + }, + "M=258048,N=2240": { + "file": "silu_config_M258048_N2240.json", + "M": 258048, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.7305000000006 + }, + "M=258048,N=2400": { + "file": "silu_config_M258048_N2400.json", + "M": 258048, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2954.611 + }, + "M=258048,N=2560": { + "file": "silu_config_M258048_N2560.json", + "M": 258048, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3091.2517500000013 + }, + "M=259072,N=128": { + "file": "silu_config_M259072_N128.json", + "M": 259072, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 154.91950000000003 + }, + "M=259072,N=160": { + "file": "silu_config_M259072_N160.json", + "M": 259072, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 320.4802499999999 + }, + "M=259072,N=192": { + "file": "silu_config_M259072_N192.json", + "M": 259072, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 317.1602499999998 + }, + "M=259072,N=256": { + "file": "silu_config_M259072_N256.json", + "M": 259072, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.6400000000001 + }, + "M=259072,N=320": { + "file": "silu_config_M259072_N320.json", + "M": 259072, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 627.6815000000004 + }, + "M=259072,N=384": { + "file": "silu_config_M259072_N384.json", + "M": 259072, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 624.8814999999997 + }, + "M=259072,N=480": { + "file": "silu_config_M259072_N480.json", + "M": 259072, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 634.6815000000001 + }, + "M=259072,N=512": { + "file": "silu_config_M259072_N512.json", + "M": 259072, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 507.0412500000002 + }, + "M=259072,N=576": { + "file": "silu_config_M259072_N576.json", + "M": 259072, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1206.3639999999996 + }, + "M=259072,N=640": { + "file": "silu_config_M259072_N640.json", + "M": 259072, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.6440000000002 + }, + "M=259072,N=768": { + "file": "silu_config_M259072_N768.json", + "M": 259072, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.884 + }, + "M=259072,N=800": { + "file": "silu_config_M259072_N800.json", + "M": 259072, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.2040000000006 + }, + "M=259072,N=896": { + "file": "silu_config_M259072_N896.json", + "M": 259072, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.884 + }, + "M=259072,N=960": { + "file": "silu_config_M259072_N960.json", + "M": 259072, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1235.1639999999993 + }, + "M=259072,N=1024": { + "file": "silu_config_M259072_N1024.json", + "M": 259072, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 977.203 + }, + "M=259072,N=1120": { + "file": "silu_config_M259072_N1120.json", + "M": 259072, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2413.209 + }, + "M=259072,N=1152": { + "file": "silu_config_M259072_N1152.json", + "M": 259072, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2394.7287499999993 + }, + "M=259072,N=1280": { + "file": "silu_config_M259072_N1280.json", + "M": 259072, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.928749999999 + }, + "M=259072,N=1344": { + "file": "silu_config_M259072_N1344.json", + "M": 259072, + "N": 1344, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2416.4489999999996 + }, + "M=259072,N=1408": { + "file": "silu_config_M259072_N1408.json", + "M": 259072, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.4089999999997 + }, + "M=259072,N=1440": { + "file": "silu_config_M259072_N1440.json", + "M": 259072, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.249 + }, + "M=259072,N=1536": { + "file": "silu_config_M259072_N1536.json", + "M": 259072, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.4887499999995 + }, + "M=259072,N=1600": { + "file": "silu_config_M259072_N1600.json", + "M": 259072, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.6890000000003 + }, + "M=259072,N=1664": { + "file": "silu_config_M259072_N1664.json", + "M": 259072, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2413.5290000000005 + }, + "M=259072,N=1728": { + "file": "silu_config_M259072_N1728.json", + "M": 259072, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2418.2890000000016 + }, + "M=259072,N=1760": { + "file": "silu_config_M259072_N1760.json", + "M": 259072, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.7690000000002 + }, + "M=259072,N=1792": { + "file": "silu_config_M259072_N1792.json", + "M": 259072, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2416.4889999999996 + }, + "M=259072,N=1920": { + "file": "silu_config_M259072_N1920.json", + "M": 259072, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.2889999999998 + }, + "M=259072,N=2048": { + "file": "silu_config_M259072_N2048.json", + "M": 259072, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1922.40675 + }, + "M=259072,N=2080": { + "file": "silu_config_M259072_N2080.json", + "M": 259072, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2677.130000000001 + }, + "M=259072,N=2240": { + "file": 
"silu_config_M259072_N2240.json", + "M": 259072, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2810.8505000000014 + }, + "M=259072,N=2400": { + "file": "silu_config_M259072_N2400.json", + "M": 259072, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.1312500000004 + }, + "M=259072,N=2560": { + "file": "silu_config_M259072_N2560.json", + "M": 259072, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3110.45175 + }, + "M=260096,N=128": { + "file": "silu_config_M260096_N128.json", + "M": 260096, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 144.39949999999993 + }, + "M=260096,N=160": { + "file": "silu_config_M260096_N160.json", + "M": 260096, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 321.76050000000004 + }, + "M=260096,N=192": { + "file": "silu_config_M260096_N192.json", + "M": 260096, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.6402499999999 + }, + "M=260096,N=256": { + "file": "silu_config_M260096_N256.json", + "M": 260096, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.55999999999995 + }, + "M=260096,N=320": { + "file": "silu_config_M260096_N320.json", + "M": 260096, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.2815 + }, + "M=260096,N=384": { + "file": "silu_config_M260096_N384.json", + "M": 260096, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.3214999999998 + }, + "M=260096,N=480": { + "file": "silu_config_M260096_N480.json", + "M": 260096, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.4415000000001 + }, + "M=260096,N=512": { + "file": "silu_config_M260096_N512.json", + "M": 260096, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 508.96100000000024 + }, + "M=260096,N=576": { + "file": "silu_config_M260096_N576.json", + "M": 260096, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.0839999999998 + }, + 
"M=260096,N=640": { + "file": "silu_config_M260096_N640.json", + "M": 260096, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1211.1640000000002 + }, + "M=260096,N=768": { + "file": "silu_config_M260096_N768.json", + "M": 260096, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1225.2439999999997 + }, + "M=260096,N=800": { + "file": "silu_config_M260096_N800.json", + "M": 260096, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.924 + }, + "M=260096,N=896": { + "file": "silu_config_M260096_N896.json", + "M": 260096, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.2039999999997 + }, + "M=260096,N=960": { + "file": "silu_config_M260096_N960.json", + "M": 260096, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.4840000000004 + }, + "M=260096,N=1024": { + "file": "silu_config_M260096_N1024.json", + "M": 260096, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 981.203 + }, + "M=260096,N=1120": { + "file": "silu_config_M260096_N1120.json", + "M": 260096, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.049 + }, + "M=260096,N=1152": { + "file": "silu_config_M260096_N1152.json", + "M": 260096, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2403.80875 + }, + "M=260096,N=1280": { + "file": "silu_config_M260096_N1280.json", + "M": 260096, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2396.4887500000004 + }, + "M=260096,N=1344": { + "file": "silu_config_M260096_N1344.json", + "M": 260096, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.4490000000005 + }, + "M=260096,N=1408": { + "file": "silu_config_M260096_N1408.json", + "M": 260096, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2420.7690000000002 + }, + "M=260096,N=1440": { + "file": "silu_config_M260096_N1440.json", + "M": 260096, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2430.969 + }, + "M=260096,N=1536": { + "file": "silu_config_M260096_N1536.json", + "M": 260096, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2414.129000000001 + }, + "M=260096,N=1600": { + "file": "silu_config_M260096_N1600.json", + "M": 260096, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2449.4090000000006 + }, + "M=260096,N=1664": { + "file": "silu_config_M260096_N1664.json", + "M": 260096, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.169000000001 + }, + "M=260096,N=1728": { + "file": "silu_config_M260096_N1728.json", + "M": 260096, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.6090000000013 + }, + "M=260096,N=1760": { + "file": "silu_config_M260096_N1760.json", + "M": 260096, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.6090000000004 + }, + "M=260096,N=1792": { + "file": "silu_config_M260096_N1792.json", + "M": 260096, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.7690000000002 + }, + "M=260096,N=1920": { + "file": "silu_config_M260096_N1920.json", + "M": 260096, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2433.2890000000007 + }, + "M=260096,N=2048": { + "file": "silu_config_M260096_N2048.json", + "M": 260096, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1938.1269999999995 + }, + "M=260096,N=2080": { + "file": "silu_config_M260096_N2080.json", + "M": 260096, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2688.289999999999 + }, + "M=260096,N=2240": { + "file": "silu_config_M260096_N2240.json", + "M": 260096, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2821.490500000001 + }, + "M=260096,N=2400": { + "file": "silu_config_M260096_N2400.json", + "M": 260096, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.65125 + }, + "M=260096,N=2560": { + "file": "silu_config_M260096_N2560.json", + "M": 260096, + "N": 2560, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.2517499999994 + }, + "M=261120,N=128": { + "file": "silu_config_M261120_N128.json", + "M": 261120, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.23950000000002 + }, + "M=261120,N=160": { + "file": "silu_config_M261120_N160.json", + "M": 261120, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 327.12049999999977 + }, + "M=261120,N=192": { + "file": "silu_config_M261120_N192.json", + "M": 261120, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 323.2802499999999 + }, + "M=261120,N=256": { + "file": "silu_config_M261120_N256.json", + "M": 261120, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 281.76 + }, + "M=261120,N=320": { + "file": "silu_config_M261120_N320.json", + "M": 261120, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 628.3214999999998 + }, + "M=261120,N=384": { + "file": "silu_config_M261120_N384.json", + "M": 261120, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 625.4015000000002 + }, + "M=261120,N=480": { + "file": "silu_config_M261120_N480.json", + "M": 261120, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.2414999999996 + }, + "M=261120,N=512": { + "file": "silu_config_M261120_N512.json", + "M": 261120, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 511.0012500000001 + }, + "M=261120,N=576": { + "file": "silu_config_M261120_N576.json", + "M": 261120, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1205.1639999999998 + }, + "M=261120,N=640": { + "file": "silu_config_M261120_N640.json", + "M": 261120, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1219.924 + }, + "M=261120,N=768": { + "file": "silu_config_M261120_N768.json", + "M": 261120, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.5239999999994 + }, + "M=261120,N=800": { + "file": "silu_config_M261120_N800.json", + "M": 261120, + 
"N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.3639999999996 + }, + "M=261120,N=896": { + "file": "silu_config_M261120_N896.json", + "M": 261120, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.8439999999996 + }, + "M=261120,N=960": { + "file": "silu_config_M261120_N960.json", + "M": 261120, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.2039999999993 + }, + "M=261120,N=1024": { + "file": "silu_config_M261120_N1024.json", + "M": 261120, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 983.20325 + }, + "M=261120,N=1120": { + "file": "silu_config_M261120_N1120.json", + "M": 261120, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2415.4089999999997 + }, + "M=261120,N=1152": { + "file": "silu_config_M261120_N1152.json", + "M": 261120, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.889 + }, + "M=261120,N=1280": { + "file": "silu_config_M261120_N1280.json", + "M": 261120, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2401.6087500000003 + }, + "M=261120,N=1344": { + "file": "silu_config_M261120_N1344.json", + "M": 261120, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.2090000000007 + }, + "M=261120,N=1408": { + "file": "silu_config_M261120_N1408.json", + "M": 261120, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2394.4087499999996 + }, + "M=261120,N=1440": { + "file": "silu_config_M261120_N1440.json", + "M": 261120, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2432.8089999999993 + }, + "M=261120,N=1536": { + "file": "silu_config_M261120_N1536.json", + "M": 261120, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.8089999999993 + }, + "M=261120,N=1600": { + "file": "silu_config_M261120_N1600.json", + "M": 261120, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2451.249 + }, + "M=261120,N=1664": { + "file": 
"silu_config_M261120_N1664.json", + "M": 261120, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.4489999999996 + }, + "M=261120,N=1728": { + "file": "silu_config_M261120_N1728.json", + "M": 261120, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.929 + }, + "M=261120,N=1760": { + "file": "silu_config_M261120_N1760.json", + "M": 261120, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.249 + }, + "M=261120,N=1792": { + "file": "silu_config_M261120_N1792.json", + "M": 261120, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2418.968999999999 + }, + "M=261120,N=1920": { + "file": "silu_config_M261120_N1920.json", + "M": 261120, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2426.2889999999998 + }, + "M=261120,N=2048": { + "file": "silu_config_M261120_N2048.json", + "M": 261120, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1937.0470000000005 + }, + "M=261120,N=2080": { + "file": "silu_config_M261120_N2080.json", + "M": 261120, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2679.010000000002 + }, + "M=261120,N=2240": { + "file": "silu_config_M261120_N2240.json", + "M": 261120, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.170500000001 + }, + "M=261120,N=2400": { + "file": "silu_config_M261120_N2400.json", + "M": 261120, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.451250000001 + }, + "M=261120,N=2560": { + "file": "silu_config_M261120_N2560.json", + "M": 261120, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3117.45175 + }, + "M=262144,N=128": { + "file": "silu_config_M262144_N128.json", + "M": 262144, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.27974999999998 + }, + "M=262144,N=160": { + "file": "silu_config_M262144_N160.json", + "M": 262144, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.88025 + }, + 
"M=262144,N=192": { + "file": "silu_config_M262144_N192.json", + "M": 262144, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.32025 + }, + "M=262144,N=256": { + "file": "silu_config_M262144_N256.json", + "M": 262144, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 282.8799999999999 + }, + "M=262144,N=320": { + "file": "silu_config_M262144_N320.json", + "M": 262144, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.3215 + }, + "M=262144,N=384": { + "file": "silu_config_M262144_N384.json", + "M": 262144, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 631.4814999999999 + }, + "M=262144,N=480": { + "file": "silu_config_M262144_N480.json", + "M": 262144, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.2815 + }, + "M=262144,N=512": { + "file": "silu_config_M262144_N512.json", + "M": 262144, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.8410000000001 + }, + "M=262144,N=576": { + "file": "silu_config_M262144_N576.json", + "M": 262144, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1220.6840000000002 + }, + "M=262144,N=640": { + "file": "silu_config_M262144_N640.json", + "M": 262144, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.8439999999996 + }, + "M=262144,N=768": { + "file": "silu_config_M262144_N768.json", + "M": 262144, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1216.1639999999998 + }, + "M=262144,N=800": { + "file": "silu_config_M262144_N800.json", + "M": 262144, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1236.884 + }, + "M=262144,N=896": { + "file": "silu_config_M262144_N896.json", + "M": 262144, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.9640000000004 + }, + "M=262144,N=960": { + "file": "silu_config_M262144_N960.json", + "M": 262144, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1230.924 + }, + "M=262144,N=1024": 
{ + "file": "silu_config_M262144_N1024.json", + "M": 262144, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 989.3229999999999 + }, + "M=262144,N=1120": { + "file": "silu_config_M262144_N1120.json", + "M": 262144, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.929 + }, + "M=262144,N=1152": { + "file": "silu_config_M262144_N1152.json", + "M": 262144, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.888999999999 + }, + "M=262144,N=1280": { + "file": "silu_config_M262144_N1280.json", + "M": 262144, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2415.3289999999997 + }, + "M=262144,N=1344": { + "file": "silu_config_M262144_N1344.json", + "M": 262144, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.7290000000003 + }, + "M=262144,N=1408": { + "file": "silu_config_M262144_N1408.json", + "M": 262144, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.4489999999996 + }, + "M=262144,N=1440": { + "file": "silu_config_M262144_N1440.json", + "M": 262144, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2449.7289999999994 + }, + "M=262144,N=1536": { + "file": "silu_config_M262144_N1536.json", + "M": 262144, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.2889999999998 + }, + "M=262144,N=1600": { + "file": "silu_config_M262144_N1600.json", + "M": 262144, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2468.049000000001 + }, + "M=262144,N=1664": { + "file": "silu_config_M262144_N1664.json", + "M": 262144, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.3689999999997 + }, + "M=262144,N=1728": { + "file": "silu_config_M262144_N1728.json", + "M": 262144, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2467.4090000000015 + }, + "M=262144,N=1760": { + "file": "silu_config_M262144_N1760.json", + "M": 262144, + "N": 1760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2461.889 + }, + "M=262144,N=1792": { + "file": "silu_config_M262144_N1792.json", + "M": 262144, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.4890000000005 + }, + "M=262144,N=1920": { + "file": "silu_config_M262144_N1920.json", + "M": 262144, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.168999999999 + }, + "M=262144,N=2048": { + "file": "silu_config_M262144_N2048.json", + "M": 262144, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1928.4070000000002 + }, + "M=262144,N=2080": { + "file": "silu_config_M262144_N2080.json", + "M": 262144, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2699.5699999999997 + }, + "M=262144,N=2240": { + "file": "silu_config_M262144_N2240.json", + "M": 262144, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2844.170750000002 + }, + "M=262144,N=2400": { + "file": "silu_config_M262144_N2400.json", + "M": 262144, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.9712499999987 + }, + "M=262144,N=2560": { + "file": "silu_config_M262144_N2560.json", + "M": 262144, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3155.6519999999964 + }, + "M=263168,N=128": { + "file": "silu_config_M263168_N128.json", + "M": 263168, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.95975 + }, + "M=263168,N=160": { + "file": "silu_config_M263168_N160.json", + "M": 263168, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.08025000000004 + }, + "M=263168,N=192": { + "file": "silu_config_M263168_N192.json", + "M": 263168, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.16025 + }, + "M=263168,N=256": { + "file": "silu_config_M263168_N256.json", + "M": 263168, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.9200000000001 + }, + "M=263168,N=320": { + "file": "silu_config_M263168_N320.json", + "M": 263168, + "N": 320, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.8815 + }, + "M=263168,N=384": { + "file": "silu_config_M263168_N384.json", + "M": 263168, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.4814999999999 + }, + "M=263168,N=480": { + "file": "silu_config_M263168_N480.json", + "M": 263168, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.6815000000001 + }, + "M=263168,N=512": { + "file": "silu_config_M263168_N512.json", + "M": 263168, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 515.0410000000002 + }, + "M=263168,N=576": { + "file": "silu_config_M263168_N576.json", + "M": 263168, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.7240000000002 + }, + "M=263168,N=640": { + "file": "silu_config_M263168_N640.json", + "M": 263168, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.6440000000002 + }, + "M=263168,N=768": { + "file": "silu_config_M263168_N768.json", + "M": 263168, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1220.8039999999996 + }, + "M=263168,N=800": { + "file": "silu_config_M263168_N800.json", + "M": 263168, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.0039999999995 + }, + "M=263168,N=896": { + "file": "silu_config_M263168_N896.json", + "M": 263168, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1223.2439999999997 + }, + "M=263168,N=960": { + "file": "silu_config_M263168_N960.json", + "M": 263168, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.6840000000002 + }, + "M=263168,N=1024": { + "file": "silu_config_M263168_N1024.json", + "M": 263168, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 992.4430000000002 + }, + "M=263168,N=1120": { + "file": "silu_config_M263168_N1120.json", + "M": 263168, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2461.5289999999995 + }, + "M=263168,N=1152": { + "file": 
"silu_config_M263168_N1152.json", + "M": 263168, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.4489999999996 + }, + "M=263168,N=1280": { + "file": "silu_config_M263168_N1280.json", + "M": 263168, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2455.6089999999995 + }, + "M=263168,N=1344": { + "file": "silu_config_M263168_N1344.json", + "M": 263168, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2443.4490000000005 + }, + "M=263168,N=1408": { + "file": "silu_config_M263168_N1408.json", + "M": 263168, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.929 + }, + "M=263168,N=1440": { + "file": "silu_config_M263168_N1440.json", + "M": 263168, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.928999999999 + }, + "M=263168,N=1536": { + "file": "silu_config_M263168_N1536.json", + "M": 263168, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.4489999999996 + }, + "M=263168,N=1600": { + "file": "silu_config_M263168_N1600.json", + "M": 263168, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.009000000001 + }, + "M=263168,N=1664": { + "file": "silu_config_M263168_N1664.json", + "M": 263168, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2451.3289999999997 + }, + "M=263168,N=1728": { + "file": "silu_config_M263168_N1728.json", + "M": 263168, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.769000000001 + }, + "M=263168,N=1760": { + "file": "silu_config_M263168_N1760.json", + "M": 263168, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2461.089000000001 + }, + "M=263168,N=1792": { + "file": "silu_config_M263168_N1792.json", + "M": 263168, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2454.129000000001 + }, + "M=263168,N=1920": { + "file": "silu_config_M263168_N1920.json", + "M": 263168, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2471.8889999999983 + }, + "M=263168,N=2048": { + "file": "silu_config_M263168_N2048.json", + "M": 263168, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1955.7670000000007 + }, + "M=263168,N=2080": { + "file": "silu_config_M263168_N2080.json", + "M": 263168, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2709.4900000000016 + }, + "M=263168,N=2240": { + "file": "silu_config_M263168_N2240.json", + "M": 263168, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2865.1307499999966 + }, + "M=263168,N=2400": { + "file": "silu_config_M263168_N2400.json", + "M": 263168, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.8914999999997 + }, + "M=263168,N=2560": { + "file": "silu_config_M263168_N2560.json", + "M": 263168, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3154.692000000001 + }, + "M=264192,N=128": { + "file": "silu_config_M264192_N128.json", + "M": 264192, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.43949999999998 + }, + "M=264192,N=160": { + "file": "silu_config_M264192_N160.json", + "M": 264192, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.48025000000007 + }, + "M=264192,N=192": { + "file": "silu_config_M264192_N192.json", + "M": 264192, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.5602500000002 + }, + "M=264192,N=256": { + "file": "silu_config_M264192_N256.json", + "M": 264192, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.91999999999985 + }, + "M=264192,N=320": { + "file": "silu_config_M264192_N320.json", + "M": 264192, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.8415000000005 + }, + "M=264192,N=384": { + "file": "silu_config_M264192_N384.json", + "M": 264192, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.6415 + }, + "M=264192,N=480": { + "file": "silu_config_M264192_N480.json", + "M": 264192, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 647.1614999999997 + }, + "M=264192,N=512": { + "file": "silu_config_M264192_N512.json", + "M": 264192, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 497.44100000000003 + }, + "M=264192,N=576": { + "file": "silu_config_M264192_N576.json", + "M": 264192, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1247.8039999999996 + }, + "M=264192,N=640": { + "file": "silu_config_M264192_N640.json", + "M": 264192, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1240.2440000000006 + }, + "M=264192,N=768": { + "file": "silu_config_M264192_N768.json", + "M": 264192, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1222.8439999999996 + }, + "M=264192,N=800": { + "file": "silu_config_M264192_N800.json", + "M": 264192, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1252.6039999999998 + }, + "M=264192,N=896": { + "file": "silu_config_M264192_N896.json", + "M": 264192, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1238.2440000000001 + }, + "M=264192,N=960": { + "file": "silu_config_M264192_N960.json", + "M": 264192, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.1640000000007 + }, + "M=264192,N=1024": { + "file": "silu_config_M264192_N1024.json", + "M": 264192, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 996.7230000000004 + }, + "M=264192,N=1120": { + "file": "silu_config_M264192_N1120.json", + "M": 264192, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2450.6890000000003 + }, + "M=264192,N=1152": { + "file": "silu_config_M264192_N1152.json", + "M": 264192, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.6889999999994 + }, + "M=264192,N=1280": { + "file": "silu_config_M264192_N1280.json", + "M": 264192, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2465.049 + }, + "M=264192,N=1344": { + "file": 
"silu_config_M264192_N1344.json", + "M": 264192, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2463.5289999999995 + }, + "M=264192,N=1408": { + "file": "silu_config_M264192_N1408.json", + "M": 264192, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.7289999999994 + }, + "M=264192,N=1440": { + "file": "silu_config_M264192_N1440.json", + "M": 264192, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2458.5689999999995 + }, + "M=264192,N=1536": { + "file": "silu_config_M264192_N1536.json", + "M": 264192, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.129000000001 + }, + "M=264192,N=1600": { + "file": "silu_config_M264192_N1600.json", + "M": 264192, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2476.7292499999994 + }, + "M=264192,N=1664": { + "file": "silu_config_M264192_N1664.json", + "M": 264192, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.129000000001 + }, + "M=264192,N=1728": { + "file": "silu_config_M264192_N1728.json", + "M": 264192, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2476.16925 + }, + "M=264192,N=1760": { + "file": "silu_config_M264192_N1760.json", + "M": 264192, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2480.889250000001 + }, + "M=264192,N=1792": { + "file": "silu_config_M264192_N1792.json", + "M": 264192, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2463.5689999999995 + }, + "M=264192,N=1920": { + "file": "silu_config_M264192_N1920.json", + "M": 264192, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.0890000000018 + }, + "M=264192,N=2048": { + "file": "silu_config_M264192_N2048.json", + "M": 264192, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1959.9270000000006 + }, + "M=264192,N=2080": { + "file": "silu_config_M264192_N2080.json", + "M": 264192, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2730.5702499999998 + }, + "M=264192,N=2240": { + "file": "silu_config_M264192_N2240.json", + "M": 264192, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2866.410749999998 + }, + "M=264192,N=2400": { + "file": "silu_config_M264192_N2400.json", + "M": 264192, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3019.211499999996 + }, + "M=264192,N=2560": { + "file": "silu_config_M264192_N2560.json", + "M": 264192, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3168.691999999999 + }, + "M=265216,N=128": { + "file": "silu_config_M265216_N128.json", + "M": 265216, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.91974999999996 + }, + "M=265216,N=160": { + "file": "silu_config_M265216_N160.json", + "M": 265216, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.0802500000001 + }, + "M=265216,N=192": { + "file": "silu_config_M265216_N192.json", + "M": 265216, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.96025 + }, + "M=265216,N=256": { + "file": "silu_config_M265216_N256.json", + "M": 265216, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 297.36024999999984 + }, + "M=265216,N=320": { + "file": "silu_config_M265216_N320.json", + "M": 265216, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.3215 + }, + "M=265216,N=384": { + "file": "silu_config_M265216_N384.json", + "M": 265216, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 638.8415000000002 + }, + "M=265216,N=480": { + "file": "silu_config_M265216_N480.json", + "M": 265216, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 649.3615 + }, + "M=265216,N=512": { + "file": "silu_config_M265216_N512.json", + "M": 265216, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 513.681 + }, + "M=265216,N=576": { + "file": "silu_config_M265216_N576.json", + "M": 265216, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1245.7640000000006 + }, + "M=265216,N=640": { + "file": "silu_config_M265216_N640.json", + "M": 265216, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1234.484 + }, + "M=265216,N=768": { + "file": "silu_config_M265216_N768.json", + "M": 265216, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1241.3240000000005 + }, + "M=265216,N=800": { + "file": "silu_config_M265216_N800.json", + "M": 265216, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1239.9240000000004 + }, + "M=265216,N=896": { + "file": "silu_config_M265216_N896.json", + "M": 265216, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1232.7640000000001 + }, + "M=265216,N=960": { + "file": "silu_config_M265216_N960.json", + "M": 265216, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1256.364 + }, + "M=265216,N=1024": { + "file": "silu_config_M265216_N1024.json", + "M": 265216, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1001.1632499999996 + }, + "M=265216,N=1120": { + "file": "silu_config_M265216_N1120.json", + "M": 265216, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2459.4889999999996 + }, + "M=265216,N=1152": { + "file": "silu_config_M265216_N1152.json", + "M": 265216, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2450.929 + }, + "M=265216,N=1280": { + "file": "silu_config_M265216_N1280.json", + "M": 265216, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2453.5690000000004 + }, + "M=265216,N=1344": { + "file": "silu_config_M265216_N1344.json", + "M": 265216, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2483.16925 + }, + "M=265216,N=1408": { + "file": "silu_config_M265216_N1408.json", + "M": 265216, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.209 + }, + "M=265216,N=1440": { + "file": "silu_config_M265216_N1440.json", + "M": 265216, + "N": 1440, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2488.6092500000013 + }, + "M=265216,N=1536": { + "file": "silu_config_M265216_N1536.json", + "M": 265216, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2460.969 + }, + "M=265216,N=1600": { + "file": "silu_config_M265216_N1600.json", + "M": 265216, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2475.5292500000005 + }, + "M=265216,N=1664": { + "file": "silu_config_M265216_N1664.json", + "M": 265216, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2456.6490000000003 + }, + "M=265216,N=1728": { + "file": "silu_config_M265216_N1728.json", + "M": 265216, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2506.209249999998 + }, + "M=265216,N=1760": { + "file": "silu_config_M265216_N1760.json", + "M": 265216, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2500.4892500000005 + }, + "M=265216,N=1792": { + "file": "silu_config_M265216_N1792.json", + "M": 265216, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2462.4089999999987 + }, + "M=265216,N=1920": { + "file": "silu_config_M265216_N1920.json", + "M": 265216, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2490.969250000001 + }, + "M=265216,N=2048": { + "file": "silu_config_M265216_N2048.json", + "M": 265216, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1958.4070000000002 + }, + "M=265216,N=2080": { + "file": "silu_config_M265216_N2080.json", + "M": 265216, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2730.8102500000005 + }, + "M=265216,N=2240": { + "file": "silu_config_M265216_N2240.json", + "M": 265216, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.4507500000027 + }, + "M=265216,N=2400": { + "file": "silu_config_M265216_N2400.json", + "M": 265216, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3041.531500000001 + }, + "M=265216,N=2560": { + "file": 
"silu_config_M265216_N2560.json", + "M": 265216, + "N": 2560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 3188.612000000001 + }, + "M=266240,N=128": { + "file": "silu_config_M266240_N128.json", + "M": 266240, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 147.43975 + }, + "M=266240,N=160": { + "file": "silu_config_M266240_N160.json", + "M": 266240, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.48024999999996 + }, + "M=266240,N=192": { + "file": "silu_config_M266240_N192.json", + "M": 266240, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.8802499999998 + }, + "M=266240,N=256": { + "file": "silu_config_M266240_N256.json", + "M": 266240, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.15999999999997 + }, + "M=266240,N=320": { + "file": "silu_config_M266240_N320.json", + "M": 266240, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 640.4815000000003 + }, + "M=266240,N=384": { + "file": "silu_config_M266240_N384.json", + "M": 266240, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.6014999999998 + }, + "M=266240,N=480": { + "file": "silu_config_M266240_N480.json", + "M": 266240, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.6814999999999 + }, + "M=266240,N=512": { + "file": "silu_config_M266240_N512.json", + "M": 266240, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 514.8812500000006 + }, + "M=266240,N=576": { + "file": "silu_config_M266240_N576.json", + "M": 266240, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1218.2440000000001 + }, + "M=266240,N=640": { + "file": "silu_config_M266240_N640.json", + "M": 266240, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1211.6039999999998 + }, + "M=266240,N=768": { + "file": "silu_config_M266240_N768.json", + "M": 266240, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.7639999999997 + }, + 
"M=266240,N=800": { + "file": "silu_config_M266240_N800.json", + "M": 266240, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1222.484 + }, + "M=266240,N=896": { + "file": "silu_config_M266240_N896.json", + "M": 266240, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.6439999999993 + }, + "M=266240,N=960": { + "file": "silu_config_M266240_N960.json", + "M": 266240, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.444 + }, + "M=266240,N=1024": { + "file": "silu_config_M266240_N1024.json", + "M": 266240, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1001.6432499999996 + }, + "M=266240,N=1120": { + "file": "silu_config_M266240_N1120.json", + "M": 266240, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.4489999999996 + }, + "M=266240,N=1152": { + "file": "silu_config_M266240_N1152.json", + "M": 266240, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2455.8489999999993 + }, + "M=266240,N=1280": { + "file": "silu_config_M266240_N1280.json", + "M": 266240, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.4890000000005 + }, + "M=266240,N=1344": { + "file": "silu_config_M266240_N1344.json", + "M": 266240, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2444.089 + }, + "M=266240,N=1408": { + "file": "silu_config_M266240_N1408.json", + "M": 266240, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.7290000000003 + }, + "M=266240,N=1440": { + "file": "silu_config_M266240_N1440.json", + "M": 266240, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.689000000001 + }, + "M=266240,N=1536": { + "file": "silu_config_M266240_N1536.json", + "M": 266240, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.168999999999 + }, + "M=266240,N=1600": { + "file": "silu_config_M266240_N1600.json", + "M": 266240, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2467.089 + }, + "M=266240,N=1664": { + "file": "silu_config_M266240_N1664.json", + "M": 266240, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2450.208999999999 + }, + "M=266240,N=1728": { + "file": "silu_config_M266240_N1728.json", + "M": 266240, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.5292499999996 + }, + "M=266240,N=1760": { + "file": "silu_config_M266240_N1760.json", + "M": 266240, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2459.8489999999993 + }, + "M=266240,N=1792": { + "file": "silu_config_M266240_N1792.json", + "M": 266240, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2445.169000000001 + }, + "M=266240,N=1920": { + "file": "silu_config_M266240_N1920.json", + "M": 266240, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2462.849000000001 + }, + "M=266240,N=2048": { + "file": "silu_config_M266240_N2048.json", + "M": 266240, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1937.7669999999998 + }, + "M=266240,N=2080": { + "file": "silu_config_M266240_N2080.json", + "M": 266240, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2731.6102500000006 + }, + "M=266240,N=2240": { + "file": "silu_config_M266240_N2240.json", + "M": 266240, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.690749999999 + }, + "M=266240,N=2400": { + "file": "silu_config_M266240_N2400.json", + "M": 266240, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.891249999997 + }, + "M=266240,N=2560": { + "file": "silu_config_M266240_N2560.json", + "M": 266240, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3147.132 + }, + "M=267264,N=128": { + "file": "silu_config_M267264_N128.json", + "M": 267264, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.5995 + }, + "M=267264,N=160": { + "file": "silu_config_M267264_N160.json", + "M": 267264, + "N": 160, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 336.24049999999994 + }, + "M=267264,N=192": { + "file": "silu_config_M267264_N192.json", + "M": 267264, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 329.96025000000003 + }, + "M=267264,N=256": { + "file": "silu_config_M267264_N256.json", + "M": 267264, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.99999999999994 + }, + "M=267264,N=320": { + "file": "silu_config_M267264_N320.json", + "M": 267264, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 646.8815 + }, + "M=267264,N=384": { + "file": "silu_config_M267264_N384.json", + "M": 267264, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 643.6015000000001 + }, + "M=267264,N=480": { + "file": "silu_config_M267264_N480.json", + "M": 267264, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.1215 + }, + "M=267264,N=512": { + "file": "silu_config_M267264_N512.json", + "M": 267264, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 522.8812500000003 + }, + "M=267264,N=576": { + "file": "silu_config_M267264_N576.json", + "M": 267264, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1251.1240000000005 + }, + "M=267264,N=640": { + "file": "silu_config_M267264_N640.json", + "M": 267264, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.004 + }, + "M=267264,N=768": { + "file": "silu_config_M267264_N768.json", + "M": 267264, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1250.4839999999997 + }, + "M=267264,N=800": { + "file": "silu_config_M267264_N800.json", + "M": 267264, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.4042500000007 + }, + "M=267264,N=896": { + "file": "silu_config_M267264_N896.json", + "M": 267264, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.44425 + }, + "M=267264,N=960": { + "file": "silu_config_M267264_N960.json", + "M": 267264, + "N": 960, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.2042499999998 + }, + "M=267264,N=1024": { + "file": "silu_config_M267264_N1024.json", + "M": 267264, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1007.8029999999998 + }, + "M=267264,N=1120": { + "file": "silu_config_M267264_N1120.json", + "M": 267264, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2487.2492500000003 + }, + "M=267264,N=1152": { + "file": "silu_config_M267264_N1152.json", + "M": 267264, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2479.16925 + }, + "M=267264,N=1280": { + "file": "silu_config_M267264_N1280.json", + "M": 267264, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.9692500000006 + }, + "M=267264,N=1344": { + "file": "silu_config_M267264_N1344.json", + "M": 267264, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2489.96925 + }, + "M=267264,N=1408": { + "file": "silu_config_M267264_N1408.json", + "M": 267264, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2464.6089999999995 + }, + "M=267264,N=1440": { + "file": "silu_config_M267264_N1440.json", + "M": 267264, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.20925 + }, + "M=267264,N=1536": { + "file": "silu_config_M267264_N1536.json", + "M": 267264, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2468.209000000001 + }, + "M=267264,N=1600": { + "file": "silu_config_M267264_N1600.json", + "M": 267264, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2516.4492499999997 + }, + "M=267264,N=1664": { + "file": "silu_config_M267264_N1664.json", + "M": 267264, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2473.00925 + }, + "M=267264,N=1728": { + "file": "silu_config_M267264_N1728.json", + "M": 267264, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2510.0892500000014 + }, + "M=267264,N=1760": { + "file": 
"silu_config_M267264_N1760.json", + "M": 267264, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.2492500000003 + }, + "M=267264,N=1792": { + "file": "silu_config_M267264_N1792.json", + "M": 267264, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2488.40925 + }, + "M=267264,N=1920": { + "file": "silu_config_M267264_N1920.json", + "M": 267264, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.28925 + }, + "M=267264,N=2048": { + "file": "silu_config_M267264_N2048.json", + "M": 267264, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1979.2869999999998 + }, + "M=267264,N=2080": { + "file": "silu_config_M267264_N2080.json", + "M": 267264, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.0102500000003 + }, + "M=267264,N=2240": { + "file": "silu_config_M267264_N2240.json", + "M": 267264, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2904.771 + }, + "M=267264,N=2400": { + "file": "silu_config_M267264_N2400.json", + "M": 267264, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3043.6515000000004 + }, + "M=267264,N=2560": { + "file": "silu_config_M267264_N2560.json", + "M": 267264, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3202.012249999998 + }, + "M=268288,N=128": { + "file": "silu_config_M268288_N128.json", + "M": 268288, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.63975000000005 + }, + "M=268288,N=160": { + "file": "silu_config_M268288_N160.json", + "M": 268288, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.44049999999993 + }, + "M=268288,N=192": { + "file": "silu_config_M268288_N192.json", + "M": 268288, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.5602500000001 + }, + "M=268288,N=256": { + "file": "silu_config_M268288_N256.json", + "M": 268288, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.40025 + }, + 
"M=268288,N=320": { + "file": "silu_config_M268288_N320.json", + "M": 268288, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 652.08175 + }, + "M=268288,N=384": { + "file": "silu_config_M268288_N384.json", + "M": 268288, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.8015000000003 + }, + "M=268288,N=480": { + "file": "silu_config_M268288_N480.json", + "M": 268288, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.2017499999997 + }, + "M=268288,N=512": { + "file": "silu_config_M268288_N512.json", + "M": 268288, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 525.6812499999999 + }, + "M=268288,N=576": { + "file": "silu_config_M268288_N576.json", + "M": 268288, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1274.1242499999998 + }, + "M=268288,N=640": { + "file": "silu_config_M268288_N640.json", + "M": 268288, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1240.0839999999998 + }, + "M=268288,N=768": { + "file": "silu_config_M268288_N768.json", + "M": 268288, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1247.8040000000005 + }, + "M=268288,N=800": { + "file": "silu_config_M268288_N800.json", + "M": 268288, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.1242499999998 + }, + "M=268288,N=896": { + "file": "silu_config_M268288_N896.json", + "M": 268288, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.0042499999995 + }, + "M=268288,N=960": { + "file": "silu_config_M268288_N960.json", + "M": 268288, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1277.9642499999995 + }, + "M=268288,N=1024": { + "file": "silu_config_M268288_N1024.json", + "M": 268288, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1020.3632499999995 + }, + "M=268288,N=1120": { + "file": "silu_config_M268288_N1120.json", + "M": 268288, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2516.169249999999 + }, + "M=268288,N=1152": { + "file": "silu_config_M268288_N1152.json", + "M": 268288, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2492.4492499999997 + }, + "M=268288,N=1280": { + "file": "silu_config_M268288_N1280.json", + "M": 268288, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2494.129249999999 + }, + "M=268288,N=1344": { + "file": "silu_config_M268288_N1344.json", + "M": 268288, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.5292500000005 + }, + "M=268288,N=1408": { + "file": "silu_config_M268288_N1408.json", + "M": 268288, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2477.7692500000003 + }, + "M=268288,N=1440": { + "file": "silu_config_M268288_N1440.json", + "M": 268288, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2512.2892500000007 + }, + "M=268288,N=1536": { + "file": "silu_config_M268288_N1536.json", + "M": 268288, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2493.249249999999 + }, + "M=268288,N=1600": { + "file": "silu_config_M268288_N1600.json", + "M": 268288, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.7292499999994 + }, + "M=268288,N=1664": { + "file": "silu_config_M268288_N1664.json", + "M": 268288, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.3692499999997 + }, + "M=268288,N=1728": { + "file": "silu_config_M268288_N1728.json", + "M": 268288, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2498.7292500000003 + }, + "M=268288,N=1760": { + "file": "silu_config_M268288_N1760.json", + "M": 268288, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.16925 + }, + "M=268288,N=1792": { + "file": "silu_config_M268288_N1792.json", + "M": 268288, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.7292500000003 + }, + "M=268288,N=1920": { + "file": "silu_config_M268288_N1920.json", + 
"M": 268288, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.649250000002 + }, + "M=268288,N=2048": { + "file": "silu_config_M268288_N2048.json", + "M": 268288, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1999.72725 + }, + "M=268288,N=2080": { + "file": "silu_config_M268288_N2080.json", + "M": 268288, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.8505000000023 + }, + "M=268288,N=2240": { + "file": "silu_config_M268288_N2240.json", + "M": 268288, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2937.8110000000015 + }, + "M=268288,N=2400": { + "file": "silu_config_M268288_N2400.json", + "M": 268288, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3113.1317499999986 + }, + "M=268288,N=2560": { + "file": "silu_config_M268288_N2560.json", + "M": 268288, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3215.772250000002 + }, + "M=269312,N=128": { + "file": "silu_config_M269312_N128.json", + "M": 269312, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.11950000000002 + }, + "M=269312,N=160": { + "file": "silu_config_M269312_N160.json", + "M": 269312, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 339.08050000000014 + }, + "M=269312,N=192": { + "file": "silu_config_M269312_N192.json", + "M": 269312, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.2002500000001 + }, + "M=269312,N=256": { + "file": "silu_config_M269312_N256.json", + "M": 269312, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 292.36000000000024 + }, + "M=269312,N=320": { + "file": "silu_config_M269312_N320.json", + "M": 269312, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.68175 + }, + "M=269312,N=384": { + "file": "silu_config_M269312_N384.json", + "M": 269312, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 649.7215000000001 + }, + "M=269312,N=480": { + "file": 
"silu_config_M269312_N480.json", + "M": 269312, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.2017500000002 + }, + "M=269312,N=512": { + "file": "silu_config_M269312_N512.json", + "M": 269312, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 524.1212499999999 + }, + "M=269312,N=576": { + "file": "silu_config_M269312_N576.json", + "M": 269312, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1264.5642500000004 + }, + "M=269312,N=640": { + "file": "silu_config_M269312_N640.json", + "M": 269312, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1259.364 + }, + "M=269312,N=768": { + "file": "silu_config_M269312_N768.json", + "M": 269312, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.6842500000007 + }, + "M=269312,N=800": { + "file": "silu_config_M269312_N800.json", + "M": 269312, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.8442499999996 + }, + "M=269312,N=896": { + "file": "silu_config_M269312_N896.json", + "M": 269312, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1270.2042500000002 + }, + "M=269312,N=960": { + "file": "silu_config_M269312_N960.json", + "M": 269312, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.84425 + }, + "M=269312,N=1024": { + "file": "silu_config_M269312_N1024.json", + "M": 269312, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1030.0432500000002 + }, + "M=269312,N=1120": { + "file": "silu_config_M269312_N1120.json", + "M": 269312, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2504.84925 + }, + "M=269312,N=1152": { + "file": "silu_config_M269312_N1152.json", + "M": 269312, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2512.08925 + }, + "M=269312,N=1280": { + "file": "silu_config_M269312_N1280.json", + "M": 269312, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2513.6092500000013 + }, + 
"M=269312,N=1344": { + "file": "silu_config_M269312_N1344.json", + "M": 269312, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2514.5292500000014 + }, + "M=269312,N=1408": { + "file": "silu_config_M269312_N1408.json", + "M": 269312, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2497.6492499999995 + }, + "M=269312,N=1440": { + "file": "silu_config_M269312_N1440.json", + "M": 269312, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.4092499999997 + }, + "M=269312,N=1536": { + "file": "silu_config_M269312_N1536.json", + "M": 269312, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2502.4492500000024 + }, + "M=269312,N=1600": { + "file": "silu_config_M269312_N1600.json", + "M": 269312, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.089249999999 + }, + "M=269312,N=1664": { + "file": "silu_config_M269312_N1664.json", + "M": 269312, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2517.7692499999994 + }, + "M=269312,N=1728": { + "file": "silu_config_M269312_N1728.json", + "M": 269312, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2549.6895000000013 + }, + "M=269312,N=1760": { + "file": "silu_config_M269312_N1760.json", + "M": 269312, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.849500000001 + }, + "M=269312,N=1792": { + "file": "silu_config_M269312_N1792.json", + "M": 269312, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.9295 + }, + "M=269312,N=1920": { + "file": "silu_config_M269312_N1920.json", + "M": 269312, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.0494999999983 + }, + "M=269312,N=2048": { + "file": "silu_config_M269312_N2048.json", + "M": 269312, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2005.0072499999999 + }, + "M=269312,N=2080": { + "file": "silu_config_M269312_N2080.json", + "M": 269312, + "N": 2080, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2782.5305000000008 + }, + "M=269312,N=2240": { + "file": "silu_config_M269312_N2240.json", + "M": 269312, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2939.8110000000024 + }, + "M=269312,N=2400": { + "file": "silu_config_M269312_N2400.json", + "M": 269312, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3115.2917500000003 + }, + "M=269312,N=2560": { + "file": "silu_config_M269312_N2560.json", + "M": 269312, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3221.932249999998 + }, + "M=270336,N=128": { + "file": "silu_config_M270336_N128.json", + "M": 270336, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.67950000000002 + }, + "M=270336,N=160": { + "file": "silu_config_M270336_N160.json", + "M": 270336, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.76025000000004 + }, + "M=270336,N=192": { + "file": "silu_config_M270336_N192.json", + "M": 270336, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 334.6002500000001 + }, + "M=270336,N=256": { + "file": "silu_config_M270336_N256.json", + "M": 270336, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.60024999999996 + }, + "M=270336,N=320": { + "file": "silu_config_M270336_N320.json", + "M": 270336, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 656.6017499999998 + }, + "M=270336,N=384": { + "file": "silu_config_M270336_N384.json", + "M": 270336, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 652.5215000000001 + }, + "M=270336,N=480": { + "file": "silu_config_M270336_N480.json", + "M": 270336, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 668.4817499999999 + }, + "M=270336,N=512": { + "file": "silu_config_M270336_N512.json", + "M": 270336, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 515.8812500000001 + }, + "M=270336,N=576": { + "file": 
"silu_config_M270336_N576.json", + "M": 270336, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.4040000000005 + }, + "M=270336,N=640": { + "file": "silu_config_M270336_N640.json", + "M": 270336, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.1242499999998 + }, + "M=270336,N=768": { + "file": "silu_config_M270336_N768.json", + "M": 270336, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.1242500000003 + }, + "M=270336,N=800": { + "file": "silu_config_M270336_N800.json", + "M": 270336, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1270.0042500000009 + }, + "M=270336,N=896": { + "file": "silu_config_M270336_N896.json", + "M": 270336, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1271.52425 + }, + "M=270336,N=960": { + "file": "silu_config_M270336_N960.json", + "M": 270336, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.6842499999998 + }, + "M=270336,N=1024": { + "file": "silu_config_M270336_N1024.json", + "M": 270336, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1029.2032499999996 + }, + "M=270336,N=1120": { + "file": "silu_config_M270336_N1120.json", + "M": 270336, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.7292500000003 + }, + "M=270336,N=1152": { + "file": "silu_config_M270336_N1152.json", + "M": 270336, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.4492499999997 + }, + "M=270336,N=1280": { + "file": "silu_config_M270336_N1280.json", + "M": 270336, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2502.32925 + }, + "M=270336,N=1344": { + "file": "silu_config_M270336_N1344.json", + "M": 270336, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2513.8092500000002 + }, + "M=270336,N=1408": { + "file": "silu_config_M270336_N1408.json", + "M": 270336, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2506.92925 + }, 
+ "M=270336,N=1440": { + "file": "silu_config_M270336_N1440.json", + "M": 270336, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2531.32925 + }, + "M=270336,N=1536": { + "file": "silu_config_M270336_N1536.json", + "M": 270336, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2501.7292500000003 + }, + "M=270336,N=1600": { + "file": "silu_config_M270336_N1600.json", + "M": 270336, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.3695 + }, + "M=270336,N=1664": { + "file": "silu_config_M270336_N1664.json", + "M": 270336, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2517.049249999999 + }, + "M=270336,N=1728": { + "file": "silu_config_M270336_N1728.json", + "M": 270336, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.249500000001 + }, + "M=270336,N=1760": { + "file": "silu_config_M270336_N1760.json", + "M": 270336, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.6495000000004 + }, + "M=270336,N=1792": { + "file": "silu_config_M270336_N1792.json", + "M": 270336, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.6892500000004 + }, + "M=270336,N=1920": { + "file": "silu_config_M270336_N1920.json", + "M": 270336, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2532.8094999999985 + }, + "M=270336,N=2048": { + "file": "silu_config_M270336_N2048.json", + "M": 270336, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1994.60725 + }, + "M=270336,N=2080": { + "file": "silu_config_M270336_N2080.json", + "M": 270336, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2782.9704999999976 + }, + "M=270336,N=2240": { + "file": "silu_config_M270336_N2240.json", + "M": 270336, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2950.571000000001 + }, + "M=270336,N=2400": { + "file": "silu_config_M270336_N2400.json", + "M": 270336, + "N": 2400, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3115.691750000002 + }, + "M=270336,N=2560": { + "file": "silu_config_M270336_N2560.json", + "M": 270336, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3244.772249999998 + }, + "M=271360,N=128": { + "file": "silu_config_M271360_N128.json", + "M": 271360, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 150.11950000000002 + }, + "M=271360,N=160": { + "file": "silu_config_M271360_N160.json", + "M": 271360, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 329.0402500000001 + }, + "M=271360,N=192": { + "file": "silu_config_M271360_N192.json", + "M": 271360, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 347.0005 + }, + "M=271360,N=256": { + "file": "silu_config_M271360_N256.json", + "M": 271360, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 294.7202500000001 + }, + "M=271360,N=320": { + "file": "silu_config_M271360_N320.json", + "M": 271360, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.8417499999998 + }, + "M=271360,N=384": { + "file": "silu_config_M271360_N384.json", + "M": 271360, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.8815 + }, + "M=271360,N=480": { + "file": "silu_config_M271360_N480.json", + "M": 271360, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.84175 + }, + "M=271360,N=512": { + "file": "silu_config_M271360_N512.json", + "M": 271360, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 528.1212499999997 + }, + "M=271360,N=576": { + "file": "silu_config_M271360_N576.json", + "M": 271360, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.00425 + }, + "M=271360,N=640": { + "file": "silu_config_M271360_N640.json", + "M": 271360, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.7240000000002 + }, + "M=271360,N=768": { + "file": "silu_config_M271360_N768.json", + "M": 271360, + "N": 768, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1262.1242499999998 + }, + "M=271360,N=800": { + "file": "silu_config_M271360_N800.json", + "M": 271360, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.2442500000002 + }, + "M=271360,N=896": { + "file": "silu_config_M271360_N896.json", + "M": 271360, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.5642499999994 + }, + "M=271360,N=960": { + "file": "silu_config_M271360_N960.json", + "M": 271360, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1261.3242499999997 + }, + "M=271360,N=1024": { + "file": "silu_config_M271360_N1024.json", + "M": 271360, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1031.12325 + }, + "M=271360,N=1120": { + "file": "silu_config_M271360_N1120.json", + "M": 271360, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.7292499999994 + }, + "M=271360,N=1152": { + "file": "silu_config_M271360_N1152.json", + "M": 271360, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.16925 + }, + "M=271360,N=1280": { + "file": "silu_config_M271360_N1280.json", + "M": 271360, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2485.5692499999996 + }, + "M=271360,N=1344": { + "file": "silu_config_M271360_N1344.json", + "M": 271360, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.249249999999 + }, + "M=271360,N=1408": { + "file": "silu_config_M271360_N1408.json", + "M": 271360, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.129249999999 + }, + "M=271360,N=1440": { + "file": "silu_config_M271360_N1440.json", + "M": 271360, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.7692500000003 + }, + "M=271360,N=1536": { + "file": "silu_config_M271360_N1536.json", + "M": 271360, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.8892499999993 + }, + "M=271360,N=1600": { + "file": "silu_config_M271360_N1600.json", + 
"M": 271360, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2503.92925 + }, + "M=271360,N=1664": { + "file": "silu_config_M271360_N1664.json", + "M": 271360, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2509.9292499999992 + }, + "M=271360,N=1728": { + "file": "silu_config_M271360_N1728.json", + "M": 271360, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.169250000001 + }, + "M=271360,N=1760": { + "file": "silu_config_M271360_N1760.json", + "M": 271360, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2555.2895 + }, + "M=271360,N=1792": { + "file": "silu_config_M271360_N1792.json", + "M": 271360, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2496.249249999999 + }, + "M=271360,N=1920": { + "file": "silu_config_M271360_N1920.json", + "M": 271360, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.049250000001 + }, + "M=271360,N=2048": { + "file": "silu_config_M271360_N2048.json", + "M": 271360, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2011.9272499999997 + }, + "M=271360,N=2080": { + "file": "silu_config_M271360_N2080.json", + "M": 271360, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.410500000001 + }, + "M=271360,N=2240": { + "file": "silu_config_M271360_N2240.json", + "M": 271360, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.530999999998 + }, + "M=271360,N=2400": { + "file": "silu_config_M271360_N2400.json", + "M": 271360, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3124.851749999998 + }, + "M=271360,N=2560": { + "file": "silu_config_M271360_N2560.json", + "M": 271360, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3232.2922500000022 + }, + "M=272384,N=128": { + "file": "silu_config_M272384_N128.json", + "M": 272384, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 150.79950000000002 + }, + "M=272384,N=160": 
{ + "file": "silu_config_M272384_N160.json", + "M": 272384, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.1204999999999 + }, + "M=272384,N=192": { + "file": "silu_config_M272384_N192.json", + "M": 272384, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.56049999999993 + }, + "M=272384,N=256": { + "file": "silu_config_M272384_N256.json", + "M": 272384, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.76025000000004 + }, + "M=272384,N=320": { + "file": "silu_config_M272384_N320.json", + "M": 272384, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.5617499999998 + }, + "M=272384,N=384": { + "file": "silu_config_M272384_N384.json", + "M": 272384, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 657.8417500000003 + }, + "M=272384,N=480": { + "file": "silu_config_M272384_N480.json", + "M": 272384, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 673.92175 + }, + "M=272384,N=512": { + "file": "silu_config_M272384_N512.json", + "M": 272384, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 533.28125 + }, + "M=272384,N=576": { + "file": "silu_config_M272384_N576.json", + "M": 272384, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.6442499999998 + }, + "M=272384,N=640": { + "file": "silu_config_M272384_N640.json", + "M": 272384, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1273.4842499999995 + }, + "M=272384,N=768": { + "file": "silu_config_M272384_N768.json", + "M": 272384, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.96425 + }, + "M=272384,N=800": { + "file": "silu_config_M272384_N800.json", + "M": 272384, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.8842499999996 + }, + "M=272384,N=896": { + "file": "silu_config_M272384_N896.json", + "M": 272384, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1280.7242500000002 + }, + 
"M=272384,N=960": { + "file": "silu_config_M272384_N960.json", + "M": 272384, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1297.0442499999995 + }, + "M=272384,N=1024": { + "file": "silu_config_M272384_N1024.json", + "M": 272384, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1035.3232500000004 + }, + "M=272384,N=1120": { + "file": "silu_config_M272384_N1120.json", + "M": 272384, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2544.049499999999 + }, + "M=272384,N=1152": { + "file": "silu_config_M272384_N1152.json", + "M": 272384, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2519.6492499999995 + }, + "M=272384,N=1280": { + "file": "silu_config_M272384_N1280.json", + "M": 272384, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.2492500000008 + }, + "M=272384,N=1344": { + "file": "silu_config_M272384_N1344.json", + "M": 272384, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2543.2095000000018 + }, + "M=272384,N=1408": { + "file": "silu_config_M272384_N1408.json", + "M": 272384, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2546.6895000000013 + }, + "M=272384,N=1440": { + "file": "silu_config_M272384_N1440.json", + "M": 272384, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2550.2495 + }, + "M=272384,N=1536": { + "file": "silu_config_M272384_N1536.json", + "M": 272384, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2544.089499999999 + }, + "M=272384,N=1600": { + "file": "silu_config_M272384_N1600.json", + "M": 272384, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2573.4094999999998 + }, + "M=272384,N=1664": { + "file": "silu_config_M272384_N1664.json", + "M": 272384, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.4094999999998 + }, + "M=272384,N=1728": { + "file": "silu_config_M272384_N1728.json", + "M": 272384, + "N": 1728, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 2557.7294999999986 + }, + "M=272384,N=1760": { + "file": "silu_config_M272384_N1760.json", + "M": 272384, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.929500000001 + }, + "M=272384,N=1792": { + "file": "silu_config_M272384_N1792.json", + "M": 272384, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2533.009250000001 + }, + "M=272384,N=1920": { + "file": "silu_config_M272384_N1920.json", + "M": 272384, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.7295000000004 + }, + "M=272384,N=2048": { + "file": "silu_config_M272384_N2048.json", + "M": 272384, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2009.7672500000008 + }, + "M=272384,N=2080": { + "file": "silu_config_M272384_N2080.json", + "M": 272384, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2830.7704999999987 + }, + "M=272384,N=2240": { + "file": "silu_config_M272384_N2240.json", + "M": 272384, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2962.8912499999988 + }, + "M=272384,N=2400": { + "file": "silu_config_M272384_N2400.json", + "M": 272384, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3149.492000000004 + }, + "M=272384,N=2560": { + "file": "silu_config_M272384_N2560.json", + "M": 272384, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3279.2125000000015 + }, + "M=273408,N=128": { + "file": "silu_config_M273408_N128.json", + "M": 273408, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.19950000000006 + }, + "M=273408,N=160": { + "file": "silu_config_M273408_N160.json", + "M": 273408, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.8402500000002 + }, + "M=273408,N=192": { + "file": "silu_config_M273408_N192.json", + "M": 273408, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.36024999999995 + }, + "M=273408,N=256": { + "file": 
"silu_config_M273408_N256.json", + "M": 273408, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 296.84000000000003 + }, + "M=273408,N=320": { + "file": "silu_config_M273408_N320.json", + "M": 273408, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.5617500000001 + }, + "M=273408,N=384": { + "file": "silu_config_M273408_N384.json", + "M": 273408, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.4417500000002 + }, + "M=273408,N=480": { + "file": "silu_config_M273408_N480.json", + "M": 273408, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.7617500000001 + }, + "M=273408,N=512": { + "file": "silu_config_M273408_N512.json", + "M": 273408, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 531.7612499999998 + }, + "M=273408,N=576": { + "file": "silu_config_M273408_N576.json", + "M": 273408, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.5642499999994 + }, + "M=273408,N=640": { + "file": "silu_config_M273408_N640.json", + "M": 273408, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.0842500000003 + }, + "M=273408,N=768": { + "file": "silu_config_M273408_N768.json", + "M": 273408, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.0442500000004 + }, + "M=273408,N=800": { + "file": "silu_config_M273408_N800.json", + "M": 273408, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.0842499999999 + }, + "M=273408,N=896": { + "file": "silu_config_M273408_N896.json", + "M": 273408, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.6842500000002 + }, + "M=273408,N=960": { + "file": "silu_config_M273408_N960.json", + "M": 273408, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.3242500000001 + }, + "M=273408,N=1024": { + "file": "silu_config_M273408_N1024.json", + "M": 273408, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1039.6034999999997 
+ }, + "M=273408,N=1120": { + "file": "silu_config_M273408_N1120.json", + "M": 273408, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2553.089499999999 + }, + "M=273408,N=1152": { + "file": "silu_config_M273408_N1152.json", + "M": 273408, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.20925 + }, + "M=273408,N=1280": { + "file": "silu_config_M273408_N1280.json", + "M": 273408, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2530.4092500000006 + }, + "M=273408,N=1344": { + "file": "silu_config_M273408_N1344.json", + "M": 273408, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.9695 + }, + "M=273408,N=1408": { + "file": "silu_config_M273408_N1408.json", + "M": 273408, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.289499999999 + }, + "M=273408,N=1440": { + "file": "silu_config_M273408_N1440.json", + "M": 273408, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2570.1695 + }, + "M=273408,N=1536": { + "file": "silu_config_M273408_N1536.json", + "M": 273408, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.96925 + }, + "M=273408,N=1600": { + "file": "silu_config_M273408_N1600.json", + "M": 273408, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.5295000000006 + }, + "M=273408,N=1664": { + "file": "silu_config_M273408_N1664.json", + "M": 273408, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2555.169500000001 + }, + "M=273408,N=1728": { + "file": "silu_config_M273408_N1728.json", + "M": 273408, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.6894999999986 + }, + "M=273408,N=1760": { + "file": "silu_config_M273408_N1760.json", + "M": 273408, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.329499999999 + }, + "M=273408,N=1792": { + "file": "silu_config_M273408_N1792.json", + "M": 273408, + "N": 1792, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2542.7695000000012 + }, + "M=273408,N=1920": { + "file": "silu_config_M273408_N1920.json", + "M": 273408, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.249499999999 + }, + "M=273408,N=2048": { + "file": "silu_config_M273408_N2048.json", + "M": 273408, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2045.2075000000004 + }, + "M=273408,N=2080": { + "file": "silu_config_M273408_N2080.json", + "M": 273408, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.7707499999997 + }, + "M=273408,N=2240": { + "file": "silu_config_M273408_N2240.json", + "M": 273408, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.0112499999996 + }, + "M=273408,N=2400": { + "file": "silu_config_M273408_N2400.json", + "M": 273408, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3162.0519999999997 + }, + "M=273408,N=2560": { + "file": "silu_config_M273408_N2560.json", + "M": 273408, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3291.3324999999986 + }, + "M=274432,N=128": { + "file": "silu_config_M274432_N128.json", + "M": 274432, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.75949999999995 + }, + "M=274432,N=160": { + "file": "silu_config_M274432_N160.json", + "M": 274432, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.6005000000001 + }, + "M=274432,N=192": { + "file": "silu_config_M274432_N192.json", + "M": 274432, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.64025000000004 + }, + "M=274432,N=256": { + "file": "silu_config_M274432_N256.json", + "M": 274432, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 297.8402500000001 + }, + "M=274432,N=320": { + "file": "silu_config_M274432_N320.json", + "M": 274432, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.40175 + }, + "M=274432,N=384": { + "file": "silu_config_M274432_N384.json", + "M": 
274432, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.7617499999999 + }, + "M=274432,N=480": { + "file": "silu_config_M274432_N480.json", + "M": 274432, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 678.60175 + }, + "M=274432,N=512": { + "file": "silu_config_M274432_N512.json", + "M": 274432, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 533.04125 + }, + "M=274432,N=576": { + "file": "silu_config_M274432_N576.json", + "M": 274432, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1277.2842499999997 + }, + "M=274432,N=640": { + "file": "silu_config_M274432_N640.json", + "M": 274432, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.6442500000003 + }, + "M=274432,N=768": { + "file": "silu_config_M274432_N768.json", + "M": 274432, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1304.96425 + }, + "M=274432,N=800": { + "file": "silu_config_M274432_N800.json", + "M": 274432, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1297.4042499999996 + }, + "M=274432,N=896": { + "file": "silu_config_M274432_N896.json", + "M": 274432, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.2842499999997 + }, + "M=274432,N=960": { + "file": "silu_config_M274432_N960.json", + "M": 274432, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.2042500000002 + }, + "M=274432,N=1024": { + "file": "silu_config_M274432_N1024.json", + "M": 274432, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1043.80325 + }, + "M=274432,N=1120": { + "file": "silu_config_M274432_N1120.json", + "M": 274432, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2552.4094999999998 + }, + "M=274432,N=1152": { + "file": "silu_config_M274432_N1152.json", + "M": 274432, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.2095 + }, + "M=274432,N=1280": { + "file": "silu_config_M274432_N1280.json", + 
"M": 274432, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2539.7695000000003 + }, + "M=274432,N=1344": { + "file": "silu_config_M274432_N1344.json", + "M": 274432, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2562.0495 + }, + "M=274432,N=1408": { + "file": "silu_config_M274432_N1408.json", + "M": 274432, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2544.6095000000005 + }, + "M=274432,N=1440": { + "file": "silu_config_M274432_N1440.json", + "M": 274432, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.169499999999 + }, + "M=274432,N=1536": { + "file": "silu_config_M274432_N1536.json", + "M": 274432, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2549.8494999999994 + }, + "M=274432,N=1600": { + "file": "silu_config_M274432_N1600.json", + "M": 274432, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.849500000002 + }, + "M=274432,N=1664": { + "file": "silu_config_M274432_N1664.json", + "M": 274432, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2575.129500000001 + }, + "M=274432,N=1728": { + "file": "silu_config_M274432_N1728.json", + "M": 274432, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.889500000001 + }, + "M=274432,N=1760": { + "file": "silu_config_M274432_N1760.json", + "M": 274432, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.20975 + }, + "M=274432,N=1792": { + "file": "silu_config_M274432_N1792.json", + "M": 274432, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2562.169499999999 + }, + "M=274432,N=1920": { + "file": "silu_config_M274432_N1920.json", + "M": 274432, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2581.169499999999 + }, + "M=274432,N=2048": { + "file": "silu_config_M274432_N2048.json", + "M": 274432, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2043.8872499999995 + }, + 
"M=274432,N=2080": { + "file": "silu_config_M274432_N2080.json", + "M": 274432, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2835.490499999997 + }, + "M=274432,N=2240": { + "file": "silu_config_M274432_N2240.json", + "M": 274432, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3005.8512500000006 + }, + "M=274432,N=2400": { + "file": "silu_config_M274432_N2400.json", + "M": 274432, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3184.051999999998 + }, + "M=274432,N=2560": { + "file": "silu_config_M274432_N2560.json", + "M": 274432, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3277.5725 + }, + "M=275456,N=128": { + "file": "silu_config_M275456_N128.json", + "M": 275456, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.35950000000003 + }, + "M=275456,N=160": { + "file": "silu_config_M275456_N160.json", + "M": 275456, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 325.2004999999998 + }, + "M=275456,N=192": { + "file": "silu_config_M275456_N192.json", + "M": 275456, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.3204999999999 + }, + "M=275456,N=256": { + "file": "silu_config_M275456_N256.json", + "M": 275456, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.68025 + }, + "M=275456,N=320": { + "file": "silu_config_M275456_N320.json", + "M": 275456, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 668.8817499999998 + }, + "M=275456,N=384": { + "file": "silu_config_M275456_N384.json", + "M": 275456, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 665.0417500000001 + }, + "M=275456,N=480": { + "file": "silu_config_M275456_N480.json", + "M": 275456, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 680.8017499999999 + }, + "M=275456,N=512": { + "file": "silu_config_M275456_N512.json", + "M": 275456, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 
539.2412499999998 + }, + "M=275456,N=576": { + "file": "silu_config_M275456_N576.json", + "M": 275456, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.2442500000002 + }, + "M=275456,N=640": { + "file": "silu_config_M275456_N640.json", + "M": 275456, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.96425 + }, + "M=275456,N=768": { + "file": "silu_config_M275456_N768.json", + "M": 275456, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.36425 + }, + "M=275456,N=800": { + "file": "silu_config_M275456_N800.json", + "M": 275456, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.5642500000004 + }, + "M=275456,N=896": { + "file": "silu_config_M275456_N896.json", + "M": 275456, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1298.8442499999996 + }, + "M=275456,N=960": { + "file": "silu_config_M275456_N960.json", + "M": 275456, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1322.8045000000002 + }, + "M=275456,N=1024": { + "file": "silu_config_M275456_N1024.json", + "M": 275456, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1046.8835000000004 + }, + "M=275456,N=1120": { + "file": "silu_config_M275456_N1120.json", + "M": 275456, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2582.6095000000005 + }, + "M=275456,N=1152": { + "file": "silu_config_M275456_N1152.json", + "M": 275456, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2558.129499999999 + }, + "M=275456,N=1280": { + "file": "silu_config_M275456_N1280.json", + "M": 275456, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2538.9294999999993 + }, + "M=275456,N=1344": { + "file": "silu_config_M275456_N1344.json", + "M": 275456, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.4495000000006 + }, + "M=275456,N=1408": { + "file": "silu_config_M275456_N1408.json", + "M": 275456, + "N": 1408, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2564.4895000000006 + }, + "M=275456,N=1440": { + "file": "silu_config_M275456_N1440.json", + "M": 275456, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.2895000000008 + }, + "M=275456,N=1536": { + "file": "silu_config_M275456_N1536.json", + "M": 275456, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.6895000000004 + }, + "M=275456,N=1600": { + "file": "silu_config_M275456_N1600.json", + "M": 275456, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2591.6094999999987 + }, + "M=275456,N=1664": { + "file": "silu_config_M275456_N1664.json", + "M": 275456, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2563.6094999999978 + }, + "M=275456,N=1728": { + "file": "silu_config_M275456_N1728.json", + "M": 275456, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.129499999998 + }, + "M=275456,N=1760": { + "file": "silu_config_M275456_N1760.json", + "M": 275456, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2592.2895000000017 + }, + "M=275456,N=1792": { + "file": "silu_config_M275456_N1792.json", + "M": 275456, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2561.089500000002 + }, + "M=275456,N=1920": { + "file": "silu_config_M275456_N1920.json", + "M": 275456, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2590.969500000001 + }, + "M=275456,N=2048": { + "file": "silu_config_M275456_N2048.json", + "M": 275456, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2053.2075000000013 + }, + "M=275456,N=2080": { + "file": "silu_config_M275456_N2080.json", + "M": 275456, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2835.690499999998 + }, + "M=275456,N=2240": { + "file": "silu_config_M275456_N2240.json", + "M": 275456, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.771249999999 + }, + "M=275456,N=2400": { + 
"file": "silu_config_M275456_N2400.json", + "M": 275456, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3166.3719999999967 + }, + "M=275456,N=2560": { + "file": "silu_config_M275456_N2560.json", + "M": 275456, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.1324999999997 + }, + "M=276480,N=128": { + "file": "silu_config_M276480_N128.json", + "M": 276480, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 152.39974999999998 + }, + "M=276480,N=160": { + "file": "silu_config_M276480_N160.json", + "M": 276480, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 334.44049999999993 + }, + "M=276480,N=192": { + "file": "silu_config_M276480_N192.json", + "M": 276480, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.2405000000001 + }, + "M=276480,N=256": { + "file": "silu_config_M276480_N256.json", + "M": 276480, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.7602499999999 + }, + "M=276480,N=320": { + "file": "silu_config_M276480_N320.json", + "M": 276480, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 667.5217500000001 + }, + "M=276480,N=384": { + "file": "silu_config_M276480_N384.json", + "M": 276480, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 662.64175 + }, + "M=276480,N=480": { + "file": "silu_config_M276480_N480.json", + "M": 276480, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.9617500000002 + }, + "M=276480,N=512": { + "file": "silu_config_M276480_N512.json", + "M": 276480, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 537.6812500000005 + }, + "M=276480,N=576": { + "file": "silu_config_M276480_N576.json", + "M": 276480, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1280.0042499999995 + }, + "M=276480,N=640": { + "file": "silu_config_M276480_N640.json", + "M": 276480, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1270.40425 + }, + 
"M=276480,N=768": { + "file": "silu_config_M276480_N768.json", + "M": 276480, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.8442499999996 + }, + "M=276480,N=800": { + "file": "silu_config_M276480_N800.json", + "M": 276480, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.7242499999998 + }, + "M=276480,N=896": { + "file": "silu_config_M276480_N896.json", + "M": 276480, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1269.2442499999993 + }, + "M=276480,N=960": { + "file": "silu_config_M276480_N960.json", + "M": 276480, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.52425 + }, + "M=276480,N=1024": { + "file": "silu_config_M276480_N1024.json", + "M": 276480, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1047.84325 + }, + "M=276480,N=1120": { + "file": "silu_config_M276480_N1120.json", + "M": 276480, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2538.8095000000003 + }, + "M=276480,N=1152": { + "file": "silu_config_M276480_N1152.json", + "M": 276480, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.4092500000006 + }, + "M=276480,N=1280": { + "file": "silu_config_M276480_N1280.json", + "M": 276480, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2542.7295000000013 + }, + "M=276480,N=1344": { + "file": "silu_config_M276480_N1344.json", + "M": 276480, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2541.6494999999995 + }, + "M=276480,N=1408": { + "file": "silu_config_M276480_N1408.json", + "M": 276480, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2536.049499999999 + }, + "M=276480,N=1440": { + "file": "silu_config_M276480_N1440.json", + "M": 276480, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.9295 + }, + "M=276480,N=1536": { + "file": "silu_config_M276480_N1536.json", + "M": 276480, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 2540.4094999999998 + }, + "M=276480,N=1600": { + "file": "silu_config_M276480_N1600.json", + "M": 276480, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2578.8895 + }, + "M=276480,N=1664": { + "file": "silu_config_M276480_N1664.json", + "M": 276480, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.7694999999985 + }, + "M=276480,N=1728": { + "file": "silu_config_M276480_N1728.json", + "M": 276480, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2561.809500000001 + }, + "M=276480,N=1760": { + "file": "silu_config_M276480_N1760.json", + "M": 276480, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2576.3695000000007 + }, + "M=276480,N=1792": { + "file": "silu_config_M276480_N1792.json", + "M": 276480, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.5295000000015 + }, + "M=276480,N=1920": { + "file": "silu_config_M276480_N1920.json", + "M": 276480, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.8095000000003 + }, + "M=276480,N=2048": { + "file": "silu_config_M276480_N2048.json", + "M": 276480, + "N": 2048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 2056.0875000000015 + }, + "M=276480,N=2080": { + "file": "silu_config_M276480_N2080.json", + "M": 276480, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.330750000001 + }, + "M=276480,N=2240": { + "file": "silu_config_M276480_N2240.json", + "M": 276480, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.291250000001 + }, + "M=276480,N=2400": { + "file": "silu_config_M276480_N2400.json", + "M": 276480, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3162.2120000000014 + }, + "M=276480,N=2560": { + "file": "silu_config_M276480_N2560.json", + "M": 276480, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3301.532500000001 + }, + "M=277504,N=128": { + "file": "silu_config_M277504_N128.json", + "M": 
277504, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.91974999999996 + }, + "M=277504,N=160": { + "file": "silu_config_M277504_N160.json", + "M": 277504, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 341.84050000000013 + }, + "M=277504,N=192": { + "file": "silu_config_M277504_N192.json", + "M": 277504, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.16025000000013 + }, + "M=277504,N=256": { + "file": "silu_config_M277504_N256.json", + "M": 277504, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.4402500000001 + }, + "M=277504,N=320": { + "file": "silu_config_M277504_N320.json", + "M": 277504, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.0417499999999 + }, + "M=277504,N=384": { + "file": "silu_config_M277504_N384.json", + "M": 277504, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.3617499999998 + }, + "M=277504,N=480": { + "file": "silu_config_M277504_N480.json", + "M": 277504, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.1617499999998 + }, + "M=277504,N=512": { + "file": "silu_config_M277504_N512.json", + "M": 277504, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 543.52125 + }, + "M=277504,N=576": { + "file": "silu_config_M277504_N576.json", + "M": 277504, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.2042500000002 + }, + "M=277504,N=640": { + "file": "silu_config_M277504_N640.json", + "M": 277504, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.88425 + }, + "M=277504,N=768": { + "file": "silu_config_M277504_N768.json", + "M": 277504, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.2442500000002 + }, + "M=277504,N=800": { + "file": "silu_config_M277504_N800.json", + "M": 277504, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.9644999999996 + }, + "M=277504,N=896": { + "file": 
"silu_config_M277504_N896.json", + "M": 277504, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.84425 + }, + "M=277504,N=960": { + "file": "silu_config_M277504_N960.json", + "M": 277504, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1329.8445000000002 + }, + "M=277504,N=1024": { + "file": "silu_config_M277504_N1024.json", + "M": 277504, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1053.4032499999994 + }, + "M=277504,N=1120": { + "file": "silu_config_M277504_N1120.json", + "M": 277504, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2607.6097499999996 + }, + "M=277504,N=1152": { + "file": "silu_config_M277504_N1152.json", + "M": 277504, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2564.7295000000004 + }, + "M=277504,N=1280": { + "file": "silu_config_M277504_N1280.json", + "M": 277504, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2557.6495000000014 + }, + "M=277504,N=1344": { + "file": "silu_config_M277504_N1344.json", + "M": 277504, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.8895 + }, + "M=277504,N=1408": { + "file": "silu_config_M277504_N1408.json", + "M": 277504, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.0895 + }, + "M=277504,N=1440": { + "file": "silu_config_M277504_N1440.json", + "M": 277504, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.489749999999 + }, + "M=277504,N=1536": { + "file": "silu_config_M277504_N1536.json", + "M": 277504, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2576.049500000001 + }, + "M=277504,N=1600": { + "file": "silu_config_M277504_N1600.json", + "M": 277504, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.7297500000004 + }, + "M=277504,N=1664": { + "file": "silu_config_M277504_N1664.json", + "M": 277504, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2580.5695000000005 + }, + "M=277504,N=1728": { + "file": "silu_config_M277504_N1728.json", + "M": 277504, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2610.6097500000005 + }, + "M=277504,N=1760": { + "file": "silu_config_M277504_N1760.json", + "M": 277504, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.5297500000006 + }, + "M=277504,N=1792": { + "file": "silu_config_M277504_N1792.json", + "M": 277504, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.209499999999 + }, + "M=277504,N=1920": { + "file": "silu_config_M277504_N1920.json", + "M": 277504, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2596.169750000002 + }, + "M=277504,N=2048": { + "file": "silu_config_M277504_N2048.json", + "M": 277504, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2065.607500000001 + }, + "M=277504,N=2080": { + "file": "silu_config_M277504_N2080.json", + "M": 277504, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.330750000002 + }, + "M=277504,N=2240": { + "file": "silu_config_M277504_N2240.json", + "M": 277504, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3022.2115000000013 + }, + "M=277504,N=2400": { + "file": "silu_config_M277504_N2400.json", + "M": 277504, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3197.0522500000025 + }, + "M=277504,N=2560": { + "file": "silu_config_M277504_N2560.json", + "M": 277504, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3334.132749999999 + }, + "M=278528,N=128": { + "file": "silu_config_M278528_N128.json", + "M": 278528, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.95975000000004 + }, + "M=278528,N=160": { + "file": "silu_config_M278528_N160.json", + "M": 278528, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.9202499999998 + }, + "M=278528,N=192": { + "file": "silu_config_M278528_N192.json", + "M": 278528, + "N": 
192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 343.40025 + }, + "M=278528,N=256": { + "file": "silu_config_M278528_N256.json", + "M": 278528, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 302.12024999999994 + }, + "M=278528,N=320": { + "file": "silu_config_M278528_N320.json", + "M": 278528, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.9617499999997 + }, + "M=278528,N=384": { + "file": "silu_config_M278528_N384.json", + "M": 278528, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 672.12175 + }, + "M=278528,N=480": { + "file": "silu_config_M278528_N480.json", + "M": 278528, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.64175 + }, + "M=278528,N=512": { + "file": "silu_config_M278528_N512.json", + "M": 278528, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 531.7612500000005 + }, + "M=278528,N=576": { + "file": "silu_config_M278528_N576.json", + "M": 278528, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1311.2442500000002 + }, + "M=278528,N=640": { + "file": "silu_config_M278528_N640.json", + "M": 278528, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1291.2042500000002 + }, + "M=278528,N=768": { + "file": "silu_config_M278528_N768.json", + "M": 278528, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.3242499999997 + }, + "M=278528,N=800": { + "file": "silu_config_M278528_N800.json", + "M": 278528, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1307.9642499999995 + }, + "M=278528,N=896": { + "file": "silu_config_M278528_N896.json", + "M": 278528, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.4842500000004 + }, + "M=278528,N=960": { + "file": "silu_config_M278528_N960.json", + "M": 278528, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1312.7242499999998 + }, + "M=278528,N=1024": { + "file": "silu_config_M278528_N1024.json", + "M": 278528, 
+ "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1056.4432500000007 + }, + "M=278528,N=1120": { + "file": "silu_config_M278528_N1120.json", + "M": 278528, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2596.36975 + }, + "M=278528,N=1152": { + "file": "silu_config_M278528_N1152.json", + "M": 278528, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2563.5695000000005 + }, + "M=278528,N=1280": { + "file": "silu_config_M278528_N1280.json", + "M": 278528, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2577.6094999999996 + }, + "M=278528,N=1344": { + "file": "silu_config_M278528_N1344.json", + "M": 278528, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2599.28975 + }, + "M=278528,N=1408": { + "file": "silu_config_M278528_N1408.json", + "M": 278528, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.329499999999 + }, + "M=278528,N=1440": { + "file": "silu_config_M278528_N1440.json", + "M": 278528, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.9297499999993 + }, + "M=278528,N=1536": { + "file": "silu_config_M278528_N1536.json", + "M": 278528, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2575.089500000001 + }, + "M=278528,N=1600": { + "file": "silu_config_M278528_N1600.json", + "M": 278528, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.0097499999993 + }, + "M=278528,N=1664": { + "file": "silu_config_M278528_N1664.json", + "M": 278528, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.6095000000005 + }, + "M=278528,N=1728": { + "file": "silu_config_M278528_N1728.json", + "M": 278528, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.6897500000014 + }, + "M=278528,N=1760": { + "file": "silu_config_M278528_N1760.json", + "M": 278528, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.7697500000004 + }, + "M=278528,N=1792": { + 
"file": "silu_config_M278528_N1792.json", + "M": 278528, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.8097499999985 + }, + "M=278528,N=1920": { + "file": "silu_config_M278528_N1920.json", + "M": 278528, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.929750000002 + }, + "M=278528,N=2048": { + "file": "silu_config_M278528_N2048.json", + "M": 278528, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2042.6872499999988 + }, + "M=278528,N=2080": { + "file": "silu_config_M278528_N2080.json", + "M": 278528, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.570749999997 + }, + "M=278528,N=2240": { + "file": "silu_config_M278528_N2240.json", + "M": 278528, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3012.9712500000005 + }, + "M=278528,N=2400": { + "file": "silu_config_M278528_N2400.json", + "M": 278528, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3210.2122499999987 + }, + "M=278528,N=2560": { + "file": "silu_config_M278528_N2560.json", + "M": 278528, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3346.212749999999 + }, + "M=279552,N=128": { + "file": "silu_config_M279552_N128.json", + "M": 279552, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.51975000000004 + }, + "M=279552,N=160": { + "file": "silu_config_M279552_N160.json", + "M": 279552, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.08050000000003 + }, + "M=279552,N=192": { + "file": "silu_config_M279552_N192.json", + "M": 279552, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 351.20050000000015 + }, + "M=279552,N=256": { + "file": "silu_config_M279552_N256.json", + "M": 279552, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.0802500000001 + }, + "M=279552,N=320": { + "file": "silu_config_M279552_N320.json", + "M": 279552, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 678.4817499999999 + }, + "M=279552,N=384": { + "file": "silu_config_M279552_N384.json", + "M": 279552, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.2017500000002 + }, + "M=279552,N=480": { + "file": "silu_config_M279552_N480.json", + "M": 279552, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.92175 + }, + "M=279552,N=512": { + "file": "silu_config_M279552_N512.json", + "M": 279552, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 534.6812499999996 + }, + "M=279552,N=576": { + "file": "silu_config_M279552_N576.json", + "M": 279552, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6442500000003 + }, + "M=279552,N=640": { + "file": "silu_config_M279552_N640.json", + "M": 279552, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1302.5242500000004 + }, + "M=279552,N=768": { + "file": "silu_config_M279552_N768.json", + "M": 279552, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1310.7642499999997 + }, + "M=279552,N=800": { + "file": "silu_config_M279552_N800.json", + "M": 279552, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.0845 + }, + "M=279552,N=896": { + "file": "silu_config_M279552_N896.json", + "M": 279552, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.1242499999998 + }, + "M=279552,N=960": { + "file": "silu_config_M279552_N960.json", + "M": 279552, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1317.6442499999998 + }, + "M=279552,N=1024": { + "file": "silu_config_M279552_N1024.json", + "M": 279552, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1058.6035000000002 + }, + "M=279552,N=1120": { + "file": "silu_config_M279552_N1120.json", + "M": 279552, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.7297499999995 + }, + "M=279552,N=1152": { + "file": "silu_config_M279552_N1152.json", + "M": 279552, + "N": 1152, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.6095000000005 + }, + "M=279552,N=1280": { + "file": "silu_config_M279552_N1280.json", + "M": 279552, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.2095 + }, + "M=279552,N=1344": { + "file": "silu_config_M279552_N1344.json", + "M": 279552, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2608.8097499999994 + }, + "M=279552,N=1408": { + "file": "silu_config_M279552_N1408.json", + "M": 279552, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2601.289749999999 + }, + "M=279552,N=1440": { + "file": "silu_config_M279552_N1440.json", + "M": 279552, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2624.8497500000003 + }, + "M=279552,N=1536": { + "file": "silu_config_M279552_N1536.json", + "M": 279552, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.4097500000016 + }, + "M=279552,N=1600": { + "file": "silu_config_M279552_N1600.json", + "M": 279552, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.9697499999984 + }, + "M=279552,N=1664": { + "file": "silu_config_M279552_N1664.json", + "M": 279552, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2599.5697500000006 + }, + "M=279552,N=1728": { + "file": "silu_config_M279552_N1728.json", + "M": 279552, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2608.969750000001 + }, + "M=279552,N=1760": { + "file": "silu_config_M279552_N1760.json", + "M": 279552, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.969750000001 + }, + "M=279552,N=1792": { + "file": "silu_config_M279552_N1792.json", + "M": 279552, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.7297499999986 + }, + "M=279552,N=1920": { + "file": "silu_config_M279552_N1920.json", + "M": 279552, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.4497500000016 + }, + "M=279552,N=2048": { + "file": 
"silu_config_M279552_N2048.json", + "M": 279552, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2071.0475000000006 + }, + "M=279552,N=2080": { + "file": "silu_config_M279552_N2080.json", + "M": 279552, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.571 + }, + "M=279552,N=2240": { + "file": "silu_config_M279552_N2240.json", + "M": 279552, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.9315000000006 + }, + "M=279552,N=2400": { + "file": "silu_config_M279552_N2400.json", + "M": 279552, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3221.85225 + }, + "M=279552,N=2560": { + "file": "silu_config_M279552_N2560.json", + "M": 279552, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3349.2927500000005 + }, + "M=280576,N=128": { + "file": "silu_config_M280576_N128.json", + "M": 280576, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.91974999999996 + }, + "M=280576,N=160": { + "file": "silu_config_M280576_N160.json", + "M": 280576, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 351.6405000000001 + }, + "M=280576,N=192": { + "file": "silu_config_M280576_N192.json", + "M": 280576, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 356.7605000000002 + }, + "M=280576,N=256": { + "file": "silu_config_M280576_N256.json", + "M": 280576, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 315.60024999999985 + }, + "M=280576,N=320": { + "file": "silu_config_M280576_N320.json", + "M": 280576, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.0417500000001 + }, + "M=280576,N=384": { + "file": "silu_config_M280576_N384.json", + "M": 280576, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.5217500000006 + }, + "M=280576,N=480": { + "file": "silu_config_M280576_N480.json", + "M": 280576, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.1617499999998 + }, + 
"M=280576,N=512": { + "file": "silu_config_M280576_N512.json", + "M": 280576, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 549.56125 + }, + "M=280576,N=576": { + "file": "silu_config_M280576_N576.json", + "M": 280576, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.7644999999998 + }, + "M=280576,N=640": { + "file": "silu_config_M280576_N640.json", + "M": 280576, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1311.56425 + }, + "M=280576,N=768": { + "file": "silu_config_M280576_N768.json", + "M": 280576, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1301.40425 + }, + "M=280576,N=800": { + "file": "silu_config_M280576_N800.json", + "M": 280576, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.324500000001 + }, + "M=280576,N=896": { + "file": "silu_config_M280576_N896.json", + "M": 280576, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.8042500000001 + }, + "M=280576,N=960": { + "file": "silu_config_M280576_N960.json", + "M": 280576, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1333.6045000000008 + }, + "M=280576,N=1024": { + "file": "silu_config_M280576_N1024.json", + "M": 280576, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1063.3232500000008 + }, + "M=280576,N=1120": { + "file": "silu_config_M280576_N1120.json", + "M": 280576, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.6497499999996 + }, + "M=280576,N=1152": { + "file": "silu_config_M280576_N1152.json", + "M": 280576, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2602.9297499999993 + }, + "M=280576,N=1280": { + "file": "silu_config_M280576_N1280.json", + "M": 280576, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.6097499999996 + }, + "M=280576,N=1344": { + "file": "silu_config_M280576_N1344.json", + "M": 280576, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2628.409749999999 + }, + "M=280576,N=1408": { + "file": "silu_config_M280576_N1408.json", + "M": 280576, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2620.6897500000005 + }, + "M=280576,N=1440": { + "file": "silu_config_M280576_N1440.json", + "M": 280576, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.129749999999 + }, + "M=280576,N=1536": { + "file": "silu_config_M280576_N1536.json", + "M": 280576, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.4897500000006 + }, + "M=280576,N=1600": { + "file": "silu_config_M280576_N1600.json", + "M": 280576, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.4897500000015 + }, + "M=280576,N=1664": { + "file": "silu_config_M280576_N1664.json", + "M": 280576, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.129749999999 + }, + "M=280576,N=1728": { + "file": "silu_config_M280576_N1728.json", + "M": 280576, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2628.8497499999994 + }, + "M=280576,N=1760": { + "file": "silu_config_M280576_N1760.json", + "M": 280576, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.6497500000014 + }, + "M=280576,N=1792": { + "file": "silu_config_M280576_N1792.json", + "M": 280576, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.9297499999993 + }, + "M=280576,N=1920": { + "file": "silu_config_M280576_N1920.json", + "M": 280576, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.5697499999987 + }, + "M=280576,N=2048": { + "file": "silu_config_M280576_N2048.json", + "M": 280576, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2097.2075000000004 + }, + "M=280576,N=2080": { + "file": "silu_config_M280576_N2080.json", + "M": 280576, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.570749999999 + }, + "M=280576,N=2240": { + "file": "silu_config_M280576_N2240.json", + "M": 
280576, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3055.9715000000015 + }, + "M=280576,N=2400": { + "file": "silu_config_M280576_N2400.json", + "M": 280576, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3243.0922500000015 + }, + "M=280576,N=2560": { + "file": "silu_config_M280576_N2560.json", + "M": 280576, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3352.1727499999997 + }, + "M=281600,N=128": { + "file": "silu_config_M281600_N128.json", + "M": 281600, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.51975 + }, + "M=281600,N=160": { + "file": "silu_config_M281600_N160.json", + "M": 281600, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6004999999999 + }, + "M=281600,N=192": { + "file": "silu_config_M281600_N192.json", + "M": 281600, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.4402499999999 + }, + "M=281600,N=256": { + "file": "silu_config_M281600_N256.json", + "M": 281600, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.8805 + }, + "M=281600,N=320": { + "file": "silu_config_M281600_N320.json", + "M": 281600, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.0417499999999 + }, + "M=281600,N=384": { + "file": "silu_config_M281600_N384.json", + "M": 281600, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.8417499999998 + }, + "M=281600,N=480": { + "file": "silu_config_M281600_N480.json", + "M": 281600, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 692.2817499999996 + }, + "M=281600,N=512": { + "file": "silu_config_M281600_N512.json", + "M": 281600, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 551.3612499999999 + }, + "M=281600,N=576": { + "file": "silu_config_M281600_N576.json", + "M": 281600, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.4442500000005 + }, + "M=281600,N=640": { + "file": 
"silu_config_M281600_N640.json", + "M": 281600, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1314.7642499999997 + }, + "M=281600,N=768": { + "file": "silu_config_M281600_N768.json", + "M": 281600, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1309.0842499999994 + }, + "M=281600,N=800": { + "file": "silu_config_M281600_N800.json", + "M": 281600, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.0845 + }, + "M=281600,N=896": { + "file": "silu_config_M281600_N896.json", + "M": 281600, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1324.2045000000003 + }, + "M=281600,N=960": { + "file": "silu_config_M281600_N960.json", + "M": 281600, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1338.3245000000002 + }, + "M=281600,N=1024": { + "file": "silu_config_M281600_N1024.json", + "M": 281600, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1067.20325 + }, + "M=281600,N=1120": { + "file": "silu_config_M281600_N1120.json", + "M": 281600, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2626.8097500000003 + }, + "M=281600,N=1152": { + "file": "silu_config_M281600_N1152.json", + "M": 281600, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2565.8495000000003 + }, + "M=281600,N=1280": { + "file": "silu_config_M281600_N1280.json", + "M": 281600, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.4895000000015 + }, + "M=281600,N=1344": { + "file": "silu_config_M281600_N1344.json", + "M": 281600, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.32975 + }, + "M=281600,N=1408": { + "file": "silu_config_M281600_N1408.json", + "M": 281600, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.289499999999 + }, + "M=281600,N=1440": { + "file": "silu_config_M281600_N1440.json", + "M": 281600, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.00975 + }, + 
"M=281600,N=1536": { + "file": "silu_config_M281600_N1536.json", + "M": 281600, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.7695000000003 + }, + "M=281600,N=1600": { + "file": "silu_config_M281600_N1600.json", + "M": 281600, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.209749999998 + }, + "M=281600,N=1664": { + "file": "silu_config_M281600_N1664.json", + "M": 281600, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2580.6894999999986 + }, + "M=281600,N=1728": { + "file": "silu_config_M281600_N1728.json", + "M": 281600, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2629.4097500000007 + }, + "M=281600,N=1760": { + "file": "silu_config_M281600_N1760.json", + "M": 281600, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.8897499999994 + }, + "M=281600,N=1792": { + "file": "silu_config_M281600_N1792.json", + "M": 281600, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2607.4497500000007 + }, + "M=281600,N=1920": { + "file": "silu_config_M281600_N1920.json", + "M": 281600, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.6097500000014 + }, + "M=281600,N=2048": { + "file": "silu_config_M281600_N2048.json", + "M": 281600, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2085.6074999999983 + }, + "M=281600,N=2080": { + "file": "silu_config_M281600_N2080.json", + "M": 281600, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.8907500000005 + }, + "M=281600,N=2240": { + "file": "silu_config_M281600_N2240.json", + "M": 281600, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.371500000001 + }, + "M=281600,N=2400": { + "file": "silu_config_M281600_N2400.json", + "M": 281600, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3208.6922499999982 + }, + "M=281600,N=2560": { + "file": "silu_config_M281600_N2560.json", + "M": 281600, + "N": 2560, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 3347.93275 + }, + "M=282624,N=128": { + "file": "silu_config_M282624_N128.json", + "M": 282624, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.51975000000004 + }, + "M=282624,N=160": { + "file": "silu_config_M282624_N160.json", + "M": 282624, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 336.7605000000001 + }, + "M=282624,N=192": { + "file": "silu_config_M282624_N192.json", + "M": 282624, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.48050000000023 + }, + "M=282624,N=256": { + "file": "silu_config_M282624_N256.json", + "M": 282624, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.72050000000024 + }, + "M=282624,N=320": { + "file": "silu_config_M282624_N320.json", + "M": 282624, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.8417499999998 + }, + "M=282624,N=384": { + "file": "silu_config_M282624_N384.json", + "M": 282624, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.2817499999999 + }, + "M=282624,N=480": { + "file": "silu_config_M282624_N480.json", + "M": 282624, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.4817499999997 + }, + "M=282624,N=512": { + "file": "silu_config_M282624_N512.json", + "M": 282624, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 539.56125 + }, + "M=282624,N=576": { + "file": "silu_config_M282624_N576.json", + "M": 282624, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.2042500000002 + }, + "M=282624,N=640": { + "file": "silu_config_M282624_N640.json", + "M": 282624, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.0842499999999 + }, + "M=282624,N=768": { + "file": "silu_config_M282624_N768.json", + "M": 282624, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.7644999999998 + }, + "M=282624,N=800": { + "file": "silu_config_M282624_N800.json", + "M": 
282624, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1345.2044999999994 + }, + "M=282624,N=896": { + "file": "silu_config_M282624_N896.json", + "M": 282624, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1326.3244999999997 + }, + "M=282624,N=960": { + "file": "silu_config_M282624_N960.json", + "M": 282624, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1340.6444999999994 + }, + "M=282624,N=1024": { + "file": "silu_config_M282624_N1024.json", + "M": 282624, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1070.5234999999998 + }, + "M=282624,N=1120": { + "file": "silu_config_M282624_N1120.json", + "M": 282624, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.28975 + }, + "M=282624,N=1152": { + "file": "silu_config_M282624_N1152.json", + "M": 282624, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2621.6497500000005 + }, + "M=282624,N=1280": { + "file": "silu_config_M282624_N1280.json", + "M": 282624, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.28975 + }, + "M=282624,N=1344": { + "file": "silu_config_M282624_N1344.json", + "M": 282624, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.00975 + }, + "M=282624,N=1408": { + "file": "silu_config_M282624_N1408.json", + "M": 282624, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2618.8097500000003 + }, + "M=282624,N=1440": { + "file": "silu_config_M282624_N1440.json", + "M": 282624, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.13 + }, + "M=282624,N=1536": { + "file": "silu_config_M282624_N1536.json", + "M": 282624, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2623.129750000001 + }, + "M=282624,N=1600": { + "file": "silu_config_M282624_N1600.json", + "M": 282624, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.0097499999983 + }, + "M=282624,N=1664": { + "file": 
"silu_config_M282624_N1664.json", + "M": 282624, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.7297500000004 + }, + "M=282624,N=1728": { + "file": "silu_config_M282624_N1728.json", + "M": 282624, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.5697499999987 + }, + "M=282624,N=1760": { + "file": "silu_config_M282624_N1760.json", + "M": 282624, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.25 + }, + "M=282624,N=1792": { + "file": "silu_config_M282624_N1792.json", + "M": 282624, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.4897500000006 + }, + "M=282624,N=1920": { + "file": "silu_config_M282624_N1920.json", + "M": 282624, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2643.5697500000024 + }, + "M=282624,N=2048": { + "file": "silu_config_M282624_N2048.json", + "M": 282624, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2102.807499999999 + }, + "M=282624,N=2080": { + "file": "silu_config_M282624_N2080.json", + "M": 282624, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.1709999999994 + }, + "M=282624,N=2240": { + "file": "silu_config_M282624_N2240.json", + "M": 282624, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3078.2517499999976 + }, + "M=282624,N=2400": { + "file": "silu_config_M282624_N2400.json", + "M": 282624, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3266.612500000001 + }, + "M=282624,N=2560": { + "file": "silu_config_M282624_N2560.json", + "M": 282624, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3368.85275 + }, + "M=283648,N=128": { + "file": "silu_config_M283648_N128.json", + "M": 283648, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.55974999999998 + }, + "M=283648,N=160": { + "file": "silu_config_M283648_N160.json", + "M": 283648, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.8405 
+ }, + "M=283648,N=192": { + "file": "silu_config_M283648_N192.json", + "M": 283648, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.32025 + }, + "M=283648,N=256": { + "file": "silu_config_M283648_N256.json", + "M": 283648, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.7602499999998 + }, + "M=283648,N=320": { + "file": "silu_config_M283648_N320.json", + "M": 283648, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 688.5617499999998 + }, + "M=283648,N=384": { + "file": "silu_config_M283648_N384.json", + "M": 283648, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 684.0017499999999 + }, + "M=283648,N=480": { + "file": "silu_config_M283648_N480.json", + "M": 283648, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 702.2017500000002 + }, + "M=283648,N=512": { + "file": "silu_config_M283648_N512.json", + "M": 283648, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 555.2412500000003 + }, + "M=283648,N=576": { + "file": "silu_config_M283648_N576.json", + "M": 283648, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.2845000000002 + }, + "M=283648,N=640": { + "file": "silu_config_M283648_N640.json", + "M": 283648, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.96425 + }, + "M=283648,N=768": { + "file": "silu_config_M283648_N768.json", + "M": 283648, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.6844999999998 + }, + "M=283648,N=800": { + "file": "silu_config_M283648_N800.json", + "M": 283648, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1349.8445000000002 + }, + "M=283648,N=896": { + "file": "silu_config_M283648_N896.json", + "M": 283648, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.2842499999997 + }, + "M=283648,N=960": { + "file": "silu_config_M283648_N960.json", + "M": 283648, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1324.6445000000003 + }, + "M=283648,N=1024": { + "file": "silu_config_M283648_N1024.json", + "M": 283648, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1080.283500000001 + }, + "M=283648,N=1120": { + "file": "silu_config_M283648_N1120.json", + "M": 283648, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.969750000001 + }, + "M=283648,N=1152": { + "file": "silu_config_M283648_N1152.json", + "M": 283648, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2612.16975 + }, + "M=283648,N=1280": { + "file": "silu_config_M283648_N1280.json", + "M": 283648, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.09 + }, + "M=283648,N=1344": { + "file": "silu_config_M283648_N1344.json", + "M": 283648, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2649.16975 + }, + "M=283648,N=1408": { + "file": "silu_config_M283648_N1408.json", + "M": 283648, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.7297499999995 + }, + "M=283648,N=1440": { + "file": "silu_config_M283648_N1440.json", + "M": 283648, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.6900000000005 + }, + "M=283648,N=1536": { + "file": "silu_config_M283648_N1536.json", + "M": 283648, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.8497500000003 + }, + "M=283648,N=1600": { + "file": "silu_config_M283648_N1600.json", + "M": 283648, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2656.09 + }, + "M=283648,N=1664": { + "file": "silu_config_M283648_N1664.json", + "M": 283648, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.4497499999998 + }, + "M=283648,N=1728": { + "file": "silu_config_M283648_N1728.json", + "M": 283648, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2656.7700000000004 + }, + "M=283648,N=1760": { + "file": "silu_config_M283648_N1760.json", + "M": 283648, + "N": 1760, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2662.21 + }, + "M=283648,N=1792": { + "file": "silu_config_M283648_N1792.json", + "M": 283648, + "N": 1792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2659.5699999999997 + }, + "M=283648,N=1920": { + "file": "silu_config_M283648_N1920.json", + "M": 283648, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.28975 + }, + "M=283648,N=2048": { + "file": "silu_config_M283648_N2048.json", + "M": 283648, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2108.64775 + }, + "M=283648,N=2080": { + "file": "silu_config_M283648_N2080.json", + "M": 283648, + "N": 2080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2943.1310000000003 + }, + "M=283648,N=2240": { + "file": "silu_config_M283648_N2240.json", + "M": 283648, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3107.2917500000012 + }, + "M=283648,N=2400": { + "file": "silu_config_M283648_N2400.json", + "M": 283648, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3281.1724999999997 + }, + "M=283648,N=2560": { + "file": "silu_config_M283648_N2560.json", + "M": 283648, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3402.4530000000013 + }, + "M=284672,N=128": { + "file": "silu_config_M284672_N128.json", + "M": 284672, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.07974999999993 + }, + "M=284672,N=160": { + "file": "silu_config_M284672_N160.json", + "M": 284672, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.84050000000013 + }, + "M=284672,N=192": { + "file": "silu_config_M284672_N192.json", + "M": 284672, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.92050000000006 + }, + "M=284672,N=256": { + "file": "silu_config_M284672_N256.json", + "M": 284672, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 319.88025000000005 + }, + "M=284672,N=320": { + "file": "silu_config_M284672_N320.json", 
+ "M": 284672, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.0817499999998 + }, + "M=284672,N=384": { + "file": "silu_config_M284672_N384.json", + "M": 284672, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.8817499999998 + }, + "M=284672,N=480": { + "file": "silu_config_M284672_N480.json", + "M": 284672, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.8017500000003 + }, + "M=284672,N=512": { + "file": "silu_config_M284672_N512.json", + "M": 284672, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 557.2412499999998 + }, + "M=284672,N=576": { + "file": "silu_config_M284672_N576.json", + "M": 284672, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6045 + }, + "M=284672,N=640": { + "file": "silu_config_M284672_N640.json", + "M": 284672, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1319.40425 + }, + "M=284672,N=768": { + "file": "silu_config_M284672_N768.json", + "M": 284672, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1345.5245 + }, + "M=284672,N=800": { + "file": "silu_config_M284672_N800.json", + "M": 284672, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.1245 + }, + "M=284672,N=896": { + "file": "silu_config_M284672_N896.json", + "M": 284672, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1335.7645000000007 + }, + "M=284672,N=960": { + "file": "silu_config_M284672_N960.json", + "M": 284672, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1352.4445000000005 + }, + "M=284672,N=1024": { + "file": "silu_config_M284672_N1024.json", + "M": 284672, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1078.2034999999996 + }, + "M=284672,N=1120": { + "file": "silu_config_M284672_N1120.json", + "M": 284672, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2652.12975 + }, + "M=284672,N=1152": { + "file": "silu_config_M284672_N1152.json", + 
"M": 284672, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.4497499999998 + }, + "M=284672,N=1280": { + "file": "silu_config_M284672_N1280.json", + "M": 284672, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.36975 + }, + "M=284672,N=1344": { + "file": "silu_config_M284672_N1344.json", + "M": 284672, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.9299999999994 + }, + "M=284672,N=1408": { + "file": "silu_config_M284672_N1408.json", + "M": 284672, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.7300000000005 + }, + "M=284672,N=1440": { + "file": "silu_config_M284672_N1440.json", + "M": 284672, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.0499999999993 + }, + "M=284672,N=1536": { + "file": "silu_config_M284672_N1536.json", + "M": 284672, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2621.409749999998 + }, + "M=284672,N=1600": { + "file": "silu_config_M284672_N1600.json", + "M": 284672, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.45 + }, + "M=284672,N=1664": { + "file": "silu_config_M284672_N1664.json", + "M": 284672, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.6500000000005 + }, + "M=284672,N=1728": { + "file": "silu_config_M284672_N1728.json", + "M": 284672, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.289999999999 + }, + "M=284672,N=1760": { + "file": "silu_config_M284672_N1760.json", + "M": 284672, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.289999999998 + }, + "M=284672,N=1792": { + "file": "silu_config_M284672_N1792.json", + "M": 284672, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.569999999998 + }, + "M=284672,N=1920": { + "file": "silu_config_M284672_N1920.json", + "M": 284672, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.129999999999 + }, + 
"M=284672,N=2048": { + "file": "silu_config_M284672_N2048.json", + "M": 284672, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2107.0874999999987 + }, + "M=284672,N=2080": { + "file": "silu_config_M284672_N2080.json", + "M": 284672, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2935.610999999997 + }, + "M=284672,N=2240": { + "file": "silu_config_M284672_N2240.json", + "M": 284672, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3090.731749999999 + }, + "M=284672,N=2400": { + "file": "silu_config_M284672_N2400.json", + "M": 284672, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3272.1325000000015 + }, + "M=284672,N=2560": { + "file": "silu_config_M284672_N2560.json", + "M": 284672, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3417.4929999999986 + }, + "M=285696,N=128": { + "file": "silu_config_M285696_N128.json", + "M": 285696, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.71974999999998 + }, + "M=285696,N=160": { + "file": "silu_config_M285696_N160.json", + "M": 285696, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 357.24024999999983 + }, + "M=285696,N=192": { + "file": "silu_config_M285696_N192.json", + "M": 285696, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 340.64025000000015 + }, + "M=285696,N=256": { + "file": "silu_config_M285696_N256.json", + "M": 285696, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 309.36024999999995 + }, + "M=285696,N=320": { + "file": "silu_config_M285696_N320.json", + "M": 285696, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.5217500000001 + }, + "M=285696,N=384": { + "file": "silu_config_M285696_N384.json", + "M": 285696, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.1617499999998 + }, + "M=285696,N=480": { + "file": "silu_config_M285696_N480.json", + "M": 285696, + "N": 480, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 706.8417499999998 + }, + "M=285696,N=512": { + "file": "silu_config_M285696_N512.json", + "M": 285696, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 554.6012500000004 + }, + "M=285696,N=576": { + "file": "silu_config_M285696_N576.json", + "M": 285696, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.2044999999998 + }, + "M=285696,N=640": { + "file": "silu_config_M285696_N640.json", + "M": 285696, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1335.1644999999999 + }, + "M=285696,N=768": { + "file": "silu_config_M285696_N768.json", + "M": 285696, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.2045000000003 + }, + "M=285696,N=800": { + "file": "silu_config_M285696_N800.json", + "M": 285696, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.7245000000003 + }, + "M=285696,N=896": { + "file": "silu_config_M285696_N896.json", + "M": 285696, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1350.924500000001 + }, + "M=285696,N=960": { + "file": "silu_config_M285696_N960.json", + "M": 285696, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1355.6445000000003 + }, + "M=285696,N=1024": { + "file": "silu_config_M285696_N1024.json", + "M": 285696, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1082.5234999999998 + }, + "M=285696,N=1120": { + "file": "silu_config_M285696_N1120.json", + "M": 285696, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2661.5699999999997 + }, + "M=285696,N=1152": { + "file": "silu_config_M285696_N1152.json", + "M": 285696, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2649.5297499999997 + }, + "M=285696,N=1280": { + "file": "silu_config_M285696_N1280.json", + "M": 285696, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2643.16975 + }, + "M=285696,N=1344": { + "file": "silu_config_M285696_N1344.json", + "M": 285696, 
+ "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.249999999999 + }, + "M=285696,N=1408": { + "file": "silu_config_M285696_N1408.json", + "M": 285696, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.0097499999993 + }, + "M=285696,N=1440": { + "file": "silu_config_M285696_N1440.json", + "M": 285696, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2682.0899999999992 + }, + "M=285696,N=1536": { + "file": "silu_config_M285696_N1536.json", + "M": 285696, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.969750000001 + }, + "M=285696,N=1600": { + "file": "silu_config_M285696_N1600.json", + "M": 285696, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.209999999998 + }, + "M=285696,N=1664": { + "file": "silu_config_M285696_N1664.json", + "M": 285696, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2668.130000000001 + }, + "M=285696,N=1728": { + "file": "silu_config_M285696_N1728.json", + "M": 285696, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2689.0899999999992 + }, + "M=285696,N=1760": { + "file": "silu_config_M285696_N1760.json", + "M": 285696, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2697.7700000000013 + }, + "M=285696,N=1792": { + "file": "silu_config_M285696_N1792.json", + "M": 285696, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.170000000002 + }, + "M=285696,N=1920": { + "file": "silu_config_M285696_N1920.json", + "M": 285696, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.0099999999993 + }, + "M=285696,N=2048": { + "file": "silu_config_M285696_N2048.json", + "M": 285696, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2116.087749999999 + }, + "M=285696,N=2080": { + "file": "silu_config_M285696_N2080.json", + "M": 285696, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2955.6510000000026 + }, + 
"M=285696,N=2240": { + "file": "silu_config_M285696_N2240.json", + "M": 285696, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3110.9317499999997 + }, + "M=285696,N=2400": { + "file": "silu_config_M285696_N2400.json", + "M": 285696, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3268.852500000001 + }, + "M=285696,N=2560": { + "file": "silu_config_M285696_N2560.json", + "M": 285696, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3446.6932499999984 + }, + "M=286720,N=128": { + "file": "silu_config_M286720_N128.json", + "M": 286720, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.47975000000002 + }, + "M=286720,N=160": { + "file": "silu_config_M286720_N160.json", + "M": 286720, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 341.24024999999995 + }, + "M=286720,N=192": { + "file": "silu_config_M286720_N192.json", + "M": 286720, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.8404999999998 + }, + "M=286720,N=256": { + "file": "silu_config_M286720_N256.json", + "M": 286720, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.52025000000003 + }, + "M=286720,N=320": { + "file": "silu_config_M286720_N320.json", + "M": 286720, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.8817499999998 + }, + "M=286720,N=384": { + "file": "silu_config_M286720_N384.json", + "M": 286720, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.92175 + }, + "M=286720,N=480": { + "file": "silu_config_M286720_N480.json", + "M": 286720, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 704.5617500000003 + }, + "M=286720,N=512": { + "file": "silu_config_M286720_N512.json", + "M": 286720, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 546.0812500000002 + }, + "M=286720,N=576": { + "file": "silu_config_M286720_N576.json", + "M": 286720, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1316.04425 + }, + "M=286720,N=640": { + "file": "silu_config_M286720_N640.json", + "M": 286720, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.8042499999997 + }, + "M=286720,N=768": { + "file": "silu_config_M286720_N768.json", + "M": 286720, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.3242500000001 + }, + "M=286720,N=800": { + "file": "silu_config_M286720_N800.json", + "M": 286720, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.4045000000006 + }, + "M=286720,N=896": { + "file": "silu_config_M286720_N896.json", + "M": 286720, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.0842500000003 + }, + "M=286720,N=960": { + "file": "silu_config_M286720_N960.json", + "M": 286720, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1327.8845000000001 + }, + "M=286720,N=1024": { + "file": "silu_config_M286720_N1024.json", + "M": 286720, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1086.2035 + }, + "M=286720,N=1120": { + "file": "silu_config_M286720_N1120.json", + "M": 286720, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2642.04975 + }, + "M=286720,N=1152": { + "file": "silu_config_M286720_N1152.json", + "M": 286720, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2611.7297499999995 + }, + "M=286720,N=1280": { + "file": "silu_config_M286720_N1280.json", + "M": 286720, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.329750000001 + }, + "M=286720,N=1344": { + "file": "silu_config_M286720_N1344.json", + "M": 286720, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2645.6497500000005 + }, + "M=286720,N=1408": { + "file": "silu_config_M286720_N1408.json", + "M": 286720, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.209749999999 + }, + "M=286720,N=1440": { + "file": "silu_config_M286720_N1440.json", + "M": 286720, + "N": 1440, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2640.8097500000003 + }, + "M=286720,N=1536": { + "file": "silu_config_M286720_N1536.json", + "M": 286720, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2623.4097500000016 + }, + "M=286720,N=1600": { + "file": "silu_config_M286720_N1600.json", + "M": 286720, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.169999999999 + }, + "M=286720,N=1664": { + "file": "silu_config_M286720_N1664.json", + "M": 286720, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2639.6897499999977 + }, + "M=286720,N=1728": { + "file": "silu_config_M286720_N1728.json", + "M": 286720, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2647.849750000001 + }, + "M=286720,N=1760": { + "file": "silu_config_M286720_N1760.json", + "M": 286720, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.7700000000023 + }, + "M=286720,N=1792": { + "file": "silu_config_M286720_N1792.json", + "M": 286720, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.6897499999986 + }, + "M=286720,N=1920": { + "file": "silu_config_M286720_N1920.json", + "M": 286720, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.0497499999974 + }, + "M=286720,N=2048": { + "file": "silu_config_M286720_N2048.json", + "M": 286720, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2121.807749999998 + }, + "M=286720,N=2080": { + "file": "silu_config_M286720_N2080.json", + "M": 286720, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.411000000003 + }, + "M=286720,N=2240": { + "file": "silu_config_M286720_N2240.json", + "M": 286720, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3080.1717499999977 + }, + "M=286720,N=2400": { + "file": "silu_config_M286720_N2400.json", + "M": 286720, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3254.1322500000024 + }, + "M=286720,N=2560": { + 
"file": "silu_config_M286720_N2560.json", + "M": 286720, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3393.292999999998 + }, + "M=287744,N=128": { + "file": "silu_config_M287744_N128.json", + "M": 287744, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.83974999999992 + }, + "M=287744,N=160": { + "file": "silu_config_M287744_N160.json", + "M": 287744, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.6005 + }, + "M=287744,N=192": { + "file": "silu_config_M287744_N192.json", + "M": 287744, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.1205000000001 + }, + "M=287744,N=256": { + "file": "silu_config_M287744_N256.json", + "M": 287744, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 323.2002500000001 + }, + "M=287744,N=320": { + "file": "silu_config_M287744_N320.json", + "M": 287744, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 697.88175 + }, + "M=287744,N=384": { + "file": "silu_config_M287744_N384.json", + "M": 287744, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.6417500000002 + }, + "M=287744,N=480": { + "file": "silu_config_M287744_N480.json", + "M": 287744, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.8017500000005 + }, + "M=287744,N=512": { + "file": "silu_config_M287744_N512.json", + "M": 287744, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 552.4412500000001 + }, + "M=287744,N=576": { + "file": "silu_config_M287744_N576.json", + "M": 287744, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1342.8844999999997 + }, + "M=287744,N=640": { + "file": "silu_config_M287744_N640.json", + "M": 287744, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1339.5645 + }, + "M=287744,N=768": { + "file": "silu_config_M287744_N768.json", + "M": 287744, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.0045000000005 + }, + "M=287744,N=800": { 
+ "file": "silu_config_M287744_N800.json", + "M": 287744, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1356.1644999999999 + }, + "M=287744,N=896": { + "file": "silu_config_M287744_N896.json", + "M": 287744, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1349.5645000000004 + }, + "M=287744,N=960": { + "file": "silu_config_M287744_N960.json", + "M": 287744, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.0845 + }, + "M=287744,N=1024": { + "file": "silu_config_M287744_N1024.json", + "M": 287744, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1090.4035000000003 + }, + "M=287744,N=1120": { + "file": "silu_config_M287744_N1120.json", + "M": 287744, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2680.6099999999988 + }, + "M=287744,N=1152": { + "file": "silu_config_M287744_N1152.json", + "M": 287744, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.13 + }, + "M=287744,N=1280": { + "file": "silu_config_M287744_N1280.json", + "M": 287744, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.5297499999997 + }, + "M=287744,N=1344": { + "file": "silu_config_M287744_N1344.json", + "M": 287744, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.370000000001 + }, + "M=287744,N=1408": { + "file": "silu_config_M287744_N1408.json", + "M": 287744, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.7299999999996 + }, + "M=287744,N=1440": { + "file": "silu_config_M287744_N1440.json", + "M": 287744, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.0499999999993 + }, + "M=287744,N=1536": { + "file": "silu_config_M287744_N1536.json", + "M": 287744, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2670.5699999999997 + }, + "M=287744,N=1600": { + "file": "silu_config_M287744_N1600.json", + "M": 287744, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2704.8499999999995 + }, + "M=287744,N=1664": { + "file": "silu_config_M287744_N1664.json", + "M": 287744, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2686.8500000000004 + }, + "M=287744,N=1728": { + "file": "silu_config_M287744_N1728.json", + "M": 287744, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2708.330000000001 + }, + "M=287744,N=1760": { + "file": "silu_config_M287744_N1760.json", + "M": 287744, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.8499999999985 + }, + "M=287744,N=1792": { + "file": "silu_config_M287744_N1792.json", + "M": 287744, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.8100000000004 + }, + "M=287744,N=1920": { + "file": "silu_config_M287744_N1920.json", + "M": 287744, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2692.5700000000006 + }, + "M=287744,N=2048": { + "file": "silu_config_M287744_N2048.json", + "M": 287744, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2140.0877500000015 + }, + "M=287744,N=2080": { + "file": "silu_config_M287744_N2080.json", + "M": 287744, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2956.571249999999 + }, + "M=287744,N=2240": { + "file": "silu_config_M287744_N2240.json", + "M": 287744, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.9317499999997 + }, + "M=287744,N=2400": { + "file": "silu_config_M287744_N2400.json", + "M": 287744, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3312.0525000000034 + }, + "M=287744,N=2560": { + "file": "silu_config_M287744_N2560.json", + "M": 287744, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3449.613250000004 + }, + "M=288768,N=128": { + "file": "silu_config_M288768_N128.json", + "M": 288768, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 170.59975000000003 + }, + "M=288768,N=160": { + "file": "silu_config_M288768_N160.json", + "M": 288768, 
+ "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 343.6405 + }, + "M=288768,N=192": { + "file": "silu_config_M288768_N192.json", + "M": 288768, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.4005000000001 + }, + "M=288768,N=256": { + "file": "silu_config_M288768_N256.json", + "M": 288768, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.08025 + }, + "M=288768,N=320": { + "file": "silu_config_M288768_N320.json", + "M": 288768, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 700.8417499999998 + }, + "M=288768,N=384": { + "file": "silu_config_M288768_N384.json", + "M": 288768, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 696.20175 + }, + "M=288768,N=480": { + "file": "silu_config_M288768_N480.json", + "M": 288768, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.4017500000002 + }, + "M=288768,N=512": { + "file": "silu_config_M288768_N512.json", + "M": 288768, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 560.72125 + }, + "M=288768,N=576": { + "file": "silu_config_M288768_N576.json", + "M": 288768, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.3245000000006 + }, + "M=288768,N=640": { + "file": "silu_config_M288768_N640.json", + "M": 288768, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.6444999999994 + }, + "M=288768,N=768": { + "file": "silu_config_M288768_N768.json", + "M": 288768, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1364.3245000000002 + }, + "M=288768,N=800": { + "file": "silu_config_M288768_N800.json", + "M": 288768, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1376.2044999999998 + }, + "M=288768,N=896": { + "file": "silu_config_M288768_N896.json", + "M": 288768, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.3645000000001 + }, + "M=288768,N=960": { + "file": "silu_config_M288768_N960.json", + "M": 288768, + 
"N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1359.0044999999996 + }, + "M=288768,N=1024": { + "file": "silu_config_M288768_N1024.json", + "M": 288768, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1094.7634999999996 + }, + "M=288768,N=1120": { + "file": "silu_config_M288768_N1120.json", + "M": 288768, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2700.0899999999992 + }, + "M=288768,N=1152": { + "file": "silu_config_M288768_N1152.json", + "M": 288768, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2677.6500000000005 + }, + "M=288768,N=1280": { + "file": "silu_config_M288768_N1280.json", + "M": 288768, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2681.249999999999 + }, + "M=288768,N=1344": { + "file": "silu_config_M288768_N1344.json", + "M": 288768, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.41 + }, + "M=288768,N=1408": { + "file": "silu_config_M288768_N1408.json", + "M": 288768, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.329999999998 + }, + "M=288768,N=1440": { + "file": "silu_config_M288768_N1440.json", + "M": 288768, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.5699999999997 + }, + "M=288768,N=1536": { + "file": "silu_config_M288768_N1536.json", + "M": 288768, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2669.250000000001 + }, + "M=288768,N=1600": { + "file": "silu_config_M288768_N1600.json", + "M": 288768, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.45025 + }, + "M=288768,N=1664": { + "file": "silu_config_M288768_N1664.json", + "M": 288768, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2696.209999999999 + }, + "M=288768,N=1728": { + "file": "silu_config_M288768_N1728.json", + "M": 288768, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2728.0102500000003 + }, + "M=288768,N=1760": { + "file": 
"silu_config_M288768_N1760.json", + "M": 288768, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.8100000000004 + }, + "M=288768,N=1792": { + "file": "silu_config_M288768_N1792.json", + "M": 288768, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2682.5699999999997 + }, + "M=288768,N=1920": { + "file": "silu_config_M288768_N1920.json", + "M": 288768, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.7300000000005 + }, + "M=288768,N=2048": { + "file": "silu_config_M288768_N2048.json", + "M": 288768, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2127.687750000002 + }, + "M=288768,N=2080": { + "file": "silu_config_M288768_N2080.json", + "M": 288768, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.771249999999 + }, + "M=288768,N=2240": { + "file": "silu_config_M288768_N2240.json", + "M": 288768, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3135.1317500000023 + }, + "M=288768,N=2400": { + "file": "silu_config_M288768_N2400.json", + "M": 288768, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3303.3725000000013 + }, + "M=288768,N=2560": { + "file": "silu_config_M288768_N2560.json", + "M": 288768, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3452.4932499999977 + }, + "M=289792,N=128": { + "file": "silu_config_M289792_N128.json", + "M": 289792, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.27975000000004 + }, + "M=289792,N=160": { + "file": "silu_config_M289792_N160.json", + "M": 289792, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.8405 + }, + "M=289792,N=192": { + "file": "silu_config_M289792_N192.json", + "M": 289792, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.0005000000001 + }, + "M=289792,N=256": { + "file": "silu_config_M289792_N256.json", + "M": 289792, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.04025 
+ }, + "M=289792,N=320": { + "file": "silu_config_M289792_N320.json", + "M": 289792, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 702.9617499999999 + }, + "M=289792,N=384": { + "file": "silu_config_M289792_N384.json", + "M": 289792, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 698.3217500000003 + }, + "M=289792,N=480": { + "file": "silu_config_M289792_N480.json", + "M": 289792, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.002 + }, + "M=289792,N=512": { + "file": "silu_config_M289792_N512.json", + "M": 289792, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 566.9612500000001 + }, + "M=289792,N=576": { + "file": "silu_config_M289792_N576.json", + "M": 289792, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.6045000000004 + }, + "M=289792,N=640": { + "file": "silu_config_M289792_N640.json", + "M": 289792, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.4445 + }, + "M=289792,N=768": { + "file": "silu_config_M289792_N768.json", + "M": 289792, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.3645000000001 + }, + "M=289792,N=800": { + "file": "silu_config_M289792_N800.json", + "M": 289792, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.3245000000002 + }, + "M=289792,N=896": { + "file": "silu_config_M289792_N896.json", + "M": 289792, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1363.8444999999997 + }, + "M=289792,N=960": { + "file": "silu_config_M289792_N960.json", + "M": 289792, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1385.8047500000002 + }, + "M=289792,N=1024": { + "file": "silu_config_M289792_N1024.json", + "M": 289792, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1097.9235000000008 + }, + "M=289792,N=1120": { + "file": "silu_config_M289792_N1120.json", + "M": 289792, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2699.5299999999997 + }, + "M=289792,N=1152": { + "file": "silu_config_M289792_N1152.json", + "M": 289792, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2676.6899999999996 + }, + "M=289792,N=1280": { + "file": "silu_config_M289792_N1280.json", + "M": 289792, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2680.6099999999988 + }, + "M=289792,N=1344": { + "file": "silu_config_M289792_N1344.json", + "M": 289792, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.369999999999 + }, + "M=289792,N=1408": { + "file": "silu_config_M289792_N1408.json", + "M": 289792, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.970000000001 + }, + "M=289792,N=1440": { + "file": "silu_config_M289792_N1440.json", + "M": 289792, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2720.250250000001 + }, + "M=289792,N=1536": { + "file": "silu_config_M289792_N1536.json", + "M": 289792, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2678.8100000000013 + }, + "M=289792,N=1600": { + "file": "silu_config_M289792_N1600.json", + "M": 289792, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2734.0902499999993 + }, + "M=289792,N=1664": { + "file": "silu_config_M289792_N1664.json", + "M": 289792, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2705.9699999999984 + }, + "M=289792,N=1728": { + "file": "silu_config_M289792_N1728.json", + "M": 289792, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2737.8102499999986 + }, + "M=289792,N=1760": { + "file": "silu_config_M289792_N1760.json", + "M": 289792, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2723.13025 + }, + "M=289792,N=1792": { + "file": "silu_config_M289792_N1792.json", + "M": 289792, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.8499999999985 + }, + "M=289792,N=1920": { + "file": "silu_config_M289792_N1920.json", + "M": 289792, + 
"N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.290000000001 + }, + "M=289792,N=2048": { + "file": "silu_config_M289792_N2048.json", + "M": 289792, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2145.3677500000003 + }, + "M=289792,N=2080": { + "file": "silu_config_M289792_N2080.json", + "M": 289792, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8912500000015 + }, + "M=289792,N=2240": { + "file": "silu_config_M289792_N2240.json", + "M": 289792, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3176.2520000000004 + }, + "M=289792,N=2400": { + "file": "silu_config_M289792_N2400.json", + "M": 289792, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3325.3327499999996 + }, + "M=289792,N=2560": { + "file": "silu_config_M289792_N2560.json", + "M": 289792, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3476.0932499999963 + }, + "M=290816,N=128": { + "file": "silu_config_M290816_N128.json", + "M": 290816, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.19975 + }, + "M=290816,N=160": { + "file": "silu_config_M290816_N160.json", + "M": 290816, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.16025 + }, + "M=290816,N=192": { + "file": "silu_config_M290816_N192.json", + "M": 290816, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.7202500000003 + }, + "M=290816,N=256": { + "file": "silu_config_M290816_N256.json", + "M": 290816, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.8805 + }, + "M=290816,N=320": { + "file": "silu_config_M290816_N320.json", + "M": 290816, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 705.4017499999998 + }, + "M=290816,N=384": { + "file": "silu_config_M290816_N384.json", + "M": 290816, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.9617500000002 + }, + "M=290816,N=480": { + "file": 
"silu_config_M290816_N480.json", + "M": 290816, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.6019999999999 + }, + "M=290816,N=512": { + "file": "silu_config_M290816_N512.json", + "M": 290816, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 569.2012499999998 + }, + "M=290816,N=576": { + "file": "silu_config_M290816_N576.json", + "M": 290816, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.2844999999993 + }, + "M=290816,N=640": { + "file": "silu_config_M290816_N640.json", + "M": 290816, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1343.1644999999999 + }, + "M=290816,N=768": { + "file": "silu_config_M290816_N768.json", + "M": 290816, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1358.6045 + }, + "M=290816,N=800": { + "file": "silu_config_M290816_N800.json", + "M": 290816, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.2044999999994 + }, + "M=290816,N=896": { + "file": "silu_config_M290816_N896.json", + "M": 290816, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.7645000000002 + }, + "M=290816,N=960": { + "file": "silu_config_M290816_N960.json", + "M": 290816, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1377.2044999999998 + }, + "M=290816,N=1024": { + "file": "silu_config_M290816_N1024.json", + "M": 290816, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1101.2035 + }, + "M=290816,N=1120": { + "file": "silu_config_M290816_N1120.json", + "M": 290816, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2719.210250000001 + }, + "M=290816,N=1152": { + "file": "silu_config_M290816_N1152.json", + "M": 290816, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2696.369999999999 + }, + "M=290816,N=1280": { + "file": "silu_config_M290816_N1280.json", + "M": 290816, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.05 + }, + 
"M=290816,N=1344": { + "file": "silu_config_M290816_N1344.json", + "M": 290816, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.33 + }, + "M=290816,N=1408": { + "file": "silu_config_M290816_N1408.json", + "M": 290816, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2694.05 + }, + "M=290816,N=1440": { + "file": "silu_config_M290816_N1440.json", + "M": 290816, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2709.209999999999 + }, + "M=290816,N=1536": { + "file": "silu_config_M290816_N1536.json", + "M": 290816, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2708.7699999999986 + }, + "M=290816,N=1600": { + "file": "silu_config_M290816_N1600.json", + "M": 290816, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.5700000000015 + }, + "M=290816,N=1664": { + "file": "silu_config_M290816_N1664.json", + "M": 290816, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2725.7702499999996 + }, + "M=290816,N=1728": { + "file": "silu_config_M290816_N1728.json", + "M": 290816, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2726.450249999999 + }, + "M=290816,N=1760": { + "file": "silu_config_M290816_N1760.json", + "M": 290816, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.9702499999994 + }, + "M=290816,N=1792": { + "file": "silu_config_M290816_N1792.json", + "M": 290816, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2722.290250000002 + }, + "M=290816,N=1920": { + "file": "silu_config_M290816_N1920.json", + "M": 290816, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.8902499999995 + }, + "M=290816,N=2048": { + "file": "silu_config_M290816_N2048.json", + "M": 290816, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2153.687749999999 + }, + "M=290816,N=2080": { + "file": "silu_config_M290816_N2080.json", + "M": 290816, + "N": 2080, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2998.4912500000037 + }, + "M=290816,N=2240": { + "file": "silu_config_M290816_N2240.json", + "M": 290816, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3177.652 + }, + "M=290816,N=2400": { + "file": "silu_config_M290816_N2400.json", + "M": 290816, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3336.2527500000015 + }, + "M=290816,N=2560": { + "file": "silu_config_M290816_N2560.json", + "M": 290816, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3499.053250000001 + }, + "M=291840,N=128": { + "file": "silu_config_M291840_N128.json", + "M": 291840, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 171.11975 + }, + "M=291840,N=160": { + "file": "silu_config_M291840_N160.json", + "M": 291840, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 362.32050000000004 + }, + "M=291840,N=192": { + "file": "silu_config_M291840_N192.json", + "M": 291840, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 342.20050000000003 + }, + "M=291840,N=256": { + "file": "silu_config_M291840_N256.json", + "M": 291840, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 316.28025 + }, + "M=291840,N=320": { + "file": "silu_config_M291840_N320.json", + "M": 291840, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.36175 + }, + "M=291840,N=384": { + "file": "silu_config_M291840_N384.json", + "M": 291840, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.5617499999998 + }, + "M=291840,N=480": { + "file": "silu_config_M291840_N480.json", + "M": 291840, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.3619999999999 + }, + "M=291840,N=512": { + "file": "silu_config_M291840_N512.json", + "M": 291840, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 571.1212499999999 + }, + "M=291840,N=576": { + "file": "silu_config_M291840_N576.json", + "M": 291840, + "N": 576, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 1339.5645000000004 + }, + "M=291840,N=640": { + "file": "silu_config_M291840_N640.json", + "M": 291840, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1363.5645 + }, + "M=291840,N=768": { + "file": "silu_config_M291840_N768.json", + "M": 291840, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1335.9644999999996 + }, + "M=291840,N=800": { + "file": "silu_config_M291840_N800.json", + "M": 291840, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.0845000000008 + }, + "M=291840,N=896": { + "file": "silu_config_M291840_N896.json", + "M": 291840, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.3244999999997 + }, + "M=291840,N=960": { + "file": "silu_config_M291840_N960.json", + "M": 291840, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1388.8847500000006 + }, + "M=291840,N=1024": { + "file": "silu_config_M291840_N1024.json", + "M": 291840, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1112.04375 + }, + "M=291840,N=1120": { + "file": "silu_config_M291840_N1120.json", + "M": 291840, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.8100000000004 + }, + "M=291840,N=1152": { + "file": "silu_config_M291840_N1152.json", + "M": 291840, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.3699999999994 + }, + "M=291840,N=1280": { + "file": "silu_config_M291840_N1280.json", + "M": 291840, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2674.5299999999993 + }, + "M=291840,N=1344": { + "file": "silu_config_M291840_N1344.json", + "M": 291840, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2698.0499999999997 + }, + "M=291840,N=1408": { + "file": "silu_config_M291840_N1408.json", + "M": 291840, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2679.4500000000003 + }, + "M=291840,N=1440": { + "file": "silu_config_M291840_N1440.json", + 
"M": 291840, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2735.57025 + }, + "M=291840,N=1536": { + "file": "silu_config_M291840_N1536.json", + "M": 291840, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.0499999999997 + }, + "M=291840,N=1600": { + "file": "silu_config_M291840_N1600.json", + "M": 291840, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.9699999999993 + }, + "M=291840,N=1664": { + "file": "silu_config_M291840_N1664.json", + "M": 291840, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2707.0499999999993 + }, + "M=291840,N=1728": { + "file": "silu_config_M291840_N1728.json", + "M": 291840, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2740.2902499999996 + }, + "M=291840,N=1760": { + "file": "silu_config_M291840_N1760.json", + "M": 291840, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.37025 + }, + "M=291840,N=1792": { + "file": "silu_config_M291840_N1792.json", + "M": 291840, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.33025 + }, + "M=291840,N=1920": { + "file": "silu_config_M291840_N1920.json", + "M": 291840, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2714.570000000001 + }, + "M=291840,N=2048": { + "file": "silu_config_M291840_N2048.json", + "M": 291840, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2181.008 + }, + "M=291840,N=2080": { + "file": "silu_config_M291840_N2080.json", + "M": 291840, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3001.81125 + }, + "M=291840,N=2240": { + "file": "silu_config_M291840_N2240.json", + "M": 291840, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3161.7719999999995 + }, + "M=291840,N=2400": { + "file": "silu_config_M291840_N2400.json", + "M": 291840, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3314.6125 + }, + "M=291840,N=2560": { + "file": 
"silu_config_M291840_N2560.json", + "M": 291840, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3482.333249999998 + }, + "M=292864,N=128": { + "file": "silu_config_M292864_N128.json", + "M": 292864, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.91975000000002 + }, + "M=292864,N=160": { + "file": "silu_config_M292864_N160.json", + "M": 292864, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.7605 + }, + "M=292864,N=192": { + "file": "silu_config_M292864_N192.json", + "M": 292864, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.20050000000003 + }, + "M=292864,N=256": { + "file": "silu_config_M292864_N256.json", + "M": 292864, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.3202500000001 + }, + "M=292864,N=320": { + "file": "silu_config_M292864_N320.json", + "M": 292864, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.8017499999996 + }, + "M=292864,N=384": { + "file": "silu_config_M292864_N384.json", + "M": 292864, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.6417500000005 + }, + "M=292864,N=480": { + "file": "silu_config_M292864_N480.json", + "M": 292864, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.3220000000003 + }, + "M=292864,N=512": { + "file": "silu_config_M292864_N512.json", + "M": 292864, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 562.2412500000005 + }, + "M=292864,N=576": { + "file": "silu_config_M292864_N576.json", + "M": 292864, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1389.2047499999999 + }, + "M=292864,N=640": { + "file": "silu_config_M292864_N640.json", + "M": 292864, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.4044999999996 + }, + "M=292864,N=768": { + "file": "silu_config_M292864_N768.json", + "M": 292864, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1378.7245000000003 + }, + 
"M=292864,N=800": { + "file": "silu_config_M292864_N800.json", + "M": 292864, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1384.5647499999995 + }, + "M=292864,N=896": { + "file": "silu_config_M292864_N896.json", + "M": 292864, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.2844999999998 + }, + "M=292864,N=960": { + "file": "silu_config_M292864_N960.json", + "M": 292864, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.1647499999995 + }, + "M=292864,N=1024": { + "file": "silu_config_M292864_N1024.json", + "M": 292864, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1110.1234999999992 + }, + "M=292864,N=1120": { + "file": "silu_config_M292864_N1120.json", + "M": 292864, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2719.450249999999 + }, + "M=292864,N=1152": { + "file": "silu_config_M292864_N1152.json", + "M": 292864, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2705.49 + }, + "M=292864,N=1280": { + "file": "silu_config_M292864_N1280.json", + "M": 292864, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.7300000000005 + }, + "M=292864,N=1344": { + "file": "silu_config_M292864_N1344.json", + "M": 292864, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2739.290249999998 + }, + "M=292864,N=1408": { + "file": "silu_config_M292864_N1408.json", + "M": 292864, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2717.290249999999 + }, + "M=292864,N=1440": { + "file": "silu_config_M292864_N1440.json", + "M": 292864, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.6502500000024 + }, + "M=292864,N=1536": { + "file": "silu_config_M292864_N1536.json", + "M": 292864, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.7302500000005 + }, + "M=292864,N=1600": { + "file": "silu_config_M292864_N1600.json", + "M": 292864, + "N": 1600, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2740.8502500000004 + }, + "M=292864,N=1664": { + "file": "silu_config_M292864_N1664.json", + "M": 292864, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2729.530249999997 + }, + "M=292864,N=1728": { + "file": "silu_config_M292864_N1728.json", + "M": 292864, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.6902500000015 + }, + "M=292864,N=1760": { + "file": "silu_config_M292864_N1760.json", + "M": 292864, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2764.250250000001 + }, + "M=292864,N=1792": { + "file": "silu_config_M292864_N1792.json", + "M": 292864, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2722.0502499999993 + }, + "M=292864,N=1920": { + "file": "silu_config_M292864_N1920.json", + "M": 292864, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.53025 + }, + "M=292864,N=2048": { + "file": "silu_config_M292864_N2048.json", + "M": 292864, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2179.767999999999 + }, + "M=292864,N=2080": { + "file": "silu_config_M292864_N2080.json", + "M": 292864, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3026.0115000000023 + }, + "M=292864,N=2240": { + "file": "silu_config_M292864_N2240.json", + "M": 292864, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3196.2519999999986 + }, + "M=292864,N=2400": { + "file": "silu_config_M292864_N2400.json", + "M": 292864, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3373.4527500000004 + }, + "M=292864,N=2560": { + "file": "silu_config_M292864_N2560.json", + "M": 292864, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3516.5734999999986 + }, + "M=293888,N=128": { + "file": "silu_config_M293888_N128.json", + "M": 293888, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.35949999999997 + }, + "M=293888,N=160": { + "file": 
"silu_config_M293888_N160.json", + "M": 293888, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.36075000000017 + }, + "M=293888,N=192": { + "file": "silu_config_M293888_N192.json", + "M": 293888, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.44025 + }, + "M=293888,N=256": { + "file": "silu_config_M293888_N256.json", + "M": 293888, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.3202499999999 + }, + "M=293888,N=320": { + "file": "silu_config_M293888_N320.json", + "M": 293888, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.962 + }, + "M=293888,N=384": { + "file": "silu_config_M293888_N384.json", + "M": 293888, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 708.6417499999998 + }, + "M=293888,N=480": { + "file": "silu_config_M293888_N480.json", + "M": 293888, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.0420000000004 + }, + "M=293888,N=512": { + "file": "silu_config_M293888_N512.json", + "M": 293888, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 569.2812500000002 + }, + "M=293888,N=576": { + "file": "silu_config_M293888_N576.json", + "M": 293888, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1368.1245000000004 + }, + "M=293888,N=640": { + "file": "silu_config_M293888_N640.json", + "M": 293888, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1373.0445000000004 + }, + "M=293888,N=768": { + "file": "silu_config_M293888_N768.json", + "M": 293888, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1352.1644999999999 + }, + "M=293888,N=800": { + "file": "silu_config_M293888_N800.json", + "M": 293888, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1374.6045 + }, + "M=293888,N=896": { + "file": "silu_config_M293888_N896.json", + "M": 293888, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.1247500000004 + }, + "M=293888,N=960": { + 
"file": "silu_config_M293888_N960.json", + "M": 293888, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.2047500000003 + }, + "M=293888,N=1024": { + "file": "silu_config_M293888_N1024.json", + "M": 293888, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1114.7237500000006 + }, + "M=293888,N=1120": { + "file": "silu_config_M293888_N1120.json", + "M": 293888, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2718.21 + }, + "M=293888,N=1152": { + "file": "silu_config_M293888_N1152.json", + "M": 293888, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2715.09 + }, + "M=293888,N=1280": { + "file": "silu_config_M293888_N1280.json", + "M": 293888, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2731.290250000001 + }, + "M=293888,N=1344": { + "file": "silu_config_M293888_N1344.json", + "M": 293888, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.290249999999 + }, + "M=293888,N=1408": { + "file": "silu_config_M293888_N1408.json", + "M": 293888, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2726.650249999998 + }, + "M=293888,N=1440": { + "file": "silu_config_M293888_N1440.json", + "M": 293888, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2776.9302500000013 + }, + "M=293888,N=1536": { + "file": "silu_config_M293888_N1536.json", + "M": 293888, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2720.6902499999997 + }, + "M=293888,N=1600": { + "file": "silu_config_M293888_N1600.json", + "M": 293888, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2771.410250000001 + }, + "M=293888,N=1664": { + "file": "silu_config_M293888_N1664.json", + "M": 293888, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2738.21025 + }, + "M=293888,N=1728": { + "file": "silu_config_M293888_N1728.json", + "M": 293888, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2767.3702500000018 + }, + "M=293888,N=1760": { + "file": "silu_config_M293888_N1760.json", + "M": 293888, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.8902499999976 + }, + "M=293888,N=1792": { + "file": "silu_config_M293888_N1792.json", + "M": 293888, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2752.010250000001 + }, + "M=293888,N=1920": { + "file": "silu_config_M293888_N1920.json", + "M": 293888, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.6502500000006 + }, + "M=293888,N=2048": { + "file": "silu_config_M293888_N2048.json", + "M": 293888, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2177.0080000000016 + }, + "M=293888,N=2080": { + "file": "silu_config_M293888_N2080.json", + "M": 293888, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3046.451499999999 + }, + "M=293888,N=2240": { + "file": "silu_config_M293888_N2240.json", + "M": 293888, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3208.1322499999987 + }, + "M=293888,N=2400": { + "file": "silu_config_M293888_N2400.json", + "M": 293888, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3375.692750000002 + }, + "M=293888,N=2560": { + "file": "silu_config_M293888_N2560.json", + "M": 293888, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3519.573500000004 + }, + "M=294912,N=128": { + "file": "silu_config_M294912_N128.json", + "M": 294912, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.87975 + }, + "M=294912,N=160": { + "file": "silu_config_M294912_N160.json", + "M": 294912, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 357.44049999999993 + }, + "M=294912,N=192": { + "file": "silu_config_M294912_N192.json", + "M": 294912, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.7204999999999 + }, + "M=294912,N=256": { + "file": "silu_config_M294912_N256.json", + "M": 294912, + "N": 256, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 330.84024999999997 + }, + "M=294912,N=320": { + "file": "silu_config_M294912_N320.json", + "M": 294912, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 716.0419999999999 + }, + "M=294912,N=384": { + "file": "silu_config_M294912_N384.json", + "M": 294912, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.0417500000001 + }, + "M=294912,N=480": { + "file": "silu_config_M294912_N480.json", + "M": 294912, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.5620000000004 + }, + "M=294912,N=512": { + "file": "silu_config_M294912_N512.json", + "M": 294912, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 563.00125 + }, + "M=294912,N=576": { + "file": "silu_config_M294912_N576.json", + "M": 294912, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1387.5247500000005 + }, + "M=294912,N=640": { + "file": "silu_config_M294912_N640.json", + "M": 294912, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1367.0445 + }, + "M=294912,N=768": { + "file": "silu_config_M294912_N768.json", + "M": 294912, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1370.9244999999996 + }, + "M=294912,N=800": { + "file": "silu_config_M294912_N800.json", + "M": 294912, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1382.9245 + }, + "M=294912,N=896": { + "file": "silu_config_M294912_N896.json", + "M": 294912, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1376.9645 + }, + "M=294912,N=960": { + "file": "silu_config_M294912_N960.json", + "M": 294912, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1389.3647500000002 + }, + "M=294912,N=1024": { + "file": "silu_config_M294912_N1024.json", + "M": 294912, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1118.4835000000003 + }, + "M=294912,N=1120": { + "file": "silu_config_M294912_N1120.json", + "M": 294912, + "N": 1120, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.5302500000007 + }, + "M=294912,N=1152": { + "file": "silu_config_M294912_N1152.json", + "M": 294912, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.25025 + }, + "M=294912,N=1280": { + "file": "silu_config_M294912_N1280.json", + "M": 294912, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.0902499999993 + }, + "M=294912,N=1344": { + "file": "silu_config_M294912_N1344.json", + "M": 294912, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.010250000002 + }, + "M=294912,N=1408": { + "file": "silu_config_M294912_N1408.json", + "M": 294912, + "N": 1408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2751.650249999999 + }, + "M=294912,N=1440": { + "file": "silu_config_M294912_N1440.json", + "M": 294912, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2755.330250000002 + }, + "M=294912,N=1536": { + "file": "silu_config_M294912_N1536.json", + "M": 294912, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2740.6902499999987 + }, + "M=294912,N=1600": { + "file": "silu_config_M294912_N1600.json", + "M": 294912, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2759.6502500000006 + }, + "M=294912,N=1664": { + "file": "silu_config_M294912_N1664.json", + "M": 294912, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.410249999999 + }, + "M=294912,N=1728": { + "file": "silu_config_M294912_N1728.json", + "M": 294912, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2756.450249999998 + }, + "M=294912,N=1760": { + "file": "silu_config_M294912_N1760.json", + "M": 294912, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.0902499999984 + }, + "M=294912,N=1792": { + "file": "silu_config_M294912_N1792.json", + "M": 294912, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2762.0502499999984 + }, + "M=294912,N=1920": { + "file": 
"silu_config_M294912_N1920.json", + "M": 294912, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2761.490499999998 + }, + "M=294912,N=2048": { + "file": "silu_config_M294912_N2048.json", + "M": 294912, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2176.2079999999987 + }, + "M=294912,N=2080": { + "file": "silu_config_M294912_N2080.json", + "M": 294912, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3057.1714999999967 + }, + "M=294912,N=2240": { + "file": "silu_config_M294912_N2240.json", + "M": 294912, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3209.212249999997 + }, + "M=294912,N=2400": { + "file": "silu_config_M294912_N2400.json", + "M": 294912, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3387.4130000000005 + }, + "M=294912,N=2560": { + "file": "silu_config_M294912_N2560.json", + "M": 294912, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3549.933499999999 + }, + "M=295936,N=128": { + "file": "silu_config_M295936_N128.json", + "M": 295936, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.27950000000004 + }, + "M=295936,N=160": { + "file": "silu_config_M295936_N160.json", + "M": 295936, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.80049999999994 + }, + "M=295936,N=192": { + "file": "silu_config_M295936_N192.json", + "M": 295936, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.1605000000002 + }, + "M=295936,N=256": { + "file": "silu_config_M295936_N256.json", + "M": 295936, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.8805000000001 + }, + "M=295936,N=320": { + "file": "silu_config_M295936_N320.json", + "M": 295936, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 718.0419999999999 + }, + "M=295936,N=384": { + "file": "silu_config_M295936_N384.json", + "M": 295936, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
713.4817499999999 + }, + "M=295936,N=480": { + "file": "silu_config_M295936_N480.json", + "M": 295936, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 731.9620000000002 + }, + "M=295936,N=512": { + "file": "silu_config_M295936_N512.json", + "M": 295936, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 573.4012500000001 + }, + "M=295936,N=576": { + "file": "silu_config_M295936_N576.json", + "M": 295936, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1392.4447499999997 + }, + "M=295936,N=640": { + "file": "silu_config_M295936_N640.json", + "M": 295936, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1377.0845 + }, + "M=295936,N=768": { + "file": "silu_config_M295936_N768.json", + "M": 295936, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1386.6447500000004 + }, + "M=295936,N=800": { + "file": "silu_config_M295936_N800.json", + "M": 295936, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1394.1247500000004 + }, + "M=295936,N=896": { + "file": "silu_config_M295936_N896.json", + "M": 295936, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1387.4847499999996 + }, + "M=295936,N=960": { + "file": "silu_config_M295936_N960.json", + "M": 295936, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.4447500000015 + }, + "M=295936,N=1024": { + "file": "silu_config_M295936_N1024.json", + "M": 295936, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1122.9232500000003 + }, + "M=295936,N=1120": { + "file": "silu_config_M295936_N1120.json", + "M": 295936, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.0902499999993 + }, + "M=295936,N=1152": { + "file": "silu_config_M295936_N1152.json", + "M": 295936, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2743.8502499999995 + }, + "M=295936,N=1280": { + "file": "silu_config_M295936_N1280.json", + "M": 295936, + "N": 1280, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.370250000001 + }, + "M=295936,N=1344": { + "file": "silu_config_M295936_N1344.json", + "M": 295936, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2767.17025 + }, + "M=295936,N=1408": { + "file": "silu_config_M295936_N1408.json", + "M": 295936, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2745.37025 + }, + "M=295936,N=1440": { + "file": "silu_config_M295936_N1440.json", + "M": 295936, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2774.9302499999985 + }, + "M=295936,N=1536": { + "file": "silu_config_M295936_N1536.json", + "M": 295936, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.0102499999994 + }, + "M=295936,N=1600": { + "file": "silu_config_M295936_N1600.json", + "M": 295936, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.7704999999996 + }, + "M=295936,N=1664": { + "file": "silu_config_M295936_N1664.json", + "M": 295936, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2767.8902499999995 + }, + "M=295936,N=1728": { + "file": "silu_config_M295936_N1728.json", + "M": 295936, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2786.2105 + }, + "M=295936,N=1760": { + "file": "silu_config_M295936_N1760.json", + "M": 295936, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.8904999999977 + }, + "M=295936,N=1792": { + "file": "silu_config_M295936_N1792.json", + "M": 295936, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.2102499999983 + }, + "M=295936,N=1920": { + "file": "silu_config_M295936_N1920.json", + "M": 295936, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2770.6102500000006 + }, + "M=295936,N=2048": { + "file": "silu_config_M295936_N2048.json", + "M": 295936, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2213.1680000000015 + }, + "M=295936,N=2080": { + "file": 
"silu_config_M295936_N2080.json", + "M": 295936, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3058.0115000000023 + }, + "M=295936,N=2240": { + "file": "silu_config_M295936_N2240.json", + "M": 295936, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3249.8922500000026 + }, + "M=295936,N=2400": { + "file": "silu_config_M295936_N2400.json", + "M": 295936, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3419.4529999999995 + }, + "M=295936,N=2560": { + "file": "silu_config_M295936_N2560.json", + "M": 295936, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3541.5334999999977 + }, + "M=296960,N=128": { + "file": "silu_config_M296960_N128.json", + "M": 296960, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.63975000000005 + }, + "M=296960,N=160": { + "file": "silu_config_M296960_N160.json", + "M": 296960, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.40049999999985 + }, + "M=296960,N=192": { + "file": "silu_config_M296960_N192.json", + "M": 296960, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 348.16025 + }, + "M=296960,N=256": { + "file": "silu_config_M296960_N256.json", + "M": 296960, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 321.68025 + }, + "M=296960,N=320": { + "file": "silu_config_M296960_N320.json", + "M": 296960, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 716.0420000000001 + }, + "M=296960,N=384": { + "file": "silu_config_M296960_N384.json", + "M": 296960, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.9219999999998 + }, + "M=296960,N=480": { + "file": "silu_config_M296960_N480.json", + "M": 296960, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.682 + }, + "M=296960,N=512": { + "file": "silu_config_M296960_N512.json", + "M": 296960, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 575.5212500000002 + }, + 
"M=296960,N=576": { + "file": "silu_config_M296960_N576.json", + "M": 296960, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.5645000000004 + }, + "M=296960,N=640": { + "file": "silu_config_M296960_N640.json", + "M": 296960, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1363.5645 + }, + "M=296960,N=768": { + "file": "silu_config_M296960_N768.json", + "M": 296960, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1378.9645000000005 + }, + "M=296960,N=800": { + "file": "silu_config_M296960_N800.json", + "M": 296960, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.2047499999999 + }, + "M=296960,N=896": { + "file": "silu_config_M296960_N896.json", + "M": 296960, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.4045000000015 + }, + "M=296960,N=960": { + "file": "silu_config_M296960_N960.json", + "M": 296960, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1410.1647500000008 + }, + "M=296960,N=1024": { + "file": "silu_config_M296960_N1024.json", + "M": 296960, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1125.5634999999993 + }, + "M=296960,N=1120": { + "file": "silu_config_M296960_N1120.json", + "M": 296960, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2737.610249999999 + }, + "M=296960,N=1152": { + "file": "silu_config_M296960_N1152.json", + "M": 296960, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.8902499999995 + }, + "M=296960,N=1280": { + "file": "silu_config_M296960_N1280.json", + "M": 296960, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2742.170249999999 + }, + "M=296960,N=1344": { + "file": "silu_config_M296960_N1344.json", + "M": 296960, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.7302499999987 + }, + "M=296960,N=1408": { + "file": "silu_config_M296960_N1408.json", + "M": 296960, + "N": 1408, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2757.8102499999986 + }, + "M=296960,N=1440": { + "file": "silu_config_M296960_N1440.json", + "M": 296960, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2742.7702500000005 + }, + "M=296960,N=1536": { + "file": "silu_config_M296960_N1536.json", + "M": 296960, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.490249999998 + }, + "M=296960,N=1600": { + "file": "silu_config_M296960_N1600.json", + "M": 296960, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.410250000001 + }, + "M=296960,N=1664": { + "file": "silu_config_M296960_N1664.json", + "M": 296960, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.8102500000005 + }, + "M=296960,N=1728": { + "file": "silu_config_M296960_N1728.json", + "M": 296960, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2765.0502499999975 + }, + "M=296960,N=1760": { + "file": "silu_config_M296960_N1760.json", + "M": 296960, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2760.610249999997 + }, + "M=296960,N=1792": { + "file": "silu_config_M296960_N1792.json", + "M": 296960, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2772.5302500000016 + }, + "M=296960,N=1920": { + "file": "silu_config_M296960_N1920.json", + "M": 296960, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2772.0102499999994 + }, + "M=296960,N=2048": { + "file": "silu_config_M296960_N2048.json", + "M": 296960, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2201.688000000001 + }, + "M=296960,N=2080": { + "file": "silu_config_M296960_N2080.json", + "M": 296960, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3037.011499999997 + }, + "M=296960,N=2240": { + "file": "silu_config_M296960_N2240.json", + "M": 296960, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3218.0122499999998 + }, + "M=296960,N=2400": { + "file": 
"silu_config_M296960_N2400.json", + "M": 296960, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3373.5327500000003 + }, + "M=296960,N=2560": { + "file": "silu_config_M296960_N2560.json", + "M": 296960, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3545.6534999999985 + }, + "M=297984,N=128": { + "file": "silu_config_M297984_N128.json", + "M": 297984, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.11949999999996 + }, + "M=297984,N=160": { + "file": "silu_config_M297984_N160.json", + "M": 297984, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.7202500000001 + }, + "M=297984,N=192": { + "file": "silu_config_M297984_N192.json", + "M": 297984, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.4804999999999 + }, + "M=297984,N=256": { + "file": "silu_config_M297984_N256.json", + "M": 297984, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.8002499999999 + }, + "M=297984,N=320": { + "file": "silu_config_M297984_N320.json", + "M": 297984, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 722.762 + }, + "M=297984,N=384": { + "file": "silu_config_M297984_N384.json", + "M": 297984, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.92175 + }, + "M=297984,N=480": { + "file": "silu_config_M297984_N480.json", + "M": 297984, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.5220000000004 + }, + "M=297984,N=512": { + "file": "silu_config_M297984_N512.json", + "M": 297984, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 583.0812500000002 + }, + "M=297984,N=576": { + "file": "silu_config_M297984_N576.json", + "M": 297984, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1397.4047500000006 + }, + "M=297984,N=640": { + "file": "silu_config_M297984_N640.json", + "M": 297984, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.8047500000002 + }, + 
"M=297984,N=768": { + "file": "silu_config_M297984_N768.json", + "M": 297984, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1390.7647500000003 + }, + "M=297984,N=800": { + "file": "silu_config_M297984_N800.json", + "M": 297984, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.084750000001 + }, + "M=297984,N=896": { + "file": "silu_config_M297984_N896.json", + "M": 297984, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.7647500000003 + }, + "M=297984,N=960": { + "file": "silu_config_M297984_N960.json", + "M": 297984, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1415.00475 + }, + "M=297984,N=1024": { + "file": "silu_config_M297984_N1024.json", + "M": 297984, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1130.0434999999998 + }, + "M=297984,N=1120": { + "file": "silu_config_M297984_N1120.json", + "M": 297984, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2776.6904999999997 + }, + "M=297984,N=1152": { + "file": "silu_config_M297984_N1152.json", + "M": 297984, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2762.13025 + }, + "M=297984,N=1280": { + "file": "silu_config_M297984_N1280.json", + "M": 297984, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.0502500000002 + }, + "M=297984,N=1344": { + "file": "silu_config_M297984_N1344.json", + "M": 297984, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.6105000000016 + }, + "M=297984,N=1408": { + "file": "silu_config_M297984_N1408.json", + "M": 297984, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2764.4102500000017 + }, + "M=297984,N=1440": { + "file": "silu_config_M297984_N1440.json", + "M": 297984, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2804.4505 + }, + "M=297984,N=1536": { + "file": "silu_config_M297984_N1536.json", + "M": 297984, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2782.450500000001 + }, + "M=297984,N=1600": { + "file": "silu_config_M297984_N1600.json", + "M": 297984, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.2905 + }, + "M=297984,N=1664": { + "file": "silu_config_M297984_N1664.json", + "M": 297984, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2797.0904999999975 + }, + "M=297984,N=1728": { + "file": "silu_config_M297984_N1728.json", + "M": 297984, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2795.3705 + }, + "M=297984,N=1760": { + "file": "silu_config_M297984_N1760.json", + "M": 297984, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2801.6105000000007 + }, + "M=297984,N=1792": { + "file": "silu_config_M297984_N1792.json", + "M": 297984, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.6904999999997 + }, + "M=297984,N=1920": { + "file": "silu_config_M297984_N1920.json", + "M": 297984, + "N": 1920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2803.410500000002 + }, + "M=297984,N=2048": { + "file": "silu_config_M297984_N2048.json", + "M": 297984, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2227.728 + }, + "M=297984,N=2080": { + "file": "silu_config_M297984_N2080.json", + "M": 297984, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3069.0915000000005 + }, + "M=297984,N=2240": { + "file": "silu_config_M297984_N2240.json", + "M": 297984, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3241.772249999998 + }, + "M=297984,N=2400": { + "file": "silu_config_M297984_N2400.json", + "M": 297984, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3411.8129999999983 + }, + "M=297984,N=2560": { + "file": "silu_config_M297984_N2560.json", + "M": 297984, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3572.053750000001 + }, + "M=299008,N=128": { + "file": "silu_config_M299008_N128.json", + "M": 299008, + "N": 128, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 164.8395 + }, + "M=299008,N=160": { + "file": "silu_config_M299008_N160.json", + "M": 299008, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.8405 + }, + "M=299008,N=192": { + "file": "silu_config_M299008_N192.json", + "M": 299008, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.40049999999985 + }, + "M=299008,N=256": { + "file": "silu_config_M299008_N256.json", + "M": 299008, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 335.16025 + }, + "M=299008,N=320": { + "file": "silu_config_M299008_N320.json", + "M": 299008, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 725.3619999999999 + }, + "M=299008,N=384": { + "file": "silu_config_M299008_N384.json", + "M": 299008, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 720.9219999999998 + }, + "M=299008,N=480": { + "file": "silu_config_M299008_N480.json", + "M": 299008, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 739.3220000000003 + }, + "M=299008,N=512": { + "file": "silu_config_M299008_N512.json", + "M": 299008, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 578.84125 + }, + "M=299008,N=576": { + "file": "silu_config_M299008_N576.json", + "M": 299008, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1395.4047500000001 + }, + "M=299008,N=640": { + "file": "silu_config_M299008_N640.json", + "M": 299008, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1380.4845 + }, + "M=299008,N=768": { + "file": "silu_config_M299008_N768.json", + "M": 299008, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1395.2047499999994 + }, + "M=299008,N=800": { + "file": "silu_config_M299008_N800.json", + "M": 299008, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1424.2847500000003 + }, + "M=299008,N=896": { + "file": "silu_config_M299008_N896.json", + "M": 299008, + "N": 896, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1391.12475 + }, + "M=299008,N=960": { + "file": "silu_config_M299008_N960.json", + "M": 299008, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.2047500000012 + }, + "M=299008,N=1024": { + "file": "silu_config_M299008_N1024.json", + "M": 299008, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1132.5237500000003 + }, + "M=299008,N=1120": { + "file": "silu_config_M299008_N1120.json", + "M": 299008, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.4105 + }, + "M=299008,N=1152": { + "file": "silu_config_M299008_N1152.json", + "M": 299008, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2760.8502500000013 + }, + "M=299008,N=1280": { + "file": "silu_config_M299008_N1280.json", + "M": 299008, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2768.0502499999993 + }, + "M=299008,N=1344": { + "file": "silu_config_M299008_N1344.json", + "M": 299008, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.130500000001 + }, + "M=299008,N=1408": { + "file": "silu_config_M299008_N1408.json", + "M": 299008, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.8905000000004 + }, + "M=299008,N=1440": { + "file": "silu_config_M299008_N1440.json", + "M": 299008, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2793.0904999999984 + }, + "M=299008,N=1536": { + "file": "silu_config_M299008_N1536.json", + "M": 299008, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.330500000001 + }, + "M=299008,N=1600": { + "file": "silu_config_M299008_N1600.json", + "M": 299008, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2787.5305 + }, + "M=299008,N=1664": { + "file": "silu_config_M299008_N1664.json", + "M": 299008, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2785.570499999999 + }, + "M=299008,N=1728": { + "file": 
"silu_config_M299008_N1728.json", + "M": 299008, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2814.930500000002 + }, + "M=299008,N=1760": { + "file": "silu_config_M299008_N1760.json", + "M": 299008, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2811.3305 + }, + "M=299008,N=1792": { + "file": "silu_config_M299008_N1792.json", + "M": 299008, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2799.5705000000025 + }, + "M=299008,N=1920": { + "file": "silu_config_M299008_N1920.json", + "M": 299008, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2799.2505 + }, + "M=299008,N=2048": { + "file": "silu_config_M299008_N2048.json", + "M": 299008, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2234.728250000001 + }, + "M=299008,N=2080": { + "file": "silu_config_M299008_N2080.json", + "M": 299008, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3089.9717500000006 + }, + "M=299008,N=2240": { + "file": "silu_config_M299008_N2240.json", + "M": 299008, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3255.0522500000025 + }, + "M=299008,N=2400": { + "file": "silu_config_M299008_N2400.json", + "M": 299008, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3452.253249999999 + }, + "M=299008,N=2560": { + "file": "silu_config_M299008_N2560.json", + "M": 299008, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3575.413749999998 + }, + "M=300032,N=128": { + "file": "silu_config_M300032_N128.json", + "M": 300032, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.19975 + }, + "M=300032,N=160": { + "file": "silu_config_M300032_N160.json", + "M": 300032, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.84024999999997 + }, + "M=300032,N=192": { + "file": "silu_config_M300032_N192.json", + "M": 300032, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 368.4805000000001 + }, 
+ "M=300032,N=256": { + "file": "silu_config_M300032_N256.json", + "M": 300032, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.6802500000001 + }, + "M=300032,N=320": { + "file": "silu_config_M300032_N320.json", + "M": 300032, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 728.0420000000001 + }, + "M=300032,N=384": { + "file": "silu_config_M300032_N384.json", + "M": 300032, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 722.8819999999998 + }, + "M=300032,N=480": { + "file": "silu_config_M300032_N480.json", + "M": 300032, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.8420000000001 + }, + "M=300032,N=512": { + "file": "silu_config_M300032_N512.json", + "M": 300032, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 587.04125 + }, + "M=300032,N=576": { + "file": "silu_config_M300032_N576.json", + "M": 300032, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.16475 + }, + "M=300032,N=640": { + "file": "silu_config_M300032_N640.json", + "M": 300032, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1395.44475 + }, + "M=300032,N=768": { + "file": "silu_config_M300032_N768.json", + "M": 300032, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1400.0447500000005 + }, + "M=300032,N=800": { + "file": "silu_config_M300032_N800.json", + "M": 300032, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.3247500000002 + }, + "M=300032,N=896": { + "file": "silu_config_M300032_N896.json", + "M": 300032, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1411.484750000001 + }, + "M=300032,N=960": { + "file": "silu_config_M300032_N960.json", + "M": 300032, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1411.6447500000004 + }, + "M=300032,N=1024": { + "file": "silu_config_M300032_N1024.json", + "M": 300032, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 
1136.0434999999998 + }, + "M=300032,N=1120": { + "file": "silu_config_M300032_N1120.json", + "M": 300032, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.130500000001 + }, + "M=300032,N=1152": { + "file": "silu_config_M300032_N1152.json", + "M": 300032, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.37025 + }, + "M=300032,N=1280": { + "file": "silu_config_M300032_N1280.json", + "M": 300032, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2793.1305 + }, + "M=300032,N=1344": { + "file": "silu_config_M300032_N1344.json", + "M": 300032, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2807.570499999999 + }, + "M=300032,N=1408": { + "file": "silu_config_M300032_N1408.json", + "M": 300032, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2788.7704999999996 + }, + "M=300032,N=1440": { + "file": "silu_config_M300032_N1440.json", + "M": 300032, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2793.410499999998 + }, + "M=300032,N=1536": { + "file": "silu_config_M300032_N1536.json", + "M": 300032, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.6504999999997 + }, + "M=300032,N=1600": { + "file": "silu_config_M300032_N1600.json", + "M": 300032, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2802.690499999997 + }, + "M=300032,N=1664": { + "file": "silu_config_M300032_N1664.json", + "M": 300032, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2787.010500000002 + }, + "M=300032,N=1728": { + "file": "silu_config_M300032_N1728.json", + "M": 300032, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2808.570499999998 + }, + "M=300032,N=1760": { + "file": "silu_config_M300032_N1760.json", + "M": 300032, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2813.8104999999996 + }, + "M=300032,N=1792": { + "file": "silu_config_M300032_N1792.json", + "M": 300032, + "N": 1792, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.1705 + }, + "M=300032,N=1920": { + "file": "silu_config_M300032_N1920.json", + "M": 300032, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.610499999999 + }, + "M=300032,N=2048": { + "file": "silu_config_M300032_N2048.json", + "M": 300032, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2234.3282500000005 + }, + "M=300032,N=2080": { + "file": "silu_config_M300032_N2080.json", + "M": 300032, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3108.2517499999994 + }, + "M=300032,N=2240": { + "file": "silu_config_M300032_N2240.json", + "M": 300032, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3253.3722499999967 + }, + "M=300032,N=2400": { + "file": "silu_config_M300032_N2400.json", + "M": 300032, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3448.893249999999 + }, + "M=300032,N=2560": { + "file": "silu_config_M300032_N2560.json", + "M": 300032, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3592.973750000001 + }, + "M=301056,N=128": { + "file": "silu_config_M301056_N128.json", + "M": 301056, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.11974999999995 + }, + "M=301056,N=160": { + "file": "silu_config_M301056_N160.json", + "M": 301056, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 380.0804999999999 + }, + "M=301056,N=192": { + "file": "silu_config_M301056_N192.json", + "M": 301056, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 369.64049999999975 + }, + "M=301056,N=256": { + "file": "silu_config_M301056_N256.json", + "M": 301056, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.72024999999974 + }, + "M=301056,N=320": { + "file": "silu_config_M301056_N320.json", + "M": 301056, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.4019999999998 + }, + "M=301056,N=384": { + "file": 
"silu_config_M301056_N384.json", + "M": 301056, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.8820000000003 + }, + "M=301056,N=480": { + "file": "silu_config_M301056_N480.json", + "M": 301056, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.8419999999996 + }, + "M=301056,N=512": { + "file": "silu_config_M301056_N512.json", + "M": 301056, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 574.2812500000002 + }, + "M=301056,N=576": { + "file": "silu_config_M301056_N576.json", + "M": 301056, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1415.6847500000003 + }, + "M=301056,N=640": { + "file": "silu_config_M301056_N640.json", + "M": 301056, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1400.12475 + }, + "M=301056,N=768": { + "file": "silu_config_M301056_N768.json", + "M": 301056, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.9247500000006 + }, + "M=301056,N=800": { + "file": "silu_config_M301056_N800.json", + "M": 301056, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.56475 + }, + "M=301056,N=896": { + "file": "silu_config_M301056_N896.json", + "M": 301056, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.68475 + }, + "M=301056,N=960": { + "file": "silu_config_M301056_N960.json", + "M": 301056, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.6847500000008 + }, + "M=301056,N=1024": { + "file": "silu_config_M301056_N1024.json", + "M": 301056, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1141.0437499999998 + }, + "M=301056,N=1120": { + "file": "silu_config_M301056_N1120.json", + "M": 301056, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2813.2105 + }, + "M=301056,N=1152": { + "file": "silu_config_M301056_N1152.json", + "M": 301056, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2778.450249999999 + }, + "M=301056,N=1280": { 
+ "file": "silu_config_M301056_N1280.json", + "M": 301056, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.370249999999 + }, + "M=301056,N=1344": { + "file": "silu_config_M301056_N1344.json", + "M": 301056, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2817.170500000001 + }, + "M=301056,N=1408": { + "file": "silu_config_M301056_N1408.json", + "M": 301056, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2788.0904999999993 + }, + "M=301056,N=1440": { + "file": "silu_config_M301056_N1440.json", + "M": 301056, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2823.8505000000014 + }, + "M=301056,N=1536": { + "file": "silu_config_M301056_N1536.json", + "M": 301056, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2791.250499999999 + }, + "M=301056,N=1600": { + "file": "silu_config_M301056_N1600.json", + "M": 301056, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.3304999999973 + }, + "M=301056,N=1664": { + "file": "silu_config_M301056_N1664.json", + "M": 301056, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.770499999998 + }, + "M=301056,N=1728": { + "file": "silu_config_M301056_N1728.json", + "M": 301056, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2817.8905000000004 + }, + "M=301056,N=1760": { + "file": "silu_config_M301056_N1760.json", + "M": 301056, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.930500000002 + }, + "M=301056,N=1792": { + "file": "silu_config_M301056_N1792.json", + "M": 301056, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.7704999999996 + }, + "M=301056,N=1920": { + "file": "silu_config_M301056_N1920.json", + "M": 301056, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.2504999999974 + }, + "M=301056,N=2048": { + "file": "silu_config_M301056_N2048.json", + "M": 301056, + "N": 2048, + "rows_per_block": 8, + 
"vec_size": 4, + "time_us": 2241.2082500000015 + }, + "M=301056,N=2080": { + "file": "silu_config_M301056_N2080.json", + "M": 301056, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3098.53175 + }, + "M=301056,N=2240": { + "file": "silu_config_M301056_N2240.json", + "M": 301056, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3274.0125000000025 + }, + "M=301056,N=2400": { + "file": "silu_config_M301056_N2400.json", + "M": 301056, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3461.013249999998 + }, + "M=301056,N=2560": { + "file": "silu_config_M301056_N2560.json", + "M": 301056, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3609.093750000002 + }, + "M=302080,N=128": { + "file": "silu_config_M302080_N128.json", + "M": 302080, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.15950000000004 + }, + "M=302080,N=160": { + "file": "silu_config_M302080_N160.json", + "M": 302080, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.56050000000005 + }, + "M=302080,N=192": { + "file": "silu_config_M302080_N192.json", + "M": 302080, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.76049999999987 + }, + "M=302080,N=256": { + "file": "silu_config_M302080_N256.json", + "M": 302080, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 326.8802499999998 + }, + "M=302080,N=320": { + "file": "silu_config_M302080_N320.json", + "M": 302080, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.6020000000001 + }, + "M=302080,N=384": { + "file": "silu_config_M302080_N384.json", + "M": 302080, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 723.4419999999998 + }, + "M=302080,N=480": { + "file": "silu_config_M302080_N480.json", + "M": 302080, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.5219999999999 + }, + "M=302080,N=512": { + "file": "silu_config_M302080_N512.json", + "M": 302080, + 
"N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 579.4012499999997 + }, + "M=302080,N=576": { + "file": "silu_config_M302080_N576.json", + "M": 302080, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1407.2447499999998 + }, + "M=302080,N=640": { + "file": "silu_config_M302080_N640.json", + "M": 302080, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1386.7247500000003 + }, + "M=302080,N=768": { + "file": "silu_config_M302080_N768.json", + "M": 302080, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1390.8447500000002 + }, + "M=302080,N=800": { + "file": "silu_config_M302080_N800.json", + "M": 302080, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1422.6447499999995 + }, + "M=302080,N=896": { + "file": "silu_config_M302080_N896.json", + "M": 302080, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.44475 + }, + "M=302080,N=960": { + "file": "silu_config_M302080_N960.json", + "M": 302080, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1421.2047499999999 + }, + "M=302080,N=1024": { + "file": "silu_config_M302080_N1024.json", + "M": 302080, + "N": 1024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1143.9635000000012 + }, + "M=302080,N=1120": { + "file": "silu_config_M302080_N1120.json", + "M": 302080, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.6505000000006 + }, + "M=302080,N=1152": { + "file": "silu_config_M302080_N1152.json", + "M": 302080, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2771.17025 + }, + "M=302080,N=1280": { + "file": "silu_config_M302080_N1280.json", + "M": 302080, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2775.8902500000013 + }, + "M=302080,N=1344": { + "file": "silu_config_M302080_N1344.json", + "M": 302080, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.8505000000014 + }, + "M=302080,N=1408": { + "file": 
"silu_config_M302080_N1408.json", + "M": 302080, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.8902499999995 + }, + "M=302080,N=1440": { + "file": "silu_config_M302080_N1440.json", + "M": 302080, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.410500000001 + }, + "M=302080,N=1536": { + "file": "silu_config_M302080_N1536.json", + "M": 302080, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.1304999999993 + }, + "M=302080,N=1600": { + "file": "silu_config_M302080_N1600.json", + "M": 302080, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2791.1305 + }, + "M=302080,N=1664": { + "file": "silu_config_M302080_N1664.json", + "M": 302080, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2808.3305 + }, + "M=302080,N=1728": { + "file": "silu_config_M302080_N1728.json", + "M": 302080, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.8104999999996 + }, + "M=302080,N=1760": { + "file": "silu_config_M302080_N1760.json", + "M": 302080, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2811.450499999998 + }, + "M=302080,N=1792": { + "file": "silu_config_M302080_N1792.json", + "M": 302080, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2794.6505000000006 + }, + "M=302080,N=1920": { + "file": "silu_config_M302080_N1920.json", + "M": 302080, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.5705000000007 + }, + "M=302080,N=2048": { + "file": "silu_config_M302080_N2048.json", + "M": 302080, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2233.84825 + }, + "M=302080,N=2080": { + "file": "silu_config_M302080_N2080.json", + "M": 302080, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3097.3717500000002 + }, + "M=302080,N=2240": { + "file": "silu_config_M302080_N2240.json", + "M": 302080, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
3252.332250000003 + }, + "M=302080,N=2400": { + "file": "silu_config_M302080_N2400.json", + "M": 302080, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3445.693000000001 + }, + "M=302080,N=2560": { + "file": "silu_config_M302080_N2560.json", + "M": 302080, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3600.7737499999985 + }, + "M=303104,N=128": { + "file": "silu_config_M303104_N128.json", + "M": 303104, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.39974999999998 + }, + "M=303104,N=160": { + "file": "silu_config_M303104_N160.json", + "M": 303104, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.4805 + }, + "M=303104,N=192": { + "file": "silu_config_M303104_N192.json", + "M": 303104, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 361.0005000000001 + }, + "M=303104,N=256": { + "file": "silu_config_M303104_N256.json", + "M": 303104, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.9202500000001 + }, + "M=303104,N=320": { + "file": "silu_config_M303104_N320.json", + "M": 303104, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.6020000000001 + }, + "M=303104,N=384": { + "file": "silu_config_M303104_N384.json", + "M": 303104, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.6819999999998 + }, + "M=303104,N=480": { + "file": "silu_config_M303104_N480.json", + "M": 303104, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 749.1220000000001 + }, + "M=303104,N=512": { + "file": "silu_config_M303104_N512.json", + "M": 303104, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 592.9612499999998 + }, + "M=303104,N=576": { + "file": "silu_config_M303104_N576.json", + "M": 303104, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1420.8047499999998 + }, + "M=303104,N=640": { + "file": "silu_config_M303104_N640.json", + "M": 303104, + "N": 640, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1404.564750000001 + }, + "M=303104,N=768": { + "file": "silu_config_M303104_N768.json", + "M": 303104, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.8047500000002 + }, + "M=303104,N=800": { + "file": "silu_config_M303104_N800.json", + "M": 303104, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1431.8447500000002 + }, + "M=303104,N=896": { + "file": "silu_config_M303104_N896.json", + "M": 303104, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1425.7647499999994 + }, + "M=303104,N=960": { + "file": "silu_config_M303104_N960.json", + "M": 303104, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1412.9647499999996 + }, + "M=303104,N=1024": { + "file": "silu_config_M303104_N1024.json", + "M": 303104, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1147.1635000000006 + }, + "M=303104,N=1120": { + "file": "silu_config_M303104_N1120.json", + "M": 303104, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2832.0504999999994 + }, + "M=303104,N=1152": { + "file": "silu_config_M303104_N1152.json", + "M": 303104, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2807.7704999999996 + }, + "M=303104,N=1280": { + "file": "silu_config_M303104_N1280.json", + "M": 303104, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2802.370500000002 + }, + "M=303104,N=1344": { + "file": "silu_config_M303104_N1344.json", + "M": 303104, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.450500000001 + }, + "M=303104,N=1408": { + "file": "silu_config_M303104_N1408.json", + "M": 303104, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.8105000000014 + }, + "M=303104,N=1440": { + "file": "silu_config_M303104_N1440.json", + "M": 303104, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2822.090500000001 + }, + "M=303104,N=1536": { + "file": 
"silu_config_M303104_N1536.json", + "M": 303104, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.8904999999995 + }, + "M=303104,N=1600": { + "file": "silu_config_M303104_N1600.json", + "M": 303104, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2831.5305000000026 + }, + "M=303104,N=1664": { + "file": "silu_config_M303104_N1664.json", + "M": 303104, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.170500000001 + }, + "M=303104,N=1728": { + "file": "silu_config_M303104_N1728.json", + "M": 303104, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2857.530749999999 + }, + "M=303104,N=1760": { + "file": "silu_config_M303104_N1760.json", + "M": 303104, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.4505 + }, + "M=303104,N=1792": { + "file": "silu_config_M303104_N1792.json", + "M": 303104, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2822.7305000000006 + }, + "M=303104,N=1920": { + "file": "silu_config_M303104_N1920.json", + "M": 303104, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.5707500000026 + }, + "M=303104,N=2048": { + "file": "silu_config_M303104_N2048.json", + "M": 303104, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2235.9682500000017 + }, + "M=303104,N=2080": { + "file": "silu_config_M303104_N2080.json", + "M": 303104, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3119.571750000001 + }, + "M=303104,N=2240": { + "file": "silu_config_M303104_N2240.json", + "M": 303104, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3296.772499999999 + }, + "M=303104,N=2400": { + "file": "silu_config_M303104_N2400.json", + "M": 303104, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3463.5732499999976 + }, + "M=303104,N=2560": { + "file": "silu_config_M303104_N2560.json", + "M": 303104, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 3619.1737500000017 + }, + "M=304128,N=128": { + "file": "silu_config_M304128_N128.json", + "M": 304128, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + "M=304128,N=160": { + "file": "silu_config_M304128_N160.json", + "M": 304128, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.8405 + }, + "M=304128,N=192": { + "file": "silu_config_M304128_N192.json", + "M": 304128, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.3204999999998 + }, + "M=304128,N=256": { + "file": "silu_config_M304128_N256.json", + "M": 304128, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.92049999999995 + }, + "M=304128,N=320": { + "file": "silu_config_M304128_N320.json", + "M": 304128, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.4819999999997 + }, + "M=304128,N=384": { + "file": "silu_config_M304128_N384.json", + "M": 304128, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 733.0419999999997 + }, + "M=304128,N=480": { + "file": "silu_config_M304128_N480.json", + "M": 304128, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.0419999999999 + }, + "M=304128,N=512": { + "file": "silu_config_M304128_N512.json", + "M": 304128, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 580.2412499999998 + }, + "M=304128,N=576": { + "file": "silu_config_M304128_N576.json", + "M": 304128, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1435.68475 + }, + "M=304128,N=640": { + "file": "silu_config_M304128_N640.json", + "M": 304128, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1414.2847499999998 + }, + "M=304128,N=768": { + "file": "silu_config_M304128_N768.json", + "M": 304128, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1428.9647499999996 + }, + "M=304128,N=800": { + "file": "silu_config_M304128_N800.json", + "M": 304128, + "N": 800, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1442.4447499999997 + }, + "M=304128,N=896": { + "file": "silu_config_M304128_N896.json", + "M": 304128, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1424.9247499999997 + }, + "M=304128,N=960": { + "file": "silu_config_M304128_N960.json", + "M": 304128, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1430.96475 + }, + "M=304128,N=1024": { + "file": "silu_config_M304128_N1024.json", + "M": 304128, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1151.6837499999997 + }, + "M=304128,N=1120": { + "file": "silu_config_M304128_N1120.json", + "M": 304128, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2831.370500000001 + }, + "M=304128,N=1152": { + "file": "silu_config_M304128_N1152.json", + "M": 304128, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.4505 + }, + "M=304128,N=1280": { + "file": "silu_config_M304128_N1280.json", + "M": 304128, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2801.5705000000007 + }, + "M=304128,N=1344": { + "file": "silu_config_M304128_N1344.json", + "M": 304128, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2855.9307499999995 + }, + "M=304128,N=1408": { + "file": "silu_config_M304128_N1408.json", + "M": 304128, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2826.450499999999 + }, + "M=304128,N=1440": { + "file": "silu_config_M304128_N1440.json", + "M": 304128, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2841.530749999998 + }, + "M=304128,N=1536": { + "file": "silu_config_M304128_N1536.json", + "M": 304128, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2829.6105000000016 + }, + "M=304128,N=1600": { + "file": "silu_config_M304128_N1600.json", + "M": 304128, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2850.9307499999995 + }, + "M=304128,N=1664": { + "file": "silu_config_M304128_N1664.json", + "M": 304128, + 
"N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2814.2104999999992 + }, + "M=304128,N=1728": { + "file": "silu_config_M304128_N1728.json", + "M": 304128, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2845.9707499999995 + }, + "M=304128,N=1760": { + "file": "silu_config_M304128_N1760.json", + "M": 304128, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2841.690749999998 + }, + "M=304128,N=1792": { + "file": "silu_config_M304128_N1792.json", + "M": 304128, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.650749999999 + }, + "M=304128,N=1920": { + "file": "silu_config_M304128_N1920.json", + "M": 304128, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2862.210750000002 + }, + "M=304128,N=2048": { + "file": "silu_config_M304128_N2048.json", + "M": 304128, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2258.2482500000024 + }, + "M=304128,N=2080": { + "file": "silu_config_M304128_N2080.json", + "M": 304128, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3139.7717500000017 + }, + "M=304128,N=2240": { + "file": "silu_config_M304128_N2240.json", + "M": 304128, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3287.3325000000023 + }, + "M=304128,N=2400": { + "file": "silu_config_M304128_N2400.json", + "M": 304128, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3475.4532500000005 + }, + "M=304128,N=2560": { + "file": "silu_config_M304128_N2560.json", + "M": 304128, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3629.2937500000007 + }, + "M=305152,N=128": { + "file": "silu_config_M305152_N128.json", + "M": 305152, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 168.39975000000004 + }, + "M=305152,N=160": { + "file": "silu_config_M305152_N160.json", + "M": 305152, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 362.7204999999999 + }, + 
"M=305152,N=192": { + "file": "silu_config_M305152_N192.json", + "M": 305152, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.32050000000015 + }, + "M=305152,N=256": { + "file": "silu_config_M305152_N256.json", + "M": 305152, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 330.08025 + }, + "M=305152,N=320": { + "file": "silu_config_M305152_N320.json", + "M": 305152, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 740.0820000000003 + }, + "M=305152,N=384": { + "file": "silu_config_M305152_N384.json", + "M": 305152, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.0419999999999 + }, + "M=305152,N=480": { + "file": "silu_config_M305152_N480.json", + "M": 305152, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 754.4020000000003 + }, + "M=305152,N=512": { + "file": "silu_config_M305152_N512.json", + "M": 305152, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 589.5612499999997 + }, + "M=305152,N=576": { + "file": "silu_config_M305152_N576.json", + "M": 305152, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.4847499999996 + }, + "M=305152,N=640": { + "file": "silu_config_M305152_N640.json", + "M": 305152, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1435.96475 + }, + "M=305152,N=768": { + "file": "silu_config_M305152_N768.json", + "M": 305152, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1417.2447499999994 + }, + "M=305152,N=800": { + "file": "silu_config_M305152_N800.json", + "M": 305152, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.8849999999993 + }, + "M=305152,N=896": { + "file": "silu_config_M305152_N896.json", + "M": 305152, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1435.4047499999997 + }, + "M=305152,N=960": { + "file": "silu_config_M305152_N960.json", + "M": 305152, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1446.8050000000003 + }, + "M=305152,N=1024": { + "file": "silu_config_M305152_N1024.json", + "M": 305152, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1155.9237499999995 + }, + "M=305152,N=1120": { + "file": "silu_config_M305152_N1120.json", + "M": 305152, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2830.4105 + }, + "M=305152,N=1152": { + "file": "silu_config_M305152_N1152.json", + "M": 305152, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.9305000000004 + }, + "M=305152,N=1280": { + "file": "silu_config_M305152_N1280.json", + "M": 305152, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2821.0105000000012 + }, + "M=305152,N=1344": { + "file": "silu_config_M305152_N1344.json", + "M": 305152, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.5705 + }, + "M=305152,N=1408": { + "file": "silu_config_M305152_N1408.json", + "M": 305152, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.0905000000002 + }, + "M=305152,N=1440": { + "file": "silu_config_M305152_N1440.json", + "M": 305152, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.690749999998 + }, + "M=305152,N=1536": { + "file": "silu_config_M305152_N1536.json", + "M": 305152, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.0105000000003 + }, + "M=305152,N=1600": { + "file": "silu_config_M305152_N1600.json", + "M": 305152, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2850.0107499999986 + }, + "M=305152,N=1664": { + "file": "silu_config_M305152_N1664.json", + "M": 305152, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2844.090750000002 + }, + "M=305152,N=1728": { + "file": "silu_config_M305152_N1728.json", + "M": 305152, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2855.8907500000005 + }, + "M=305152,N=1760": { + "file": "silu_config_M305152_N1760.json", + "M": 305152, + "N": 
1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.730749999999 + }, + "M=305152,N=1792": { + "file": "silu_config_M305152_N1792.json", + "M": 305152, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2851.8507499999987 + }, + "M=305152,N=1920": { + "file": "silu_config_M305152_N1920.json", + "M": 305152, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2830.210500000002 + }, + "M=305152,N=2048": { + "file": "silu_config_M305152_N2048.json", + "M": 305152, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2250.2882499999996 + }, + "M=305152,N=2080": { + "file": "silu_config_M305152_N2080.json", + "M": 305152, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3160.5719999999983 + }, + "M=305152,N=2240": { + "file": "silu_config_M305152_N2240.json", + "M": 305152, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3308.4524999999994 + }, + "M=305152,N=2400": { + "file": "silu_config_M305152_N2400.json", + "M": 305152, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3496.973250000001 + }, + "M=305152,N=2560": { + "file": "silu_config_M305152_N2560.json", + "M": 305152, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3632.734000000002 + }, + "M=306176,N=128": { + "file": "silu_config_M306176_N128.json", + "M": 306176, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 179.63975000000005 + }, + "M=306176,N=160": { + "file": "silu_config_M306176_N160.json", + "M": 306176, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.04049999999984 + }, + "M=306176,N=192": { + "file": "silu_config_M306176_N192.json", + "M": 306176, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 375.56050000000005 + }, + "M=306176,N=256": { + "file": "silu_config_M306176_N256.json", + "M": 306176, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.56025 + }, + "M=306176,N=320": { + "file": 
"silu_config_M306176_N320.json", + "M": 306176, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 742.1220000000001 + }, + "M=306176,N=384": { + "file": "silu_config_M306176_N384.json", + "M": 306176, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.7219999999998 + }, + "M=306176,N=480": { + "file": "silu_config_M306176_N480.json", + "M": 306176, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1220000000001 + }, + "M=306176,N=512": { + "file": "silu_config_M306176_N512.json", + "M": 306176, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 598.6415 + }, + "M=306176,N=576": { + "file": "silu_config_M306176_N576.json", + "M": 306176, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1428.5247500000005 + }, + "M=306176,N=640": { + "file": "silu_config_M306176_N640.json", + "M": 306176, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.0447499999996 + }, + "M=306176,N=768": { + "file": "silu_config_M306176_N768.json", + "M": 306176, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.6447500000004 + }, + "M=306176,N=800": { + "file": "silu_config_M306176_N800.json", + "M": 306176, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1430.8447500000016 + }, + "M=306176,N=896": { + "file": "silu_config_M306176_N896.json", + "M": 306176, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.4449999999993 + }, + "M=306176,N=960": { + "file": "silu_config_M306176_N960.json", + "M": 306176, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1451.565 + }, + "M=306176,N=1024": { + "file": "silu_config_M306176_N1024.json", + "M": 306176, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1159.6037500000002 + }, + "M=306176,N=1120": { + "file": "silu_config_M306176_N1120.json", + "M": 306176, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2847.7707499999997 + }, + 
"M=306176,N=1152": { + "file": "silu_config_M306176_N1152.json", + "M": 306176, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2825.2105 + }, + "M=306176,N=1280": { + "file": "silu_config_M306176_N1280.json", + "M": 306176, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.9704999999994 + }, + "M=306176,N=1344": { + "file": "silu_config_M306176_N1344.json", + "M": 306176, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.7707499999997 + }, + "M=306176,N=1408": { + "file": "silu_config_M306176_N1408.json", + "M": 306176, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2845.290750000001 + }, + "M=306176,N=1440": { + "file": "silu_config_M306176_N1440.json", + "M": 306176, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2860.65075 + }, + "M=306176,N=1536": { + "file": "silu_config_M306176_N1536.json", + "M": 306176, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2848.450749999998 + }, + "M=306176,N=1600": { + "file": "silu_config_M306176_N1600.json", + "M": 306176, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2880.29075 + }, + "M=306176,N=1664": { + "file": "silu_config_M306176_N1664.json", + "M": 306176, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.8107499999987 + }, + "M=306176,N=1728": { + "file": "silu_config_M306176_N1728.json", + "M": 306176, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2875.8107500000024 + }, + "M=306176,N=1760": { + "file": "silu_config_M306176_N1760.json", + "M": 306176, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.730749999999 + }, + "M=306176,N=1792": { + "file": "silu_config_M306176_N1792.json", + "M": 306176, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.25075 + }, + "M=306176,N=1920": { + "file": "silu_config_M306176_N1920.json", + "M": 306176, + "N": 1920, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 2838.370750000001 + }, + "M=306176,N=2048": { + "file": "silu_config_M306176_N2048.json", + "M": 306176, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2272.808249999999 + }, + "M=306176,N=2080": { + "file": "silu_config_M306176_N2080.json", + "M": 306176, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3156.811999999998 + }, + "M=306176,N=2240": { + "file": "silu_config_M306176_N2240.json", + "M": 306176, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.4124999999976 + }, + "M=306176,N=2400": { + "file": "silu_config_M306176_N2400.json", + "M": 306176, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3515.8134999999993 + }, + "M=306176,N=2560": { + "file": "silu_config_M306176_N2560.json", + "M": 306176, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3676.854000000001 + }, + "M=307200,N=128": { + "file": "silu_config_M307200_N128.json", + "M": 307200, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.91974999999996 + }, + "M=307200,N=160": { + "file": "silu_config_M307200_N160.json", + "M": 307200, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.4805 + }, + "M=307200,N=192": { + "file": "silu_config_M307200_N192.json", + "M": 307200, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.6005000000001 + }, + "M=307200,N=256": { + "file": "silu_config_M307200_N256.json", + "M": 307200, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.72024999999996 + }, + "M=307200,N=320": { + "file": "silu_config_M307200_N320.json", + "M": 307200, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.2019999999998 + }, + "M=307200,N=384": { + "file": "silu_config_M307200_N384.json", + "M": 307200, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.922 + }, + "M=307200,N=480": { + "file": "silu_config_M307200_N480.json", + "M": 307200, + "N": 480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 754.6019999999996 + }, + "M=307200,N=512": { + "file": "silu_config_M307200_N512.json", + "M": 307200, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 596.6414999999997 + }, + "M=307200,N=576": { + "file": "silu_config_M307200_N576.json", + "M": 307200, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.0847499999995 + }, + "M=307200,N=640": { + "file": "silu_config_M307200_N640.json", + "M": 307200, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.68475 + }, + "M=307200,N=768": { + "file": "silu_config_M307200_N768.json", + "M": 307200, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1415.4847499999996 + }, + "M=307200,N=800": { + "file": "silu_config_M307200_N800.json", + "M": 307200, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.4447499999997 + }, + "M=307200,N=896": { + "file": "silu_config_M307200_N896.json", + "M": 307200, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.2847499999998 + }, + "M=307200,N=960": { + "file": "silu_config_M307200_N960.json", + "M": 307200, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1426.04475 + }, + "M=307200,N=1024": { + "file": "silu_config_M307200_N1024.json", + "M": 307200, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1168.6437500000002 + }, + "M=307200,N=1120": { + "file": "silu_config_M307200_N1120.json", + "M": 307200, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2831.330499999999 + }, + "M=307200,N=1152": { + "file": "silu_config_M307200_N1152.json", + "M": 307200, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2798.5305 + }, + "M=307200,N=1280": { + "file": "silu_config_M307200_N1280.json", + "M": 307200, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.9704999999994 + }, + "M=307200,N=1344": { + "file": "silu_config_M307200_N1344.json", + 
"M": 307200, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.490499999999 + }, + "M=307200,N=1408": { + "file": "silu_config_M307200_N1408.json", + "M": 307200, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.6905000000015 + }, + "M=307200,N=1440": { + "file": "silu_config_M307200_N1440.json", + "M": 307200, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2829.4905000000017 + }, + "M=307200,N=1536": { + "file": "silu_config_M307200_N1536.json", + "M": 307200, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2820.570499999997 + }, + "M=307200,N=1600": { + "file": "silu_config_M307200_N1600.json", + "M": 307200, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.050750000002 + }, + "M=307200,N=1664": { + "file": "silu_config_M307200_N1664.json", + "M": 307200, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.490499999998 + }, + "M=307200,N=1728": { + "file": "silu_config_M307200_N1728.json", + "M": 307200, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2837.130500000002 + }, + "M=307200,N=1760": { + "file": "silu_config_M307200_N1760.json", + "M": 307200, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.37075 + }, + "M=307200,N=1792": { + "file": "silu_config_M307200_N1792.json", + "M": 307200, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2824.2905 + }, + "M=307200,N=1920": { + "file": "silu_config_M307200_N1920.json", + "M": 307200, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.0107499999995 + }, + "M=307200,N=2048": { + "file": "silu_config_M307200_N2048.json", + "M": 307200, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2258.6882499999992 + }, + "M=307200,N=2080": { + "file": "silu_config_M307200_N2080.json", + "M": 307200, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.691750000002 + }, + 
"M=307200,N=2240": { + "file": "silu_config_M307200_N2240.json", + "M": 307200, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3309.3324999999986 + }, + "M=307200,N=2400": { + "file": "silu_config_M307200_N2400.json", + "M": 307200, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3510.093500000001 + }, + "M=307200,N=2560": { + "file": "silu_config_M307200_N2560.json", + "M": 307200, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3651.374 + }, + "M=308224,N=128": { + "file": "silu_config_M308224_N128.json", + "M": 308224, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.23950000000002 + }, + "M=308224,N=160": { + "file": "silu_config_M308224_N160.json", + "M": 308224, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.8405 + }, + "M=308224,N=192": { + "file": "silu_config_M308224_N192.json", + "M": 308224, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.68050000000005 + }, + "M=308224,N=256": { + "file": "silu_config_M308224_N256.json", + "M": 308224, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 345.96025 + }, + "M=308224,N=320": { + "file": "silu_config_M308224_N320.json", + "M": 308224, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.5620000000001 + }, + "M=308224,N=384": { + "file": "silu_config_M308224_N384.json", + "M": 308224, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.2419999999997 + }, + "M=308224,N=480": { + "file": "silu_config_M308224_N480.json", + "M": 308224, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.402 + }, + "M=308224,N=512": { + "file": "silu_config_M308224_N512.json", + "M": 308224, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 602.6815000000001 + }, + "M=308224,N=576": { + "file": "silu_config_M308224_N576.json", + "M": 308224, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1455.045 + }, + 
"M=308224,N=640": { + "file": "silu_config_M308224_N640.json", + "M": 308224, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.8449999999998 + }, + "M=308224,N=768": { + "file": "silu_config_M308224_N768.json", + "M": 308224, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1443.8447499999997 + }, + "M=308224,N=800": { + "file": "silu_config_M308224_N800.json", + "M": 308224, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.8849999999998 + }, + "M=308224,N=896": { + "file": "silu_config_M308224_N896.json", + "M": 308224, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.2447500000012 + }, + "M=308224,N=960": { + "file": "silu_config_M308224_N960.json", + "M": 308224, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.3250000000003 + }, + "M=308224,N=1024": { + "file": "silu_config_M308224_N1024.json", + "M": 308224, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1172.1637499999997 + }, + "M=308224,N=1120": { + "file": "silu_config_M308224_N1120.json", + "M": 308224, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2881.7707500000006 + }, + "M=308224,N=1152": { + "file": "silu_config_M308224_N1152.json", + "M": 308224, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2845.8107500000015 + }, + "M=308224,N=1280": { + "file": "silu_config_M308224_N1280.json", + "M": 308224, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.450750000001 + }, + "M=308224,N=1344": { + "file": "silu_config_M308224_N1344.json", + "M": 308224, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2884.290749999999 + }, + "M=308224,N=1408": { + "file": "silu_config_M308224_N1408.json", + "M": 308224, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2854.41075 + }, + "M=308224,N=1440": { + "file": "silu_config_M308224_N1440.json", + "M": 308224, + "N": 1440, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2881.0107499999995 + }, + "M=308224,N=1536": { + "file": "silu_config_M308224_N1536.json", + "M": 308224, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2858.53075 + }, + "M=308224,N=1600": { + "file": "silu_config_M308224_N1600.json", + "M": 308224, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.211000000001 + }, + "M=308224,N=1664": { + "file": "silu_config_M308224_N1664.json", + "M": 308224, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.250750000001 + }, + "M=308224,N=1728": { + "file": "silu_config_M308224_N1728.json", + "M": 308224, + "N": 1728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2939.9309999999996 + }, + "M=308224,N=1760": { + "file": "silu_config_M308224_N1760.json", + "M": 308224, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2916.3309999999983 + }, + "M=308224,N=1792": { + "file": "silu_config_M308224_N1792.json", + "M": 308224, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2872.8107500000006 + }, + "M=308224,N=1920": { + "file": "silu_config_M308224_N1920.json", + "M": 308224, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.0507500000003 + }, + "M=308224,N=2048": { + "file": "silu_config_M308224_N2048.json", + "M": 308224, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2279.1282500000016 + }, + "M=308224,N=2080": { + "file": "silu_config_M308224_N2080.json", + "M": 308224, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3187.8120000000017 + }, + "M=308224,N=2240": { + "file": "silu_config_M308224_N2240.json", + "M": 308224, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3344.3727500000005 + }, + "M=308224,N=2400": { + "file": "silu_config_M308224_N2400.json", + "M": 308224, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3538.933499999999 + }, + "M=308224,N=2560": { + "file": 
"silu_config_M308224_N2560.json", + "M": 308224, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3691.6139999999978 + }, + "M=309248,N=128": { + "file": "silu_config_M309248_N128.json", + "M": 309248, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 181.99975 + }, + "M=309248,N=160": { + "file": "silu_config_M309248_N160.json", + "M": 309248, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.04050000000007 + }, + "M=309248,N=192": { + "file": "silu_config_M309248_N192.json", + "M": 309248, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 380.08050000000003 + }, + "M=309248,N=256": { + "file": "silu_config_M309248_N256.json", + "M": 309248, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6402499999999 + }, + "M=309248,N=320": { + "file": "silu_config_M309248_N320.json", + "M": 309248, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.242 + }, + "M=309248,N=384": { + "file": "silu_config_M309248_N384.json", + "M": 309248, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.28225 + }, + "M=309248,N=480": { + "file": "silu_config_M309248_N480.json", + "M": 309248, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.3619999999999 + }, + "M=309248,N=512": { + "file": "silu_config_M309248_N512.json", + "M": 309248, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 604.6414999999997 + }, + "M=309248,N=576": { + "file": "silu_config_M309248_N576.json", + "M": 309248, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1459.6849999999995 + }, + "M=309248,N=640": { + "file": "silu_config_M309248_N640.json", + "M": 309248, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.3647499999997 + }, + "M=309248,N=768": { + "file": "silu_config_M309248_N768.json", + "M": 309248, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1437.2847499999998 + }, + "M=309248,N=800": { + 
"file": "silu_config_M309248_N800.json", + "M": 309248, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1447.5650000000005 + }, + "M=309248,N=896": { + "file": "silu_config_M309248_N896.json", + "M": 309248, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1444.205 + }, + "M=309248,N=960": { + "file": "silu_config_M309248_N960.json", + "M": 309248, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1469.085 + }, + "M=309248,N=1024": { + "file": "silu_config_M309248_N1024.json", + "M": 309248, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1176.4037500000004 + }, + "M=309248,N=1120": { + "file": "silu_config_M309248_N1120.json", + "M": 309248, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.7307499999997 + }, + "M=309248,N=1152": { + "file": "silu_config_M309248_N1152.json", + "M": 309248, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2865.370750000002 + }, + "M=309248,N=1280": { + "file": "silu_config_M309248_N1280.json", + "M": 309248, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2859.210750000001 + }, + "M=309248,N=1344": { + "file": "silu_config_M309248_N1344.json", + "M": 309248, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.7307500000015 + }, + "M=309248,N=1408": { + "file": "silu_config_M309248_N1408.json", + "M": 309248, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2873.9707500000004 + }, + "M=309248,N=1440": { + "file": "silu_config_M309248_N1440.json", + "M": 309248, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2879.9307499999995 + }, + "M=309248,N=1536": { + "file": "silu_config_M309248_N1536.json", + "M": 309248, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.770749999998 + }, + "M=309248,N=1600": { + "file": "silu_config_M309248_N1600.json", + "M": 309248, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2910.6110000000017 + }, + "M=309248,N=1664": { + "file": "silu_config_M309248_N1664.json", + "M": 309248, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2862.610749999998 + }, + "M=309248,N=1728": { + "file": "silu_config_M309248_N1728.json", + "M": 309248, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2908.4910000000027 + }, + "M=309248,N=1760": { + "file": "silu_config_M309248_N1760.json", + "M": 309248, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2915.2109999999993 + }, + "M=309248,N=1792": { + "file": "silu_config_M309248_N1792.json", + "M": 309248, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.410749999998 + }, + "M=309248,N=1920": { + "file": "silu_config_M309248_N1920.json", + "M": 309248, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.7707500000006 + }, + "M=309248,N=2048": { + "file": "silu_config_M309248_N2048.json", + "M": 309248, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2286.60825 + }, + "M=309248,N=2080": { + "file": "silu_config_M309248_N2080.json", + "M": 309248, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3188.771999999999 + }, + "M=309248,N=2240": { + "file": "silu_config_M309248_N2240.json", + "M": 309248, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3346.01275 + }, + "M=309248,N=2400": { + "file": "silu_config_M309248_N2400.json", + "M": 309248, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3560.773500000001 + }, + "M=309248,N=2560": { + "file": "silu_config_M309248_N2560.json", + "M": 309248, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3685.5740000000005 + }, + "M=310272,N=128": { + "file": "silu_config_M310272_N128.json", + "M": 310272, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.43975 + }, + "M=310272,N=160": { + "file": "silu_config_M310272_N160.json", + "M": 310272, + "N": 160, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 369.16049999999996 + }, + "M=310272,N=192": { + "file": "silu_config_M310272_N192.json", + "M": 310272, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.8805000000002 + }, + "M=310272,N=256": { + "file": "silu_config_M310272_N256.json", + "M": 310272, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.16025 + }, + "M=310272,N=320": { + "file": "silu_config_M310272_N320.json", + "M": 310272, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.5219999999999 + }, + "M=310272,N=384": { + "file": "silu_config_M310272_N384.json", + "M": 310272, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 747.8419999999996 + }, + "M=310272,N=480": { + "file": "silu_config_M310272_N480.json", + "M": 310272, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.002 + }, + "M=310272,N=512": { + "file": "silu_config_M310272_N512.json", + "M": 310272, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 606.6014999999998 + }, + "M=310272,N=576": { + "file": "silu_config_M310272_N576.json", + "M": 310272, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.52475 + }, + "M=310272,N=640": { + "file": "silu_config_M310272_N640.json", + "M": 310272, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.1647499999995 + }, + "M=310272,N=768": { + "file": "silu_config_M310272_N768.json", + "M": 310272, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.245 + }, + "M=310272,N=800": { + "file": "silu_config_M310272_N800.json", + "M": 310272, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.645 + }, + "M=310272,N=896": { + "file": "silu_config_M310272_N896.json", + "M": 310272, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.1250000000005 + }, + "M=310272,N=960": { + "file": "silu_config_M310272_N960.json", + "M": 310272, + "N": 960, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1474.0049999999997 + }, + "M=310272,N=1024": { + "file": "silu_config_M310272_N1024.json", + "M": 310272, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1181.44375 + }, + "M=310272,N=1120": { + "file": "silu_config_M310272_N1120.json", + "M": 310272, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2890.290750000001 + }, + "M=310272,N=1152": { + "file": "silu_config_M310272_N1152.json", + "M": 310272, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2874.8107499999987 + }, + "M=310272,N=1280": { + "file": "silu_config_M310272_N1280.json", + "M": 310272, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2858.21075 + }, + "M=310272,N=1344": { + "file": "silu_config_M310272_N1344.json", + "M": 310272, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.0507499999985 + }, + "M=310272,N=1408": { + "file": "silu_config_M310272_N1408.json", + "M": 310272, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2883.3307499999983 + }, + "M=310272,N=1440": { + "file": "silu_config_M310272_N1440.json", + "M": 310272, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2889.6107500000026 + }, + "M=310272,N=1536": { + "file": "silu_config_M310272_N1536.json", + "M": 310272, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.1307500000003 + }, + "M=310272,N=1600": { + "file": "silu_config_M310272_N1600.json", + "M": 310272, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.9307500000014 + }, + "M=310272,N=1664": { + "file": "silu_config_M310272_N1664.json", + "M": 310272, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.0507500000012 + }, + "M=310272,N=1728": { + "file": "silu_config_M310272_N1728.json", + "M": 310272, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2928.571 + }, + "M=310272,N=1760": { + "file": 
"silu_config_M310272_N1760.json", + "M": 310272, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.0510000000013 + }, + "M=310272,N=1792": { + "file": "silu_config_M310272_N1792.json", + "M": 310272, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2892.0507500000003 + }, + "M=310272,N=1920": { + "file": "silu_config_M310272_N1920.json", + "M": 310272, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.650999999998 + }, + "M=310272,N=2048": { + "file": "silu_config_M310272_N2048.json", + "M": 310272, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2314.4085000000014 + }, + "M=310272,N=2080": { + "file": "silu_config_M310272_N2080.json", + "M": 310272, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3198.771999999999 + }, + "M=310272,N=2240": { + "file": "silu_config_M310272_N2240.json", + "M": 310272, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3377.01275 + }, + "M=310272,N=2400": { + "file": "silu_config_M310272_N2400.json", + "M": 310272, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3582.6537500000013 + }, + "M=310272,N=2560": { + "file": "silu_config_M310272_N2560.json", + "M": 310272, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3709.214250000001 + }, + "M=311296,N=128": { + "file": "silu_config_M311296_N128.json", + "M": 311296, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.63975 + }, + "M=311296,N=160": { + "file": "silu_config_M311296_N160.json", + "M": 311296, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.52049999999997 + }, + "M=311296,N=192": { + "file": "silu_config_M311296_N192.json", + "M": 311296, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.2405000000002 + }, + "M=311296,N=256": { + "file": "silu_config_M311296_N256.json", + "M": 311296, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.8402500000002 
+ }, + "M=311296,N=320": { + "file": "silu_config_M311296_N320.json", + "M": 311296, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 756.0019999999997 + }, + "M=311296,N=384": { + "file": "silu_config_M311296_N384.json", + "M": 311296, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.642 + }, + "M=311296,N=480": { + "file": "silu_config_M311296_N480.json", + "M": 311296, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 769.8420000000001 + }, + "M=311296,N=512": { + "file": "silu_config_M311296_N512.json", + "M": 311296, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 582.1214999999997 + }, + "M=311296,N=576": { + "file": "silu_config_M311296_N576.json", + "M": 311296, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1452.645 + }, + "M=311296,N=640": { + "file": "silu_config_M311296_N640.json", + "M": 311296, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.565000000001 + }, + "M=311296,N=768": { + "file": "silu_config_M311296_N768.json", + "M": 311296, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.8450000000007 + }, + "M=311296,N=800": { + "file": "silu_config_M311296_N800.json", + "M": 311296, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.8450000000003 + }, + "M=311296,N=896": { + "file": "silu_config_M311296_N896.json", + "M": 311296, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.5249999999996 + }, + "M=311296,N=960": { + "file": "silu_config_M311296_N960.json", + "M": 311296, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1467.5249999999996 + }, + "M=311296,N=1024": { + "file": "silu_config_M311296_N1024.json", + "M": 311296, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1183.44375 + }, + "M=311296,N=1120": { + "file": "silu_config_M311296_N1120.json", + "M": 311296, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2899.410749999999 + }, + "M=311296,N=1152": { + "file": "silu_config_M311296_N1152.json", + "M": 311296, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2884.21075 + }, + "M=311296,N=1280": { + "file": "silu_config_M311296_N1280.json", + "M": 311296, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2878.45075 + }, + "M=311296,N=1344": { + "file": "silu_config_M311296_N1344.json", + "M": 311296, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2902.690999999998 + }, + "M=311296,N=1408": { + "file": "silu_config_M311296_N1408.json", + "M": 311296, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.8507500000005 + }, + "M=311296,N=1440": { + "file": "silu_config_M311296_N1440.json", + "M": 311296, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.7307500000015 + }, + "M=311296,N=1536": { + "file": "silu_config_M311296_N1536.json", + "M": 311296, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2896.57075 + }, + "M=311296,N=1600": { + "file": "silu_config_M311296_N1600.json", + "M": 311296, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.8907499999978 + }, + "M=311296,N=1664": { + "file": "silu_config_M311296_N1664.json", + "M": 311296, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2902.530749999999 + }, + "M=311296,N=1728": { + "file": "silu_config_M311296_N1728.json", + "M": 311296, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2917.490999999998 + }, + "M=311296,N=1760": { + "file": "silu_config_M311296_N1760.json", + "M": 311296, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2934.371 + }, + "M=311296,N=1792": { + "file": "silu_config_M311296_N1792.json", + "M": 311296, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2890.7707500000015 + }, + "M=311296,N=1920": { + "file": "silu_config_M311296_N1920.json", + "M": 311296, + "N": 1920, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.570749999996 + }, + "M=311296,N=2048": { + "file": "silu_config_M311296_N2048.json", + "M": 311296, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2279.6882499999974 + }, + "M=311296,N=2080": { + "file": "silu_config_M311296_N2080.json", + "M": 311296, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3209.2520000000004 + }, + "M=311296,N=2240": { + "file": "silu_config_M311296_N2240.json", + "M": 311296, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3408.132999999996 + }, + "M=311296,N=2400": { + "file": "silu_config_M311296_N2400.json", + "M": 311296, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3564.933499999999 + }, + "M=311296,N=2560": { + "file": "silu_config_M311296_N2560.json", + "M": 311296, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3726.054250000001 + }, + "M=312320,N=128": { + "file": "silu_config_M312320_N128.json", + "M": 312320, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.35974999999996 + }, + "M=312320,N=160": { + "file": "silu_config_M312320_N160.json", + "M": 312320, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 375.80050000000006 + }, + "M=312320,N=192": { + "file": "silu_config_M312320_N192.json", + "M": 312320, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.72075000000007 + }, + "M=312320,N=256": { + "file": "silu_config_M312320_N256.json", + "M": 312320, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.12025000000017 + }, + "M=312320,N=320": { + "file": "silu_config_M312320_N320.json", + "M": 312320, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.7220000000002 + }, + "M=312320,N=384": { + "file": "silu_config_M312320_N384.json", + "M": 312320, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.4420000000002 + }, + "M=312320,N=480": { + "file": 
"silu_config_M312320_N480.json", + "M": 312320, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.202 + }, + "M=312320,N=512": { + "file": "silu_config_M312320_N512.json", + "M": 312320, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 610.5614999999998 + }, + "M=312320,N=576": { + "file": "silu_config_M312320_N576.json", + "M": 312320, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.8047500000002 + }, + "M=312320,N=640": { + "file": "silu_config_M312320_N640.json", + "M": 312320, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1444.0847500000004 + }, + "M=312320,N=768": { + "file": "silu_config_M312320_N768.json", + "M": 312320, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1459.565 + }, + "M=312320,N=800": { + "file": "silu_config_M312320_N800.json", + "M": 312320, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1472.165 + }, + "M=312320,N=896": { + "file": "silu_config_M312320_N896.json", + "M": 312320, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.725 + }, + "M=312320,N=960": { + "file": "silu_config_M312320_N960.json", + "M": 312320, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1460.045000000001 + }, + "M=312320,N=1024": { + "file": "silu_config_M312320_N1024.json", + "M": 312320, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1187.0437500000003 + }, + "M=312320,N=1120": { + "file": "silu_config_M312320_N1120.json", + "M": 312320, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2898.7307499999997 + }, + "M=312320,N=1152": { + "file": "silu_config_M312320_N1152.json", + "M": 312320, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2886.410750000001 + }, + "M=312320,N=1280": { + "file": "silu_config_M312320_N1280.json", + "M": 312320, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2869.7307499999997 + }, + "M=312320,N=1344": { + 
"file": "silu_config_M312320_N1344.json", + "M": 312320, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.8107500000006 + }, + "M=312320,N=1408": { + "file": "silu_config_M312320_N1408.json", + "M": 312320, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2883.9707499999995 + }, + "M=312320,N=1440": { + "file": "silu_config_M312320_N1440.json", + "M": 312320, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.010750000002 + }, + "M=312320,N=1536": { + "file": "silu_config_M312320_N1536.json", + "M": 312320, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2856.6907500000016 + }, + "M=312320,N=1600": { + "file": "silu_config_M312320_N1600.json", + "M": 312320, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2897.2907499999983 + }, + "M=312320,N=1664": { + "file": "silu_config_M312320_N1664.json", + "M": 312320, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.2107499999993 + }, + "M=312320,N=1728": { + "file": "silu_config_M312320_N1728.json", + "M": 312320, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.0509999999995 + }, + "M=312320,N=1760": { + "file": "silu_config_M312320_N1760.json", + "M": 312320, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.611 + }, + "M=312320,N=1792": { + "file": "silu_config_M312320_N1792.json", + "M": 312320, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2881.210750000001 + }, + "M=312320,N=1920": { + "file": "silu_config_M312320_N1920.json", + "M": 312320, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.8910000000014 + }, + "M=312320,N=2048": { + "file": "silu_config_M312320_N2048.json", + "M": 312320, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2322.808500000002 + }, + "M=312320,N=2080": { + "file": "silu_config_M312320_N2080.json", + "M": 312320, + "N": 2080, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 3197.8120000000017 + }, + "M=312320,N=2240": { + "file": "silu_config_M312320_N2240.json", + "M": 312320, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3364.2927500000023 + }, + "M=312320,N=2400": { + "file": "silu_config_M312320_N2400.json", + "M": 312320, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3567.853500000001 + }, + "M=312320,N=2560": { + "file": "silu_config_M312320_N2560.json", + "M": 312320, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3703.5342500000015 + }, + "M=313344,N=128": { + "file": "silu_config_M313344_N128.json", + "M": 313344, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.79975000000002 + }, + "M=313344,N=160": { + "file": "silu_config_M313344_N160.json", + "M": 313344, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.2805000000001 + }, + "M=313344,N=192": { + "file": "silu_config_M313344_N192.json", + "M": 313344, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 384.84050000000013 + }, + "M=313344,N=256": { + "file": "silu_config_M313344_N256.json", + "M": 313344, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.84024999999997 + }, + "M=313344,N=320": { + "file": "silu_config_M313344_N320.json", + "M": 313344, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 761.0420000000001 + }, + "M=313344,N=384": { + "file": "silu_config_M313344_N384.json", + "M": 313344, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 755.6020000000001 + }, + "M=313344,N=480": { + "file": "silu_config_M313344_N480.json", + "M": 313344, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0420000000004 + }, + "M=313344,N=512": { + "file": "silu_config_M313344_N512.json", + "M": 313344, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.7215000000001 + }, + "M=313344,N=576": { + "file": "silu_config_M313344_N576.json", + "M": 313344, + "N": 
576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1462.245 + }, + "M=313344,N=640": { + "file": "silu_config_M313344_N640.json", + "M": 313344, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1463.165 + }, + "M=313344,N=768": { + "file": "silu_config_M313344_N768.json", + "M": 313344, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1456.2849999999999 + }, + "M=313344,N=800": { + "file": "silu_config_M313344_N800.json", + "M": 313344, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.245 + }, + "M=313344,N=896": { + "file": "silu_config_M313344_N896.json", + "M": 313344, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.4049999999997 + }, + "M=313344,N=960": { + "file": "silu_config_M313344_N960.json", + "M": 313344, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1499.4449999999997 + }, + "M=313344,N=1024": { + "file": "silu_config_M313344_N1024.json", + "M": 313344, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1192.6437500000002 + }, + "M=313344,N=1120": { + "file": "silu_config_M313344_N1120.json", + "M": 313344, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2918.251000000001 + }, + "M=313344,N=1152": { + "file": "silu_config_M313344_N1152.json", + "M": 313344, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.1307500000003 + }, + "M=313344,N=1280": { + "file": "silu_config_M313344_N1280.json", + "M": 313344, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2896.8507499999996 + }, + "M=313344,N=1344": { + "file": "silu_config_M313344_N1344.json", + "M": 313344, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.9709999999995 + }, + "M=313344,N=1408": { + "file": "silu_config_M313344_N1408.json", + "M": 313344, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.8507499999996 + }, + "M=313344,N=1440": { + "file": 
"silu_config_M313344_N1440.json", + "M": 313344, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2919.411 + }, + "M=313344,N=1536": { + "file": "silu_config_M313344_N1536.json", + "M": 313344, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2905.411 + }, + "M=313344,N=1600": { + "file": "silu_config_M313344_N1600.json", + "M": 313344, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2917.410999999999 + }, + "M=313344,N=1664": { + "file": "silu_config_M313344_N1664.json", + "M": 313344, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.370749999999 + }, + "M=313344,N=1728": { + "file": "silu_config_M313344_N1728.json", + "M": 313344, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2913.8509999999987 + }, + "M=313344,N=1760": { + "file": "silu_config_M313344_N1760.json", + "M": 313344, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.610999999999 + }, + "M=313344,N=1792": { + "file": "silu_config_M313344_N1792.json", + "M": 313344, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2928.291 + }, + "M=313344,N=1920": { + "file": "silu_config_M313344_N1920.json", + "M": 313344, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2947.9310000000005 + }, + "M=313344,N=2048": { + "file": "silu_config_M313344_N2048.json", + "M": 313344, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2326.0885 + }, + "M=313344,N=2080": { + "file": "silu_config_M313344_N2080.json", + "M": 313344, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3242.2522499999995 + }, + "M=313344,N=2240": { + "file": "silu_config_M313344_N2240.json", + "M": 313344, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3413.6929999999984 + }, + "M=313344,N=2400": { + "file": "silu_config_M313344_N2400.json", + "M": 313344, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3572.173500000001 + }, 
+ "M=313344,N=2560": { + "file": "silu_config_M313344_N2560.json", + "M": 313344, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3766.6945000000014 + }, + "M=314368,N=128": { + "file": "silu_config_M314368_N128.json", + "M": 314368, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.23974999999996 + }, + "M=314368,N=160": { + "file": "silu_config_M314368_N160.json", + "M": 314368, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.36075000000005 + }, + "M=314368,N=192": { + "file": "silu_config_M314368_N192.json", + "M": 314368, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 391.8405 + }, + "M=314368,N=256": { + "file": "silu_config_M314368_N256.json", + "M": 314368, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.88025000000016 + }, + "M=314368,N=320": { + "file": "silu_config_M314368_N320.json", + "M": 314368, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.9220000000007 + }, + "M=314368,N=384": { + "file": "silu_config_M314368_N384.json", + "M": 314368, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.5622500000004 + }, + "M=314368,N=480": { + "file": "silu_config_M314368_N480.json", + "M": 314368, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 777.242 + }, + "M=314368,N=512": { + "file": "silu_config_M314368_N512.json", + "M": 314368, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 614.3615000000009 + }, + "M=314368,N=576": { + "file": "silu_config_M314368_N576.json", + "M": 314368, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1477.5650000000005 + }, + "M=314368,N=640": { + "file": "silu_config_M314368_N640.json", + "M": 314368, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1460.4849999999997 + }, + "M=314368,N=768": { + "file": "silu_config_M314368_N768.json", + "M": 314368, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1454.8450000000003 + }, + "M=314368,N=800": { + "file": "silu_config_M314368_N800.json", + "M": 314368, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1489.4050000000002 + }, + "M=314368,N=896": { + "file": "silu_config_M314368_N896.json", + "M": 314368, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.5649999999996 + }, + "M=314368,N=960": { + "file": "silu_config_M314368_N960.json", + "M": 314368, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1490.0449999999996 + }, + "M=314368,N=1024": { + "file": "silu_config_M314368_N1024.json", + "M": 314368, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1190.0037500000003 + }, + "M=314368,N=1120": { + "file": "silu_config_M314368_N1120.json", + "M": 314368, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2912.7709999999997 + }, + "M=314368,N=1152": { + "file": "silu_config_M314368_N1152.json", + "M": 314368, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.611000000001 + }, + "M=314368,N=1280": { + "file": "silu_config_M314368_N1280.json", + "M": 314368, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.570999999999 + }, + "M=314368,N=1344": { + "file": "silu_config_M314368_N1344.json", + "M": 314368, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.0910000000003 + }, + "M=314368,N=1408": { + "file": "silu_config_M314368_N1408.json", + "M": 314368, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.490749999999 + }, + "M=314368,N=1440": { + "file": "silu_config_M314368_N1440.json", + "M": 314368, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2948.4109999999973 + }, + "M=314368,N=1536": { + "file": "silu_config_M314368_N1536.json", + "M": 314368, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.651 + }, + "M=314368,N=1600": { + "file": "silu_config_M314368_N1600.json", + "M": 314368, + "N": 1600, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.2110000000002 + }, + "M=314368,N=1664": { + "file": "silu_config_M314368_N1664.json", + "M": 314368, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2922.0110000000004 + }, + "M=314368,N=1728": { + "file": "silu_config_M314368_N1728.json", + "M": 314368, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2945.1309999999994 + }, + "M=314368,N=1760": { + "file": "silu_config_M314368_N1760.json", + "M": 314368, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2961.770999999999 + }, + "M=314368,N=1792": { + "file": "silu_config_M314368_N1792.json", + "M": 314368, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.730999999999 + }, + "M=314368,N=1920": { + "file": "silu_config_M314368_N1920.json", + "M": 314368, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.2909999999974 + }, + "M=314368,N=2048": { + "file": "silu_config_M314368_N2048.json", + "M": 314368, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2336.3684999999996 + }, + "M=314368,N=2080": { + "file": "silu_config_M314368_N2080.json", + "M": 314368, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3252.6122499999983 + }, + "M=314368,N=2240": { + "file": "silu_config_M314368_N2240.json", + "M": 314368, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3395.3330000000005 + }, + "M=314368,N=2400": { + "file": "silu_config_M314368_N2400.json", + "M": 314368, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3617.93375 + }, + "M=314368,N=2560": { + "file": "silu_config_M314368_N2560.json", + "M": 314368, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3764.7745000000014 + }, + "M=315392,N=128": { + "file": "silu_config_M315392_N128.json", + "M": 315392, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.07975000000005 + }, + "M=315392,N=160": { + "file": 
"silu_config_M315392_N160.json", + "M": 315392, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.88049999999987 + }, + "M=315392,N=192": { + "file": "silu_config_M315392_N192.json", + "M": 315392, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 372.2805000000001 + }, + "M=315392,N=256": { + "file": "silu_config_M315392_N256.json", + "M": 315392, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.9202499999999 + }, + "M=315392,N=320": { + "file": "silu_config_M315392_N320.json", + "M": 315392, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.5219999999999 + }, + "M=315392,N=384": { + "file": "silu_config_M315392_N384.json", + "M": 315392, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.0422499999997 + }, + "M=315392,N=480": { + "file": "silu_config_M315392_N480.json", + "M": 315392, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 779.4422499999998 + }, + "M=315392,N=512": { + "file": "silu_config_M315392_N512.json", + "M": 315392, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 615.3615 + }, + "M=315392,N=576": { + "file": "silu_config_M315392_N576.json", + "M": 315392, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.7649999999999 + }, + "M=315392,N=640": { + "file": "silu_config_M315392_N640.json", + "M": 315392, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1464.8850000000007 + }, + "M=315392,N=768": { + "file": "silu_config_M315392_N768.json", + "M": 315392, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.005 + }, + "M=315392,N=800": { + "file": "silu_config_M315392_N800.json", + "M": 315392, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.125 + }, + "M=315392,N=896": { + "file": "silu_config_M315392_N896.json", + "M": 315392, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.4849999999997 + }, + "M=315392,N=960": { + 
"file": "silu_config_M315392_N960.json", + "M": 315392, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.125 + }, + "M=315392,N=1024": { + "file": "silu_config_M315392_N1024.json", + "M": 315392, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1193.7637499999996 + }, + "M=315392,N=1120": { + "file": "silu_config_M315392_N1120.json", + "M": 315392, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2942.610999999999 + }, + "M=315392,N=1152": { + "file": "silu_config_M315392_N1152.json", + "M": 315392, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2909.571000000001 + }, + "M=315392,N=1280": { + "file": "silu_config_M315392_N1280.json", + "M": 315392, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.7709999999997 + }, + "M=315392,N=1344": { + "file": "silu_config_M315392_N1344.json", + "M": 315392, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.7709999999997 + }, + "M=315392,N=1408": { + "file": "silu_config_M315392_N1408.json", + "M": 315392, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.3709999999983 + }, + "M=315392,N=1440": { + "file": "silu_config_M315392_N1440.json", + "M": 315392, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.8510000000015 + }, + "M=315392,N=1536": { + "file": "silu_config_M315392_N1536.json", + "M": 315392, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.571000000001 + }, + "M=315392,N=1600": { + "file": "silu_config_M315392_N1600.json", + "M": 315392, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2961.450999999999 + }, + "M=315392,N=1664": { + "file": "silu_config_M315392_N1664.json", + "M": 315392, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2931.6110000000017 + }, + "M=315392,N=1728": { + "file": "silu_config_M315392_N1728.json", + "M": 315392, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2954.9309999999996 + }, + "M=315392,N=1760": { + "file": "silu_config_M315392_N1760.json", + "M": 315392, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2982.3312499999993 + }, + "M=315392,N=1792": { + "file": "silu_config_M315392_N1792.json", + "M": 315392, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2939.1710000000003 + }, + "M=315392,N=1920": { + "file": "silu_config_M315392_N1920.json", + "M": 315392, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2949.810999999999 + }, + "M=315392,N=2048": { + "file": "silu_config_M315392_N2048.json", + "M": 315392, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2344.4085000000014 + }, + "M=315392,N=2080": { + "file": "silu_config_M315392_N2080.json", + "M": 315392, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3252.53225 + }, + "M=315392,N=2240": { + "file": "silu_config_M315392_N2240.json", + "M": 315392, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3425.893000000002 + }, + "M=315392,N=2400": { + "file": "silu_config_M315392_N2400.json", + "M": 315392, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3610.053749999999 + }, + "M=315392,N=2560": { + "file": "silu_config_M315392_N2560.json", + "M": 315392, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3779.294500000002 + }, + "M=316416,N=128": { + "file": "silu_config_M316416_N128.json", + "M": 316416, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.59975000000003 + }, + "M=316416,N=160": { + "file": "silu_config_M316416_N160.json", + "M": 316416, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.4005000000001 + }, + "M=316416,N=192": { + "file": "silu_config_M316416_N192.json", + "M": 316416, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.4005000000003 + }, + "M=316416,N=256": { + "file": "silu_config_M316416_N256.json", + "M": 316416, + "N": 
256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.1205 + }, + "M=316416,N=320": { + "file": "silu_config_M316416_N320.json", + "M": 316416, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.3620000000005 + }, + "M=316416,N=384": { + "file": "silu_config_M316416_N384.json", + "M": 316416, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.4422500000007 + }, + "M=316416,N=480": { + "file": "silu_config_M316416_N480.json", + "M": 316416, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.8822500000003 + }, + "M=316416,N=512": { + "file": "silu_config_M316416_N512.json", + "M": 316416, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 614.2014999999999 + }, + "M=316416,N=576": { + "file": "silu_config_M316416_N576.json", + "M": 316416, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1481.9649999999997 + }, + "M=316416,N=640": { + "file": "silu_config_M316416_N640.json", + "M": 316416, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.2449999999994 + }, + "M=316416,N=768": { + "file": "silu_config_M316416_N768.json", + "M": 316416, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1485.3250000000007 + }, + "M=316416,N=800": { + "file": "silu_config_M316416_N800.json", + "M": 316416, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1488.645000000001 + }, + "M=316416,N=896": { + "file": "silu_config_M316416_N896.json", + "M": 316416, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1486.2049999999995 + }, + "M=316416,N=960": { + "file": "silu_config_M316416_N960.json", + "M": 316416, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1485.2450000000003 + }, + "M=316416,N=1024": { + "file": "silu_config_M316416_N1024.json", + "M": 316416, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1198.2837500000014 + }, + "M=316416,N=1120": { + "file": 
"silu_config_M316416_N1120.json", + "M": 316416, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.7310000000034 + }, + "M=316416,N=1152": { + "file": "silu_config_M316416_N1152.json", + "M": 316416, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.490999999998 + }, + "M=316416,N=1280": { + "file": "silu_config_M316416_N1280.json", + "M": 316416, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2945.291 + }, + "M=316416,N=1344": { + "file": "silu_config_M316416_N1344.json", + "M": 316416, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2949.450999999999 + }, + "M=316416,N=1408": { + "file": "silu_config_M316416_N1408.json", + "M": 316416, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.1710000000003 + }, + "M=316416,N=1440": { + "file": "silu_config_M316416_N1440.json", + "M": 316416, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2946.611000000001 + }, + "M=316416,N=1536": { + "file": "silu_config_M316416_N1536.json", + "M": 316416, + "N": 1536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2965.73125 + }, + "M=316416,N=1600": { + "file": "silu_config_M316416_N1600.json", + "M": 316416, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2970.8512499999997 + }, + "M=316416,N=1664": { + "file": "silu_config_M316416_N1664.json", + "M": 316416, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.770999999998 + }, + "M=316416,N=1728": { + "file": "silu_config_M316416_N1728.json", + "M": 316416, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2974.9312499999987 + }, + "M=316416,N=1760": { + "file": "silu_config_M316416_N1760.json", + "M": 316416, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2960.411 + }, + "M=316416,N=1792": { + "file": "silu_config_M316416_N1792.json", + "M": 316416, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2948.770999999998 + }, + "M=316416,N=1920": { + "file": "silu_config_M316416_N1920.json", + "M": 316416, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2938.4909999999973 + }, + "M=316416,N=2048": { + "file": "silu_config_M316416_N2048.json", + "M": 316416, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2351.888499999998 + }, + "M=316416,N=2080": { + "file": "silu_config_M316416_N2080.json", + "M": 316416, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3263.6922499999982 + }, + "M=316416,N=2240": { + "file": "silu_config_M316416_N2240.json", + "M": 316416, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3427.252999999997 + }, + "M=316416,N=2400": { + "file": "silu_config_M316416_N2400.json", + "M": 316416, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3642.134 + }, + "M=316416,N=2560": { + "file": "silu_config_M316416_N2560.json", + "M": 316416, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3782.7745000000014 + }, + "M=317440,N=128": { + "file": "silu_config_M317440_N128.json", + "M": 317440, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.39975000000004 + }, + "M=317440,N=160": { + "file": "silu_config_M317440_N160.json", + "M": 317440, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.3204999999999 + }, + "M=317440,N=192": { + "file": "silu_config_M317440_N192.json", + "M": 317440, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 372.16049999999996 + }, + "M=317440,N=256": { + "file": "silu_config_M317440_N256.json", + "M": 317440, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.2404999999999 + }, + "M=317440,N=320": { + "file": "silu_config_M317440_N320.json", + "M": 317440, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 764.8020000000001 + }, + "M=317440,N=384": { + "file": "silu_config_M317440_N384.json", + "M": 317440, + "N": 384, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 759.6022500000001 + }, + "M=317440,N=480": { + "file": "silu_config_M317440_N480.json", + "M": 317440, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.9222500000001 + }, + "M=317440,N=512": { + "file": "silu_config_M317440_N512.json", + "M": 317440, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.7614999999998 + }, + "M=317440,N=576": { + "file": "silu_config_M317440_N576.json", + "M": 317440, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1456.565 + }, + "M=317440,N=640": { + "file": "silu_config_M317440_N640.json", + "M": 317440, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.165 + }, + "M=317440,N=768": { + "file": "silu_config_M317440_N768.json", + "M": 317440, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.8850000000007 + }, + "M=317440,N=800": { + "file": "silu_config_M317440_N800.json", + "M": 317440, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1503.5249999999996 + }, + "M=317440,N=896": { + "file": "silu_config_M317440_N896.json", + "M": 317440, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.925000000001 + }, + "M=317440,N=960": { + "file": "silu_config_M317440_N960.json", + "M": 317440, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.3650000000011 + }, + "M=317440,N=1024": { + "file": "silu_config_M317440_N1024.json", + "M": 317440, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1201.24375 + }, + "M=317440,N=1120": { + "file": "silu_config_M317440_N1120.json", + "M": 317440, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.171000000001 + }, + "M=317440,N=1152": { + "file": "silu_config_M317440_N1152.json", + "M": 317440, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.531000000001 + }, + "M=317440,N=1280": { + "file": "silu_config_M317440_N1280.json", + "M": 317440, 
+ "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2895.0107499999986 + }, + "M=317440,N=1344": { + "file": "silu_config_M317440_N1344.json", + "M": 317440, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.7310000000007 + }, + "M=317440,N=1408": { + "file": "silu_config_M317440_N1408.json", + "M": 317440, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.410999999999 + }, + "M=317440,N=1440": { + "file": "silu_config_M317440_N1440.json", + "M": 317440, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.810999999997 + }, + "M=317440,N=1536": { + "file": "silu_config_M317440_N1536.json", + "M": 317440, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2905.9707500000013 + }, + "M=317440,N=1600": { + "file": "silu_config_M317440_N1600.json", + "M": 317440, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.451000000001 + }, + "M=317440,N=1664": { + "file": "silu_config_M317440_N1664.json", + "M": 317440, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.8509999999987 + }, + "M=317440,N=1728": { + "file": "silu_config_M317440_N1728.json", + "M": 317440, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2962.530999999999 + }, + "M=317440,N=1760": { + "file": "silu_config_M317440_N1760.json", + "M": 317440, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.371000000003 + }, + "M=317440,N=1792": { + "file": "silu_config_M317440_N1792.json", + "M": 317440, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.7710000000015 + }, + "M=317440,N=1920": { + "file": "silu_config_M317440_N1920.json", + "M": 317440, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2938.1709999999966 + }, + "M=317440,N=2048": { + "file": "silu_config_M317440_N2048.json", + "M": 317440, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2358.448499999996 + }, + 
"M=317440,N=2080": { + "file": "silu_config_M317440_N2080.json", + "M": 317440, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3251.3322499999995 + }, + "M=317440,N=2240": { + "file": "silu_config_M317440_N2240.json", + "M": 317440, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3423.813000000002 + }, + "M=317440,N=2400": { + "file": "silu_config_M317440_N2400.json", + "M": 317440, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3605.0537499999973 + }, + "M=317440,N=2560": { + "file": "silu_config_M317440_N2560.json", + "M": 317440, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3752.534249999999 + }, + "M=318464,N=128": { + "file": "silu_config_M318464_N128.json", + "M": 318464, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.79974999999996 + }, + "M=318464,N=160": { + "file": "silu_config_M318464_N160.json", + "M": 318464, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.7205 + }, + "M=318464,N=192": { + "file": "silu_config_M318464_N192.json", + "M": 318464, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.5607500000001 + }, + "M=318464,N=256": { + "file": "silu_config_M318464_N256.json", + "M": 318464, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 367.3605 + }, + "M=318464,N=320": { + "file": "silu_config_M318464_N320.json", + "M": 318464, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.3220000000006 + }, + "M=318464,N=384": { + "file": "silu_config_M318464_N384.json", + "M": 318464, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.402 + }, + "M=318464,N=480": { + "file": "silu_config_M318464_N480.json", + "M": 318464, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.8822499999999 + }, + "M=318464,N=512": { + "file": "silu_config_M318464_N512.json", + "M": 318464, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 
605.1614999999997 + }, + "M=318464,N=576": { + "file": "silu_config_M318464_N576.json", + "M": 318464, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1496.8049999999998 + }, + "M=318464,N=640": { + "file": "silu_config_M318464_N640.json", + "M": 318464, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.5250000000005 + }, + "M=318464,N=768": { + "file": "silu_config_M318464_N768.json", + "M": 318464, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1478.8050000000012 + }, + "M=318464,N=800": { + "file": "silu_config_M318464_N800.json", + "M": 318464, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.165 + }, + "M=318464,N=896": { + "file": "silu_config_M318464_N896.json", + "M": 318464, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.6849999999995 + }, + "M=318464,N=960": { + "file": "silu_config_M318464_N960.json", + "M": 318464, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.64525 + }, + "M=318464,N=1024": { + "file": "silu_config_M318464_N1024.json", + "M": 318464, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1206.2839999999997 + }, + "M=318464,N=1120": { + "file": "silu_config_M318464_N1120.json", + "M": 318464, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2950.330999999999 + }, + "M=318464,N=1152": { + "file": "silu_config_M318464_N1152.json", + "M": 318464, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.9709999999986 + }, + "M=318464,N=1280": { + "file": "silu_config_M318464_N1280.json", + "M": 318464, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2953.5710000000017 + }, + "M=318464,N=1344": { + "file": "silu_config_M318464_N1344.json", + "M": 318464, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.091250000002 + }, + "M=318464,N=1408": { + "file": "silu_config_M318464_N1408.json", + "M": 318464, + "N": 1408, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 2958.331 + }, + "M=318464,N=1440": { + "file": "silu_config_M318464_N1440.json", + "M": 318464, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.8112500000016 + }, + "M=318464,N=1536": { + "file": "silu_config_M318464_N1536.json", + "M": 318464, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2954.5310000000018 + }, + "M=318464,N=1600": { + "file": "silu_config_M318464_N1600.json", + "M": 318464, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.3712499999983 + }, + "M=318464,N=1664": { + "file": "silu_config_M318464_N1664.json", + "M": 318464, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2959.8910000000005 + }, + "M=318464,N=1728": { + "file": "silu_config_M318464_N1728.json", + "M": 318464, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2993.6912500000017 + }, + "M=318464,N=1760": { + "file": "silu_config_M318464_N1760.json", + "M": 318464, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.451250000001 + }, + "M=318464,N=1792": { + "file": "silu_config_M318464_N1792.json", + "M": 318464, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2967.490999999998 + }, + "M=318464,N=1920": { + "file": "silu_config_M318464_N1920.json", + "M": 318464, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2967.691 + }, + "M=318464,N=2048": { + "file": "silu_config_M318464_N2048.json", + "M": 318464, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2366.6487499999994 + }, + "M=318464,N=2080": { + "file": "silu_config_M318464_N2080.json", + "M": 318464, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.692500000001 + }, + "M=318464,N=2240": { + "file": "silu_config_M318464_N2240.json", + "M": 318464, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3469.1732500000016 + }, + "M=318464,N=2400": { + "file": 
"silu_config_M318464_N2400.json", + "M": 318464, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3665.5340000000015 + }, + "M=318464,N=2560": { + "file": "silu_config_M318464_N2560.json", + "M": 318464, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3818.054750000001 + }, + "M=319488,N=128": { + "file": "silu_config_M319488_N128.json", + "M": 319488, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 186.71974999999998 + }, + "M=319488,N=160": { + "file": "silu_config_M319488_N160.json", + "M": 319488, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.8404999999999 + }, + "M=319488,N=192": { + "file": "silu_config_M319488_N192.json", + "M": 319488, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.92049999999995 + }, + "M=319488,N=256": { + "file": "silu_config_M319488_N256.json", + "M": 319488, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.0402499999999 + }, + "M=319488,N=320": { + "file": "silu_config_M319488_N320.json", + "M": 319488, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0420000000001 + }, + "M=319488,N=384": { + "file": "silu_config_M319488_N384.json", + "M": 319488, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 769.8020000000001 + }, + "M=319488,N=480": { + "file": "silu_config_M319488_N480.json", + "M": 319488, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 789.0422499999997 + }, + "M=319488,N=512": { + "file": "silu_config_M319488_N512.json", + "M": 319488, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.5215000000001 + }, + "M=319488,N=576": { + "file": "silu_config_M319488_N576.json", + "M": 319488, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.6449999999995 + }, + "M=319488,N=640": { + "file": "silu_config_M319488_N640.json", + "M": 319488, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.605 + }, + 
"M=319488,N=768": { + "file": "silu_config_M319488_N768.json", + "M": 319488, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1478.3250000000003 + }, + "M=319488,N=800": { + "file": "silu_config_M319488_N800.json", + "M": 319488, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.4849999999997 + }, + "M=319488,N=896": { + "file": "silu_config_M319488_N896.json", + "M": 319488, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.525000000001 + }, + "M=319488,N=960": { + "file": "silu_config_M319488_N960.json", + "M": 319488, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1520.8452500000003 + }, + "M=319488,N=1024": { + "file": "silu_config_M319488_N1024.json", + "M": 319488, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1209.1239999999998 + }, + "M=319488,N=1120": { + "file": "silu_config_M319488_N1120.json", + "M": 319488, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2970.1312499999985 + }, + "M=319488,N=1152": { + "file": "silu_config_M319488_N1152.json", + "M": 319488, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.171000000001 + }, + "M=319488,N=1280": { + "file": "silu_config_M319488_N1280.json", + "M": 319488, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2942.570999999998 + }, + "M=319488,N=1344": { + "file": "silu_config_M319488_N1344.json", + "M": 319488, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.3312499999975 + }, + "M=319488,N=1408": { + "file": "silu_config_M319488_N1408.json", + "M": 319488, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.651 + }, + "M=319488,N=1440": { + "file": "silu_config_M319488_N1440.json", + "M": 319488, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.491250000002 + }, + "M=319488,N=1536": { + "file": "silu_config_M319488_N1536.json", + "M": 319488, + "N": 1536, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2974.731249999998 + }, + "M=319488,N=1600": { + "file": "silu_config_M319488_N1600.json", + "M": 319488, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8912499999997 + }, + "M=319488,N=1664": { + "file": "silu_config_M319488_N1664.json", + "M": 319488, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.611 + }, + "M=319488,N=1728": { + "file": "silu_config_M319488_N1728.json", + "M": 319488, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2972.0512499999995 + }, + "M=319488,N=1760": { + "file": "silu_config_M319488_N1760.json", + "M": 319488, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.77125 + }, + "M=319488,N=1792": { + "file": "silu_config_M319488_N1792.json", + "M": 319488, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2976.6912500000008 + }, + "M=319488,N=1920": { + "file": "silu_config_M319488_N1920.json", + "M": 319488, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.41125 + }, + "M=319488,N=2048": { + "file": "silu_config_M319488_N2048.json", + "M": 319488, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2353.0485 + }, + "M=319488,N=2080": { + "file": "silu_config_M319488_N2080.json", + "M": 319488, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3305.1724999999988 + }, + "M=319488,N=2240": { + "file": "silu_config_M319488_N2240.json", + "M": 319488, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3480.813250000001 + }, + "M=319488,N=2400": { + "file": "silu_config_M319488_N2400.json", + "M": 319488, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3677.4540000000015 + }, + "M=319488,N=2560": { + "file": "silu_config_M319488_N2560.json", + "M": 319488, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3805.2545 + }, + "M=320512,N=128": { + "file": "silu_config_M320512_N128.json", + "M": 320512, + "N": 128, 
+ "rows_per_block": 8, + "vec_size": 2, + "time_us": 175.9197499999999 + }, + "M=320512,N=160": { + "file": "silu_config_M320512_N160.json", + "M": 320512, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 392.0405000000003 + }, + "M=320512,N=192": { + "file": "silu_config_M320512_N192.json", + "M": 320512, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.04050000000007 + }, + "M=320512,N=256": { + "file": "silu_config_M320512_N256.json", + "M": 320512, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 358.20050000000015 + }, + "M=320512,N=320": { + "file": "silu_config_M320512_N320.json", + "M": 320512, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 777.5219999999999 + }, + "M=320512,N=384": { + "file": "silu_config_M320512_N384.json", + "M": 320512, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 773.0020000000006 + }, + "M=320512,N=480": { + "file": "silu_config_M320512_N480.json", + "M": 320512, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.28225 + }, + "M=320512,N=512": { + "file": "silu_config_M320512_N512.json", + "M": 320512, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 618.4417500000002 + }, + "M=320512,N=576": { + "file": "silu_config_M320512_N576.json", + "M": 320512, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1500.685 + }, + "M=320512,N=640": { + "file": "silu_config_M320512_N640.json", + "M": 320512, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1484.4850000000001 + }, + "M=320512,N=768": { + "file": "silu_config_M320512_N768.json", + "M": 320512, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.0849999999996 + }, + "M=320512,N=800": { + "file": "silu_config_M320512_N800.json", + "M": 320512, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.005250000001 + }, + "M=320512,N=896": { + "file": "silu_config_M320512_N896.json", + "M": 320512, + 
"N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.5652499999997 + }, + "M=320512,N=960": { + "file": "silu_config_M320512_N960.json", + "M": 320512, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1530.3652499999994 + }, + "M=320512,N=1024": { + "file": "silu_config_M320512_N1024.json", + "M": 320512, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1219.004 + }, + "M=320512,N=1120": { + "file": "silu_config_M320512_N1120.json", + "M": 320512, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.1712500000003 + }, + "M=320512,N=1152": { + "file": "silu_config_M320512_N1152.json", + "M": 320512, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2959.2109999999993 + }, + "M=320512,N=1280": { + "file": "silu_config_M320512_N1280.json", + "M": 320512, + "N": 1280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2984.8512499999997 + }, + "M=320512,N=1344": { + "file": "silu_config_M320512_N1344.json", + "M": 320512, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.1312500000004 + }, + "M=320512,N=1408": { + "file": "silu_config_M320512_N1408.json", + "M": 320512, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.651249999999 + }, + "M=320512,N=1440": { + "file": "silu_config_M320512_N1440.json", + "M": 320512, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3000.77125 + }, + "M=320512,N=1536": { + "file": "silu_config_M320512_N1536.json", + "M": 320512, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.770999999999 + }, + "M=320512,N=1600": { + "file": "silu_config_M320512_N1600.json", + "M": 320512, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2991.451250000001 + }, + "M=320512,N=1664": { + "file": "silu_config_M320512_N1664.json", + "M": 320512, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2985.0112499999996 + }, + "M=320512,N=1728": { + "file": 
"silu_config_M320512_N1728.json", + "M": 320512, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3014.61125 + }, + "M=320512,N=1760": { + "file": "silu_config_M320512_N1760.json", + "M": 320512, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.411250000001 + }, + "M=320512,N=1792": { + "file": "silu_config_M320512_N1792.json", + "M": 320512, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2972.411249999999 + }, + "M=320512,N=1920": { + "file": "silu_config_M320512_N1920.json", + "M": 320512, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.61125 + }, + "M=320512,N=2048": { + "file": "silu_config_M320512_N2048.json", + "M": 320512, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2391.9287500000005 + }, + "M=320512,N=2080": { + "file": "silu_config_M320512_N2080.json", + "M": 320512, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3304.3725000000013 + }, + "M=320512,N=2240": { + "file": "silu_config_M320512_N2240.json", + "M": 320512, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3464.053249999999 + }, + "M=320512,N=2400": { + "file": "silu_config_M320512_N2400.json", + "M": 320512, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3656.934000000002 + }, + "M=320512,N=2560": { + "file": "silu_config_M320512_N2560.json", + "M": 320512, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3853.0147499999985 + }, + "M=321536,N=128": { + "file": "silu_config_M321536_N128.json", + "M": 321536, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.95974999999999 + }, + "M=321536,N=160": { + "file": "silu_config_M321536_N160.json", + "M": 321536, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.3204999999999 + }, + "M=321536,N=192": { + "file": "silu_config_M321536_N192.json", + "M": 321536, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 411.8405 + 
}, + "M=321536,N=256": { + "file": "silu_config_M321536_N256.json", + "M": 321536, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 360.8805000000002 + }, + "M=321536,N=320": { + "file": "silu_config_M321536_N320.json", + "M": 321536, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 780.9222500000001 + }, + "M=321536,N=384": { + "file": "silu_config_M321536_N384.json", + "M": 321536, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0820000000001 + }, + "M=321536,N=480": { + "file": "silu_config_M321536_N480.json", + "M": 321536, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 797.28225 + }, + "M=321536,N=512": { + "file": "silu_config_M321536_N512.json", + "M": 321536, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 628.4814999999996 + }, + "M=321536,N=576": { + "file": "silu_config_M321536_N576.json", + "M": 321536, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1501.5250000000005 + }, + "M=321536,N=640": { + "file": "silu_config_M321536_N640.json", + "M": 321536, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.4449999999997 + }, + "M=321536,N=768": { + "file": "silu_config_M321536_N768.json", + "M": 321536, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1494.045 + }, + "M=321536,N=800": { + "file": "silu_config_M321536_N800.json", + "M": 321536, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1509.5652499999997 + }, + "M=321536,N=896": { + "file": "silu_config_M321536_N896.json", + "M": 321536, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1512.3252499999999 + }, + "M=321536,N=960": { + "file": "silu_config_M321536_N960.json", + "M": 321536, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.2052499999995 + }, + "M=321536,N=1024": { + "file": "silu_config_M321536_N1024.json", + "M": 321536, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 
1221.4840000000004 + }, + "M=321536,N=1120": { + "file": "silu_config_M321536_N1120.json", + "M": 321536, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.4112499999983 + }, + "M=321536,N=1152": { + "file": "silu_config_M321536_N1152.json", + "M": 321536, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.0512499999986 + }, + "M=321536,N=1280": { + "file": "silu_config_M321536_N1280.json", + "M": 321536, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.9712500000014 + }, + "M=321536,N=1344": { + "file": "silu_config_M321536_N1344.json", + "M": 321536, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2997.6912500000017 + }, + "M=321536,N=1408": { + "file": "silu_config_M321536_N1408.json", + "M": 321536, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.291250000001 + }, + "M=321536,N=1440": { + "file": "silu_config_M321536_N1440.json", + "M": 321536, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.1712499999994 + }, + "M=321536,N=1536": { + "file": "silu_config_M321536_N1536.json", + "M": 321536, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2993.1712500000003 + }, + "M=321536,N=1600": { + "file": "silu_config_M321536_N1600.json", + "M": 321536, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.651499999997 + }, + "M=321536,N=1664": { + "file": "silu_config_M321536_N1664.json", + "M": 321536, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.8912500000024 + }, + "M=321536,N=1728": { + "file": "silu_config_M321536_N1728.json", + "M": 321536, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3021.5715000000027 + }, + "M=321536,N=1760": { + "file": "silu_config_M321536_N1760.json", + "M": 321536, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3029.851499999999 + }, + "M=321536,N=1792": { + "file": "silu_config_M321536_N1792.json", + "M": 
321536, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2992.4512499999983 + }, + "M=321536,N=1920": { + "file": "silu_config_M321536_N1920.json", + "M": 321536, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3014.9312500000015 + }, + "M=321536,N=2048": { + "file": "silu_config_M321536_N2048.json", + "M": 321536, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2399.248749999999 + }, + "M=321536,N=2080": { + "file": "silu_config_M321536_N2080.json", + "M": 321536, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3352.93275 + }, + "M=321536,N=2240": { + "file": "silu_config_M321536_N2240.json", + "M": 321536, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3494.3732500000006 + }, + "M=321536,N=2400": { + "file": "silu_config_M321536_N2400.json", + "M": 321536, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3707.5742500000015 + }, + "M=321536,N=2560": { + "file": "silu_config_M321536_N2560.json", + "M": 321536, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3848.0147500000003 + }, + "M=322560,N=128": { + "file": "silu_config_M322560_N128.json", + "M": 322560, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.55999999999995 + }, + "M=322560,N=160": { + "file": "silu_config_M322560_N160.json", + "M": 322560, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.76049999999987 + }, + "M=322560,N=192": { + "file": "silu_config_M322560_N192.json", + "M": 322560, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.52049999999974 + }, + "M=322560,N=256": { + "file": "silu_config_M322560_N256.json", + "M": 322560, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.60024999999985 + }, + "M=322560,N=320": { + "file": "silu_config_M322560_N320.json", + "M": 322560, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.3220000000001 + }, + "M=322560,N=384": { + 
"file": "silu_config_M322560_N384.json", + "M": 322560, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.6822499999998 + }, + "M=322560,N=480": { + "file": "silu_config_M322560_N480.json", + "M": 322560, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.2822500000004 + }, + "M=322560,N=512": { + "file": "silu_config_M322560_N512.json", + "M": 322560, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 625.4817500000008 + }, + "M=322560,N=576": { + "file": "silu_config_M322560_N576.json", + "M": 322560, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.2850000000003 + }, + "M=322560,N=640": { + "file": "silu_config_M322560_N640.json", + "M": 322560, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1511.1252499999996 + }, + "M=322560,N=768": { + "file": "silu_config_M322560_N768.json", + "M": 322560, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.7249999999995 + }, + "M=322560,N=800": { + "file": "silu_config_M322560_N800.json", + "M": 322560, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.8849999999998 + }, + "M=322560,N=896": { + "file": "silu_config_M322560_N896.json", + "M": 322560, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1501.3250000000003 + }, + "M=322560,N=960": { + "file": "silu_config_M322560_N960.json", + "M": 322560, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.5252500000001 + }, + "M=322560,N=1024": { + "file": "silu_config_M322560_N1024.json", + "M": 322560, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1224.6439999999998 + }, + "M=322560,N=1120": { + "file": "silu_config_M322560_N1120.json", + "M": 322560, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2980.9312500000005 + }, + "M=322560,N=1152": { + "file": "silu_config_M322560_N1152.json", + "M": 322560, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2958.491000000002 + }, + "M=322560,N=1280": { + "file": "silu_config_M322560_N1280.json", + "M": 322560, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2952.3309999999983 + }, + "M=322560,N=1344": { + "file": "silu_config_M322560_N1344.json", + "M": 322560, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.1712500000003 + }, + "M=322560,N=1408": { + "file": "silu_config_M322560_N1408.json", + "M": 322560, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8512499999997 + }, + "M=322560,N=1440": { + "file": "silu_config_M322560_N1440.json", + "M": 322560, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.0112499999996 + }, + "M=322560,N=1536": { + "file": "silu_config_M322560_N1536.json", + "M": 322560, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2983.251250000002 + }, + "M=322560,N=1600": { + "file": "silu_config_M322560_N1600.json", + "M": 322560, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3005.61125 + }, + "M=322560,N=1664": { + "file": "silu_config_M322560_N1664.json", + "M": 322560, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.7712500000034 + }, + "M=322560,N=1728": { + "file": "silu_config_M322560_N1728.json", + "M": 322560, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3008.531250000001 + }, + "M=322560,N=1760": { + "file": "silu_config_M322560_N1760.json", + "M": 322560, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2995.2512500000003 + }, + "M=322560,N=1792": { + "file": "silu_config_M322560_N1792.json", + "M": 322560, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2981.9312500000005 + }, + "M=322560,N=1920": { + "file": "silu_config_M322560_N1920.json", + "M": 322560, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.811249999999 + }, + "M=322560,N=2048": { + "file": "silu_config_M322560_N2048.json", + "M": 322560, + 
"N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2415.2087500000034 + }, + "M=322560,N=2080": { + "file": "silu_config_M322560_N2080.json", + "M": 322560, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3320.1725000000006 + }, + "M=322560,N=2240": { + "file": "silu_config_M322560_N2240.json", + "M": 322560, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3470.5732500000013 + }, + "M=322560,N=2400": { + "file": "silu_config_M322560_N2400.json", + "M": 322560, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3691.173999999999 + }, + "M=322560,N=2560": { + "file": "silu_config_M322560_N2560.json", + "M": 322560, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3836.374750000001 + }, + "M=323584,N=128": { + "file": "silu_config_M323584_N128.json", + "M": 323584, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.75975000000005 + }, + "M=323584,N=160": { + "file": "silu_config_M323584_N160.json", + "M": 323584, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.04050000000007 + }, + "M=323584,N=192": { + "file": "silu_config_M323584_N192.json", + "M": 323584, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.6005 + }, + "M=323584,N=256": { + "file": "silu_config_M323584_N256.json", + "M": 323584, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.1205 + }, + "M=323584,N=320": { + "file": "silu_config_M323584_N320.json", + "M": 323584, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 787.0422500000004 + }, + "M=323584,N=384": { + "file": "silu_config_M323584_N384.json", + "M": 323584, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 780.2422499999998 + }, + "M=323584,N=480": { + "file": "silu_config_M323584_N480.json", + "M": 323584, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 802.5222500000004 + }, + "M=323584,N=512": { + "file": 
"silu_config_M323584_N512.json", + "M": 323584, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 623.2014999999999 + }, + "M=323584,N=576": { + "file": "silu_config_M323584_N576.json", + "M": 323584, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1505.8449999999998 + }, + "M=323584,N=640": { + "file": "silu_config_M323584_N640.json", + "M": 323584, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1513.8452500000003 + }, + "M=323584,N=768": { + "file": "silu_config_M323584_N768.json", + "M": 323584, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1514.80525 + }, + "M=323584,N=800": { + "file": "silu_config_M323584_N800.json", + "M": 323584, + "N": 800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1530.3252499999994 + }, + "M=323584,N=896": { + "file": "silu_config_M323584_N896.json", + "M": 323584, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.6452500000005 + }, + "M=323584,N=960": { + "file": "silu_config_M323584_N960.json", + "M": 323584, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1535.9652499999997 + }, + "M=323584,N=1024": { + "file": "silu_config_M323584_N1024.json", + "M": 323584, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1229.4839999999995 + }, + "M=323584,N=1120": { + "file": "silu_config_M323584_N1120.json", + "M": 323584, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3022.331250000001 + }, + "M=323584,N=1152": { + "file": "silu_config_M323584_N1152.json", + "M": 323584, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.211250000001 + }, + "M=323584,N=1280": { + "file": "silu_config_M323584_N1280.json", + "M": 323584, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.611250000001 + }, + "M=323584,N=1344": { + "file": "silu_config_M323584_N1344.json", + "M": 323584, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
3016.1712500000003 + }, + "M=323584,N=1408": { + "file": "silu_config_M323584_N1408.json", + "M": 323584, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3005.8912500000006 + }, + "M=323584,N=1440": { + "file": "silu_config_M323584_N1440.json", + "M": 323584, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3036.2914999999985 + }, + "M=323584,N=1536": { + "file": "silu_config_M323584_N1536.json", + "M": 323584, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3011.7312500000016 + }, + "M=323584,N=1600": { + "file": "silu_config_M323584_N1600.json", + "M": 323584, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3026.651249999997 + }, + "M=323584,N=1664": { + "file": "silu_config_M323584_N1664.json", + "M": 323584, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3007.331250000002 + }, + "M=323584,N=1728": { + "file": "silu_config_M323584_N1728.json", + "M": 323584, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.3314999999984 + }, + "M=323584,N=1760": { + "file": "silu_config_M323584_N1760.json", + "M": 323584, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3049.091500000003 + }, + "M=323584,N=1792": { + "file": "silu_config_M323584_N1792.json", + "M": 323584, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.3314999999993 + }, + "M=323584,N=1920": { + "file": "silu_config_M323584_N1920.json", + "M": 323584, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.691249999996 + }, + "M=323584,N=2048": { + "file": "silu_config_M323584_N2048.json", + "M": 323584, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2392.5687499999995 + }, + "M=323584,N=2080": { + "file": "silu_config_M323584_N2080.json", + "M": 323584, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3364.2927500000023 + }, + "M=323584,N=2240": { + "file": "silu_config_M323584_N2240.json", + "M": 
323584, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3516.933250000002 + }, + "M=323584,N=2400": { + "file": "silu_config_M323584_N2400.json", + "M": 323584, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3750.8142500000013 + }, + "M=323584,N=2560": { + "file": "silu_config_M323584_N2560.json", + "M": 323584, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3878.374750000001 + }, + "M=324608,N=128": { + "file": "silu_config_M324608_N128.json", + "M": 324608, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 190.79975000000002 + }, + "M=324608,N=160": { + "file": "silu_config_M324608_N160.json", + "M": 324608, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.04075 + }, + "M=324608,N=192": { + "file": "silu_config_M324608_N192.json", + "M": 324608, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 404.60075000000006 + }, + "M=324608,N=256": { + "file": "silu_config_M324608_N256.json", + "M": 324608, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 364.04049999999984 + }, + "M=324608,N=320": { + "file": "silu_config_M324608_N320.json", + "M": 324608, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 788.3622500000001 + }, + "M=324608,N=384": { + "file": "silu_config_M324608_N384.json", + "M": 324608, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.4022500000012 + }, + "M=324608,N=480": { + "file": "silu_config_M324608_N480.json", + "M": 324608, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.6022499999999 + }, + "M=324608,N=512": { + "file": "silu_config_M324608_N512.json", + "M": 324608, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 629.08175 + }, + "M=324608,N=576": { + "file": "silu_config_M324608_N576.json", + "M": 324608, + "N": 576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1515.6052500000005 + }, + "M=324608,N=640": { + "file": 
"silu_config_M324608_N640.json", + "M": 324608, + "N": 640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1515.12525 + }, + "M=324608,N=768": { + "file": "silu_config_M324608_N768.json", + "M": 324608, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1524.24525 + }, + "M=324608,N=800": { + "file": "silu_config_M324608_N800.json", + "M": 324608, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.1650000000009 + }, + "M=324608,N=896": { + "file": "silu_config_M324608_N896.json", + "M": 324608, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1537.60525 + }, + "M=324608,N=960": { + "file": "silu_config_M324608_N960.json", + "M": 324608, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1537.2052500000004 + }, + "M=324608,N=1024": { + "file": "silu_config_M324608_N1024.json", + "M": 324608, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1232.7240000000002 + }, + "M=324608,N=1120": { + "file": "silu_config_M324608_N1120.json", + "M": 324608, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3031.65125 + }, + "M=324608,N=1152": { + "file": "silu_config_M324608_N1152.json", + "M": 324608, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.491249999998 + }, + "M=324608,N=1280": { + "file": "silu_config_M324608_N1280.json", + "M": 324608, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2989.691249999999 + }, + "M=324608,N=1344": { + "file": "silu_config_M324608_N1344.json", + "M": 324608, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3025.9712500000014 + }, + "M=324608,N=1408": { + "file": "silu_config_M324608_N1408.json", + "M": 324608, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.0112499999987 + }, + "M=324608,N=1440": { + "file": "silu_config_M324608_N1440.json", + "M": 324608, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3045.2515000000003 + }, + 
"M=324608,N=1536": { + "file": "silu_config_M324608_N1536.json", + "M": 324608, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3000.37125 + }, + "M=324608,N=1600": { + "file": "silu_config_M324608_N1600.json", + "M": 324608, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.8515000000016 + }, + "M=324608,N=1664": { + "file": "silu_config_M324608_N1664.json", + "M": 324608, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3016.491249999998 + }, + "M=324608,N=1728": { + "file": "silu_config_M324608_N1728.json", + "M": 324608, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3060.1315000000004 + }, + "M=324608,N=1760": { + "file": "silu_config_M324608_N1760.json", + "M": 324608, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3069.131500000003 + }, + "M=324608,N=1792": { + "file": "silu_config_M324608_N1792.json", + "M": 324608, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3030.5715 + }, + "M=324608,N=1920": { + "file": "silu_config_M324608_N1920.json", + "M": 324608, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3033.051500000003 + }, + "M=324608,N=2048": { + "file": "silu_config_M324608_N2048.json", + "M": 324608, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2421.96875 + }, + "M=324608,N=2080": { + "file": "silu_config_M324608_N2080.json", + "M": 324608, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3375.4127499999995 + }, + "M=324608,N=2240": { + "file": "silu_config_M324608_N2240.json", + "M": 324608, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3536.9734999999964 + }, + "M=324608,N=2400": { + "file": "silu_config_M324608_N2400.json", + "M": 324608, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3732.6142500000024 + }, + "M=324608,N=2560": { + "file": "silu_config_M324608_N2560.json", + "M": 324608, + "N": 2560, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3881.254750000002 + }, + "M=325632,N=128": { + "file": "silu_config_M325632_N128.json", + "M": 325632, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.23974999999996 + }, + "M=325632,N=160": { + "file": "silu_config_M325632_N160.json", + "M": 325632, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.9605 + }, + "M=325632,N=192": { + "file": "silu_config_M325632_N192.json", + "M": 325632, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.2007500000003 + }, + "M=325632,N=256": { + "file": "silu_config_M325632_N256.json", + "M": 325632, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.76049999999987 + }, + "M=325632,N=320": { + "file": "silu_config_M325632_N320.json", + "M": 325632, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 791.1222500000003 + }, + "M=325632,N=384": { + "file": "silu_config_M325632_N384.json", + "M": 325632, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 785.5222499999998 + }, + "M=325632,N=480": { + "file": "silu_config_M325632_N480.json", + "M": 325632, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.9622499999998 + }, + "M=325632,N=512": { + "file": "silu_config_M325632_N512.json", + "M": 325632, + "N": 512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 634.0014999999999 + }, + "M=325632,N=576": { + "file": "silu_config_M325632_N576.json", + "M": 325632, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1526.2052499999995 + }, + "M=325632,N=640": { + "file": "silu_config_M325632_N640.json", + "M": 325632, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1512.76525 + }, + "M=325632,N=768": { + "file": "silu_config_M325632_N768.json", + "M": 325632, + "N": 768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1524.165250000001 + }, + "M=325632,N=800": { + "file": "silu_config_M325632_N800.json", + "M": 325632, + "N": 800, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.6452499999996 + }, + "M=325632,N=896": { + "file": "silu_config_M325632_N896.json", + "M": 325632, + "N": 896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1542.2052500000013 + }, + "M=325632,N=960": { + "file": "silu_config_M325632_N960.json", + "M": 325632, + "N": 960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1556.4052499999998 + }, + "M=325632,N=1024": { + "file": "silu_config_M325632_N1024.json", + "M": 325632, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1236.5639999999994 + }, + "M=325632,N=1120": { + "file": "silu_config_M325632_N1120.json", + "M": 325632, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3051.491499999999 + }, + "M=325632,N=1152": { + "file": "silu_config_M325632_N1152.json", + "M": 325632, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.0512500000004 + }, + "M=325632,N=1280": { + "file": "silu_config_M325632_N1280.json", + "M": 325632, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.3712499999992 + }, + "M=325632,N=1344": { + "file": "silu_config_M325632_N1344.json", + "M": 325632, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3045.811499999999 + }, + "M=325632,N=1408": { + "file": "silu_config_M325632_N1408.json", + "M": 325632, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3015.6112500000027 + }, + "M=325632,N=1440": { + "file": "silu_config_M325632_N1440.json", + "M": 325632, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.771499999999 + }, + "M=325632,N=1536": { + "file": "silu_config_M325632_N1536.json", + "M": 325632, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.1712500000003 + }, + "M=325632,N=1600": { + "file": "silu_config_M325632_N1600.json", + "M": 325632, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3055.491500000001 + }, + "M=325632,N=1664": { + "file": 
"silu_config_M325632_N1664.json", + "M": 325632, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3015.9712499999987 + }, + "M=325632,N=1728": { + "file": "silu_config_M325632_N1728.json", + "M": 325632, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3059.611499999999 + }, + "M=325632,N=1760": { + "file": "silu_config_M325632_N1760.json", + "M": 325632, + "N": 1760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 3105.4917499999983 + }, + "M=325632,N=1792": { + "file": "silu_config_M325632_N1792.json", + "M": 325632, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.5315 + }, + "M=325632,N=1920": { + "file": "silu_config_M325632_N1920.json", + "M": 325632, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.2515000000003 + }, + "M=325632,N=2048": { + "file": "silu_config_M325632_N2048.json", + "M": 325632, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2428.3690000000015 + }, + "M=325632,N=2080": { + "file": "silu_config_M325632_N2080.json", + "M": 325632, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3395.3730000000014 + }, + "M=325632,N=2240": { + "file": "silu_config_M325632_N2240.json", + "M": 325632, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3538.653500000002 + }, + "M=325632,N=2400": { + "file": "silu_config_M325632_N2400.json", + "M": 325632, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3753.5742500000015 + }, + "M=325632,N=2560": { + "file": "silu_config_M325632_N2560.json", + "M": 325632, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3889.135000000002 + }, + "M=326656,N=128": { + "file": "silu_config_M326656_N128.json", + "M": 326656, + "N": 128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.55975 + }, + "M=326656,N=160": { + "file": "silu_config_M326656_N160.json", + "M": 326656, + "N": 160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.1205 + 
}, + "M=326656,N=192": { + "file": "silu_config_M326656_N192.json", + "M": 326656, + "N": 192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 401.52049999999974 + }, + "M=326656,N=256": { + "file": "silu_config_M326656_N256.json", + "M": 326656, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.2405000000001 + }, + "M=326656,N=320": { + "file": "silu_config_M326656_N320.json", + "M": 326656, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 793.9622500000003 + }, + "M=326656,N=384": { + "file": "silu_config_M326656_N384.json", + "M": 326656, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 787.6819999999998 + }, + "M=326656,N=480": { + "file": "silu_config_M326656_N480.json", + "M": 326656, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.3622500000001 + }, + "M=326656,N=512": { + "file": "silu_config_M326656_N512.json", + "M": 326656, + "N": 512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 638.5217499999999 + }, + "M=326656,N=576": { + "file": "silu_config_M326656_N576.json", + "M": 326656, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1540.965250000001 + }, + "M=326656,N=640": { + "file": "silu_config_M326656_N640.json", + "M": 326656, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.68525 + }, + "M=326656,N=768": { + "file": "silu_config_M326656_N768.json", + "M": 326656, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1522.8852500000003 + }, + "M=326656,N=800": { + "file": "silu_config_M326656_N800.json", + "M": 326656, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1549.08525 + }, + "M=326656,N=896": { + "file": "silu_config_M326656_N896.json", + "M": 326656, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.8452500000012 + }, + "M=326656,N=960": { + "file": "silu_config_M326656_N960.json", + "M": 326656, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1557.5652499999997 + }, + "M=326656,N=1024": { + "file": "silu_config_M326656_N1024.json", + "M": 326656, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1240.364 + }, + "M=326656,N=1120": { + "file": "silu_config_M326656_N1120.json", + "M": 326656, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.3714999999993 + }, + "M=326656,N=1152": { + "file": "silu_config_M326656_N1152.json", + "M": 326656, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3034.491499999999 + }, + "M=326656,N=1280": { + "file": "silu_config_M326656_N1280.json", + "M": 326656, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3018.61125 + }, + "M=326656,N=1344": { + "file": "silu_config_M326656_N1344.json", + "M": 326656, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.8515000000007 + }, + "M=326656,N=1408": { + "file": "silu_config_M326656_N1408.json", + "M": 326656, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.1714999999976 + }, + "M=326656,N=1440": { + "file": "silu_config_M326656_N1440.json", + "M": 326656, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3054.0514999999978 + }, + "M=326656,N=1536": { + "file": "silu_config_M326656_N1536.json", + "M": 326656, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3029.811249999999 + }, + "M=326656,N=1600": { + "file": "silu_config_M326656_N1600.json", + "M": 326656, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.731500000001 + }, + "M=326656,N=1664": { + "file": "silu_config_M326656_N1664.json", + "M": 326656, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.8515000000007 + }, + "M=326656,N=1728": { + "file": "silu_config_M326656_N1728.json", + "M": 326656, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3048.411500000001 + }, + "M=326656,N=1760": { + "file": "silu_config_M326656_N1760.json", + "M": 326656, + "N": 1760, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 3077.2114999999985 + }, + "M=326656,N=1792": { + "file": "silu_config_M326656_N1792.json", + "M": 326656, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3039.611500000002 + }, + "M=326656,N=1920": { + "file": "silu_config_M326656_N1920.json", + "M": 326656, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3062.451500000001 + }, + "M=326656,N=2048": { + "file": "silu_config_M326656_N2048.json", + "M": 326656, + "N": 2048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2414.608750000002 + }, + "M=326656,N=2080": { + "file": "silu_config_M326656_N2080.json", + "M": 326656, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3396.1727499999997 + }, + "M=326656,N=2240": { + "file": "silu_config_M326656_N2240.json", + "M": 326656, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3559.4134999999987 + }, + "M=326656,N=2400": { + "file": "silu_config_M326656_N2400.json", + "M": 326656, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3756.17425 + }, + "M=326656,N=2560": { + "file": "silu_config_M326656_N2560.json", + "M": 326656, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3904.2149999999992 + }, + "M=327680,N=128": { + "file": "silu_config_M327680_N128.json", + "M": 327680, + "N": 128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.07975000000005 + }, + "M=327680,N=160": { + "file": "silu_config_M327680_N160.json", + "M": 327680, + "N": 160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.8805 + }, + "M=327680,N=192": { + "file": "silu_config_M327680_N192.json", + "M": 327680, + "N": 192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.92050000000006 + }, + "M=327680,N=256": { + "file": "silu_config_M327680_N256.json", + "M": 327680, + "N": 256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 356.2004999999999 + }, + "M=327680,N=320": { + "file": 
"silu_config_M327680_N320.json", + "M": 327680, + "N": 320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 790.202 + }, + "M=327680,N=384": { + "file": "silu_config_M327680_N384.json", + "M": 327680, + "N": 384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 784.8822500000001 + }, + "M=327680,N=480": { + "file": "silu_config_M327680_N480.json", + "M": 327680, + "N": 480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.1222499999999 + }, + "M=327680,N=512": { + "file": "silu_config_M327680_N512.json", + "M": 327680, + "N": 512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 634.5617500000001 + }, + "M=327680,N=576": { + "file": "silu_config_M327680_N576.json", + "M": 327680, + "N": 576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.08525 + }, + "M=327680,N=640": { + "file": "silu_config_M327680_N640.json", + "M": 327680, + "N": 640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1503.605 + }, + "M=327680,N=768": { + "file": "silu_config_M327680_N768.json", + "M": 327680, + "N": 768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.9250000000002 + }, + "M=327680,N=800": { + "file": "silu_config_M327680_N800.json", + "M": 327680, + "N": 800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.0852499999996 + }, + "M=327680,N=896": { + "file": "silu_config_M327680_N896.json", + "M": 327680, + "N": 896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.165 + }, + "M=327680,N=960": { + "file": "silu_config_M327680_N960.json", + "M": 327680, + "N": 960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.5252500000001 + }, + "M=327680,N=1024": { + "file": "silu_config_M327680_N1024.json", + "M": 327680, + "N": 1024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1255.8442500000006 + }, + "M=327680,N=1120": { + "file": "silu_config_M327680_N1120.json", + "M": 327680, + "N": 1120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.411249999999 + }, + "M=327680,N=1152": { + 
"file": "silu_config_M327680_N1152.json", + "M": 327680, + "N": 1152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.0112499999996 + }, + "M=327680,N=1280": { + "file": "silu_config_M327680_N1280.json", + "M": 327680, + "N": 1280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.61125 + }, + "M=327680,N=1344": { + "file": "silu_config_M327680_N1344.json", + "M": 327680, + "N": 1344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3016.571250000001 + }, + "M=327680,N=1408": { + "file": "silu_config_M327680_N1408.json", + "M": 327680, + "N": 1408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3004.69125 + }, + "M=327680,N=1440": { + "file": "silu_config_M327680_N1440.json", + "M": 327680, + "N": 1440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3020.8912499999997 + }, + "M=327680,N=1536": { + "file": "silu_config_M327680_N1536.json", + "M": 327680, + "N": 1536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.0912499999995 + }, + "M=327680,N=1600": { + "file": "silu_config_M327680_N1600.json", + "M": 327680, + "N": 1600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3032.6115 + }, + "M=327680,N=1664": { + "file": "silu_config_M327680_N1664.json", + "M": 327680, + "N": 1664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3004.49125 + }, + "M=327680,N=1728": { + "file": "silu_config_M327680_N1728.json", + "M": 327680, + "N": 1728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3028.1312500000013 + }, + "M=327680,N=1760": { + "file": "silu_config_M327680_N1760.json", + "M": 327680, + "N": 1760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3034.77125 + }, + "M=327680,N=1792": { + "file": "silu_config_M327680_N1792.json", + "M": 327680, + "N": 1792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.9712499999996 + }, + "M=327680,N=1920": { + "file": "silu_config_M327680_N1920.json", + "M": 327680, + "N": 1920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
3033.571250000001 + }, + "M=327680,N=2048": { + "file": "silu_config_M327680_N2048.json", + "M": 327680, + "N": 2048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2420.2887499999997 + }, + "M=327680,N=2080": { + "file": "silu_config_M327680_N2080.json", + "M": 327680, + "N": 2080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3343.652750000001 + }, + "M=327680,N=2240": { + "file": "silu_config_M327680_N2240.json", + "M": 327680, + "N": 2240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3505.2532499999998 + }, + "M=327680,N=2400": { + "file": "silu_config_M327680_N2400.json", + "M": 327680, + "N": 2400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3698.8939999999993 + }, + "M=327680,N=2560": { + "file": "silu_config_M327680_N2560.json", + "M": 327680, + "N": 2560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3894.334999999999 + } + }, + "index_by_n": { + "128": [ + { + "M": 1, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000015 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999992 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119249999999994 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.5589999999999975 + }, + { + "M": 1024, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.5990000000000038 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 
5.439 + }, + { + "M": 3072, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 6.919000000000004 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 7.838999999999999 + }, + { + "M": 5120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 8.959249999999997 + }, + { + "M": 6144, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 9.87899999999999 + }, + { + "M": 7168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.718999999999994 + }, + { + "M": 8192, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 11.358999999999995 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 11.919249999999991 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 12.718999999999994 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.359000000000009 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.919000000000011 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 14.399000000000001 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.079 + }, + { + "M": 15360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.558999999999997 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.358999999999995 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.919000000000004 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.279249999999998 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.918999999999997 + }, + { + "M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.558999999999997 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.119 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.59899999999999 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.159 + }, + { + "M": 24576, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 20.679000000000002 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.43925 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.83925 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.39925000000001 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.159000000000006 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.47925 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.15925 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.59900000000001 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.119250000000008 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.67924999999999 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.27900000000001 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.799000000000014 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.319249999999997 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 28.119250000000008 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 28.559000000000005 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.079250000000002 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.519000000000005 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 33.59925000000001 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 33.91900000000001 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.87925 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.959000000000007 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.11924999999999 + }, + { + 
"M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.839000000000006 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.479000000000006 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.07925 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.43924999999997 + }, + { + "M": 51200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 35.11924999999998 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 35.439249999999994 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 36.11899999999998 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 36.67899999999999 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.27899999999999 + }, + { + "M": 56320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.75899999999999 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.59924999999998 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.79924999999999 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.27900000000001 + }, + { + "M": 60416, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.439249999999994 + }, + { + "M": 61440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 40.47900000000001 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 46.719249999999995 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.679 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.079 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.39900000000001 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.239 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.71899999999998 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 44.31900000000001 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.319250000000004 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.43899999999998 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.71924999999999 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.43900000000001 + }, + { + "M": 73728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.559250000000006 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 47.67899999999998 + }, + { + "M": 75776, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.31924999999997 + }, + { + "M": 76800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.59924999999999 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 56.51925000000001 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.75924999999999 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.31925000000002 + }, + { + "M": 80896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 58.67925000000001 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 51.799250000000015 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.91924999999999 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.43925 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.15924999999999 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.51924999999998 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.07925 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.71925000000001 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.119250000000015 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.71925 + }, + { + "M": 
91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 56.15924999999998 + }, + { + "M": 92160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.51925 + }, + { + "M": 93184, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 66.39925 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.87925 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.39924999999999 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.99925 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.63925 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.119249999999994 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.71925000000001 + }, + { + "M": 100352, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 70.95925 + }, + { + "M": 101376, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 71.31924999999998 + }, + { + "M": 102400, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 72.39925000000001 + }, + { + "M": 103424, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.03925000000001 + }, + { + "M": 104448, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.31925000000003 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.03925000000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.35925000000002 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.91924999999999 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 66.59924999999998 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.99925 + }, + { + "M": 110592, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.35925 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.27925000000002 + }, + { + "M": 112640, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 78.87924999999996 + }, 
+ { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.27924999999999 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.87925000000001 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.23924999999998 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.71924999999999 + }, + { + "M": 117760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.43924999999999 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.99925000000002 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 71.51925 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.47925000000001 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.75925 + }, + { + "M": 122880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.11950000000002 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.55924999999999 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.31925000000001 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.27950000000003 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.35925 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.7995 + }, + { + "M": 129024, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.15925 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 76.99924999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.63949999999998 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.4795 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 79.03924999999998 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.23925000000001 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 79.63950000000003 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.63950000000001 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.15950000000001 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.4795 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.71950000000002 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.3195 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.19925 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.91924999999996 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 94.07925 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.87925000000003 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.59950000000002 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 96.35925000000002 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.43924999999999 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.99924999999999 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.39950000000002 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.15925000000003 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.71950000000001 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.11950000000002 + }, + { + "M": 153600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.71950000000002 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.1595 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.19925 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.39925000000001 + }, + { + "M": 157696, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 91.19950000000001 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.47924999999994 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.3195 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.51950000000002 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.4795 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.91949999999997 + }, + { + "M": 163840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.11949999999995 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.67925 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.2395 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.75924999999998 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 107.79925 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 107.63950000000003 + }, + { + "M": 169984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.67949999999999 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.91949999999999 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.07950000000002 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.67950000000002 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 98.91925000000003 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.71950000000001 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.51924999999997 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 100.87950000000001 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.79949999999994 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 112.51949999999997 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.43950000000001 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.07950000000002 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.59925000000001 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.91924999999998 + }, + { + "M": 184320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.67949999999999 + }, + { + "M": 185344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 125.59949999999998 + }, + { + "M": 186368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 126.27950000000004 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.19924999999998 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.63925000000003 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.71950000000001 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.03924999999995 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.75949999999997 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.99949999999998 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 109.47950000000006 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 109.51949999999997 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 110.4795 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.35950000000005 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 111.75950000000003 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.03949999999995 + }, + { + "M": 199680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.59949999999998 + }, + { + "M": 200704, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 113.11949999999999 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.83950000000002 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.23950000000002 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.7595 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 115.03950000000003 + }, + { + "M": 205824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 115.87950000000004 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.59949999999998 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.99949999999998 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 128.83949999999996 + }, + { + "M": 209920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.19950000000003 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.67950000000002 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.63950000000003 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.2794999999999 + }, + { + "M": 215040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.71950000000001 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.3195 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.7595 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.3195 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.83950000000004 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.11950000000004 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.95949999999999 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 124.39950000000002 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.95950000000002 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.5995 + }, + { + "M": 225280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.91949999999999 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 126.67950000000003 + }, + { + "M": 227328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.15974999999996 + }, + { + "M": 228352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.79974999999999 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 128.2795 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.07950000000002 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.27949999999998 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.8795 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.7195 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.91949999999994 + }, + { + "M": 235520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.43974999999995 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.4795 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.99975000000003 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 133.11975 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 134.11975000000004 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.79974999999996 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 134.5995 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.2794999999999 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.95975 + }, + { + "M": 244736, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 136.31950000000003 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 136.47950000000003 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.8395 + }, + { + "M": 247808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.95974999999999 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.59975 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.3995 + }, + { + "M": 250880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.95950000000002 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.63950000000003 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.55975 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.7195 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 143.03950000000003 + }, + { + "M": 256000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.75975 + }, + { + "M": 257024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.1995 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.71950000000004 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 154.91950000000003 + }, + { + "M": 260096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 144.39949999999993 + }, + { + "M": 261120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.23950000000002 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.27974999999998 + }, + { + "M": 263168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.95975 + }, + { + "M": 264192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.43949999999998 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 146.91974999999996 + }, + { + "M": 266240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
147.43975 + }, + { + "M": 267264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.5995 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.63975000000005 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.11950000000002 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.67950000000002 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 150.11950000000002 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 150.79950000000002 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.19950000000006 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.75949999999995 + }, + { + "M": 275456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.35950000000003 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 152.39974999999998 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.91974999999996 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.95975000000004 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.51975000000004 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.91974999999996 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.51975 + }, + { + "M": 282624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.51975000000004 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.55974999999998 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.07974999999993 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.71974999999998 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.47975000000002 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
158.83974999999992 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 170.59975000000003 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.27975000000004 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.19975 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 171.11975 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.91975000000002 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.35949999999997 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.87975 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.27950000000004 + }, + { + "M": 296960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.63975000000005 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.11949999999996 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.8395 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.19975 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.11974999999995 + }, + { + "M": 302080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.15950000000004 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.39974999999998 + }, + { + "M": 304128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 168.39975000000004 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 179.63975000000005 + }, + { + "M": 307200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.91974999999996 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.23950000000002 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 181.99975 + }, + { + "M": 
310272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.43975 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.63975 + }, + { + "M": 312320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.35974999999996 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.79975000000002 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.23974999999996 + }, + { + "M": 315392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.07975000000005 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.59975000000003 + }, + { + "M": 317440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.39975000000004 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.79974999999996 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 186.71974999999998 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 175.9197499999999 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.95974999999999 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.55999999999995 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.75975000000005 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 190.79975000000002 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.23974999999996 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.55975 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.07975000000005 + } + ], + "160": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 8, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199249999999992 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.878999999999998 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.518999999999991 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.598999999999997 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.63924999999999 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 12.318999999999996 + }, + { + "M": 5120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 13.718999999999994 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 15.199250000000006 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.599000000000004 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.83899999999999 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.91899999999999 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.439000000000007 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.598999999999997 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.599000000000018 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.959000000000003 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 26.359000000000023 + }, + { + "M": 15360, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 26.318999999999996 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.479000000000013 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 30.239250000000006 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 31.55900000000002 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.079000000000008 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 32.719 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.518999999999984 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.71925 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 38.27924999999999 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.11899999999999 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 38.319250000000004 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.35925000000002 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.439 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.759000000000015 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 46.07899999999999 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.399 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.83925000000002 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.599 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.359249999999996 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.75925000000001 + }, + { + "M": 35840, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.95925 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 51.799 + }, + { + "M": 37888, + "rows_per_block": 6, + 
"vec_size": 2, + "time_us": 56.75925 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.91924999999999 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 59.23925 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 55.91924999999999 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 61.99925 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.75924999999998 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 60.15925000000003 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.91950000000001 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 67.03925 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.47925 + }, + { + "M": 48128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 69.95925000000001 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.71925000000002 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.07925000000003 + }, + { + "M": 51200, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 73.79925 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.03925000000001 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.75924999999998 + }, + { + "M": 54272, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.87949999999998 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.07924999999997 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.19924999999998 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.27950000000001 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 76.55924999999999 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.75925000000001 + }, + { + "M": 60416, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 85.75950000000002 + }, + { + "M": 61440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 80.15924999999999 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.47924999999998 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.6795 + }, + { + "M": 64512, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 90.79950000000001 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.99924999999999 + }, + { + "M": 66560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 93.63925000000002 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.63949999999997 + }, + { + "M": 68608, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 96.23925000000001 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 97.39949999999999 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.9995 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 100.15925 + }, + { + "M": 72704, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 101.43950000000002 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.55925000000003 + }, + { + "M": 74752, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 103.99949999999998 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 96.59949999999998 + }, + { + "M": 76800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 106.51950000000002 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 107.9195 + }, + { + "M": 78848, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 109.39950000000002 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 110.39950000000003 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.95924999999997 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
102.7995 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 105.35924999999993 + }, + { + "M": 83968, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.91950000000001 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 107.75925000000001 + }, + { + "M": 86016, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.23949999999998 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 108.79924999999999 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 121.19949999999997 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.55950000000001 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 113.63950000000001 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.83949999999997 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.19950000000001 + }, + { + "M": 93184, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.79950000000001 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 118.51949999999997 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 130.23950000000002 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.87950000000002 + }, + { + "M": 97280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 132.99975 + }, + { + "M": 98304, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 134.23975000000002 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.4795 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.55949999999999 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.9595 + }, + { + "M": 102400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.1995 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.27949999999998 + }, + { + "M": 104448, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 141.3595 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.5195 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 132.87950000000004 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 134.03949999999998 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.1995 + }, + { + "M": 109568, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 148.7195 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.71949999999998 + }, + { + "M": 111616, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.51975 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.03975 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.19949999999994 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.47950000000003 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.63950000000003 + }, + { + "M": 116736, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.7195 + }, + { + "M": 117760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 159.23950000000005 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.71975 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.9595 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 159.1595 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 160.2395 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.95949999999993 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.03975000000003 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 163.87974999999994 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.83975000000004 + }, + { + 
"M": 126976, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 171.15949999999998 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.11975000000004 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.23975000000004 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 171.47974999999994 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.51975000000004 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.67950000000002 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.59949999999998 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 176.23975000000002 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.43949999999992 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.39974999999995 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.79975000000002 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.99974999999998 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.31975 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.43975 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 183.71975 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.95975000000004 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 172.63975 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 176.31975000000003 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.43975000000003 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.83975000000004 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.99974999999998 + }, + { + "M": 148480, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 188.99974999999995 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.31974999999994 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.59974999999994 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.43975 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 196.95974999999996 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.19975 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.24 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.56 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.55975 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.95975 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.87975 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 212.95974999999999 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.43975 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.75975 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 206.59974999999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.95974999999999 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.15974999999997 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.36 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 202.63975 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.71999999999997 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 204.87975 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.75975 + }, + { + "M": 171008, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 226.43975000000006 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.79974999999996 + }, + { + "M": 173056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 230.32 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.91975000000008 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.64000000000004 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 221.35975000000002 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 222.60000000000002 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.63974999999994 + }, + { + "M": 179200, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.19999999999993 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.56 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.84000000000003 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.07975 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 232.15974999999997 + }, + { + "M": 184320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 222.39975000000004 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.64 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.95999999999998 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.08000000000004 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 238.47999999999996 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.31975 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.76 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 230.84000000000003 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 240.27999999999997 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.52000000000004 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.40000000000003 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 246.64 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 244.95999999999992 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 256.64000000000004 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 236.7600000000001 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 246.96000000000004 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 241.63999999999993 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 242.83999999999997 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.92000000000002 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.64 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.24 + }, + { + "M": 205824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.47999999999996 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.68025 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.4000000000001 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.19999999999987 + }, + { + "M": 209920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.36025000000006 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.44 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.79999999999998 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.79999999999993 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.00000000000017 + }, 
+ { + "M": 215040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.32000000000005 + }, + { + "M": 216064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 285.48025000000007 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 260.6800000000001 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.84024999999997 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 262.9200000000001 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 280.63999999999993 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 262.5199999999999 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 284.5599999999999 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 275.51999999999987 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.24025 + }, + { + "M": 225280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.20000000000016 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.51999999999987 + }, + { + "M": 227328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.64025000000004 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.2 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.84000000000003 + }, + { + "M": 230400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.0802499999999 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 295.36025000000006 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 278.52000000000004 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.4800000000001 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.76025 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.32025000000004 + }, + { + "M": 
236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 305.36025000000006 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.24025 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 307.80025 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.16000000000014 + }, + { + "M": 240640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.0802500000001 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.00025 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.4002500000002 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.16025000000013 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.72025 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.44025 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.32025 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 303.60025000000024 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 319.6805 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 295.6802500000002 + }, + { + "M": 250880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 311.2004999999999 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.04025000000007 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.8802499999999 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.68024999999983 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 304.8402499999999 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.80024999999983 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 314.4002499999999 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 308.7202499999999 + }, + { + "M": 259072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 320.4802499999999 + }, + { + "M": 260096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 321.76050000000004 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 327.12049999999977 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.88025 + }, + { + "M": 263168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.08025000000004 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.48025000000007 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.0802500000001 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.48024999999996 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 336.24049999999994 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.44049999999993 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 339.08050000000014 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.76025000000004 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 329.0402500000001 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.1204999999999 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.8402500000002 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.6005000000001 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 325.2004999999998 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 334.44049999999993 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 341.84050000000013 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.9202499999998 + }, + { + "M": 279552, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 344.08050000000003 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 351.6405000000001 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6004999999999 + }, + { + "M": 282624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 336.7605000000001 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.8405 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.84050000000013 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 357.24024999999983 + }, + { + "M": 286720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 341.24024999999995 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.6005 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 343.6405 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.8405 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.16025 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 362.32050000000004 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.7605 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.36075000000017 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 357.44049999999993 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.80049999999994 + }, + { + "M": 296960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.40049999999985 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.7202500000001 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.8405 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.84024999999997 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 380.0804999999999 + 
}, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.56050000000005 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.4805 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.8405 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 362.7204999999999 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.04049999999984 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.4805 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.8405 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.04050000000007 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 369.16049999999996 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.52049999999997 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 375.80050000000006 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.2805000000001 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.36075000000005 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.88049999999987 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.4005000000001 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.3204999999999 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.7205 + }, + { + "M": 319488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.8404999999999 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 392.0405000000003 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.3204999999999 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.76049999999987 + }, + { + "M": 323584, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 393.04050000000007 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.04075 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.9605 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.1205 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.8805 + } + ], + "192": [ + { + "M": 1, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.879000000000005 + }, + { + "M": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 3.8389999999999986 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.7590000000000074 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.598999999999997 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 10.718999999999994 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 12.319249999999997 + }, + { + "M": 5120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 13.639000000000003 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 15.078999999999994 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
16.678999999999995 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.839250000000007 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.839 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.31899999999999 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 21.439000000000007 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.839250000000007 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.879000000000012 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.158999999999992 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 27.558999999999997 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.559000000000005 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 30.359 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 31.519 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 32.99924999999999 + }, + { + "M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 32.359 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.519 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.07900000000001 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 38.279250000000005 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.159 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.998999999999995 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.39925000000001 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 43.43925 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.99924999999999 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 43.55924999999999 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 44.559000000000005 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.518999999999984 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 46.83899999999999 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 48.15924999999999 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.87925000000001 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.35925 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.51925 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 56.75925000000001 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.959250000000004 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.15925000000001 + }, + { + "M": 40960, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 60.83925000000001 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 62.11924999999999 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.35924999999999 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.71925000000002 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.95925000000002 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 67.11925000000002 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.67925000000002 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.67925000000001 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.91925000000003 + }, + { + "M": 50176, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 72.51950000000002 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 68.03925000000002 + }, + { 
+ "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.99950000000003 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.71924999999999 + }, + { + "M": 54272, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 77.87950000000001 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.07925 + }, + { + "M": 56320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 80.3595 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 81.83950000000003 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 82.95925000000001 + }, + { + "M": 59392, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 84.35925 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.11925 + }, + { + "M": 61440, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 86.9195 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.35925 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.59925000000001 + }, + { + "M": 64512, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 90.75925 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 85.07924999999997 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.23925 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.47950000000002 + }, + { + "M": 68608, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 96.23949999999998 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 97.67925000000001 + }, + { + "M": 70656, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 98.71925 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 100.23925000000003 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.5995 + }, + { + "M": 73728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 102.59924999999998 + }, + { + "M": 74752, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 104.11924999999998 + }, + { + "M": 75776, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 105.47925000000002 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.31925000000001 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.47950000000003 + }, + { + "M": 78848, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 109.3995 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 110.5995 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.11950000000003 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 102.95925000000001 + }, + { + "M": 82944, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 114.51950000000002 + }, + { + "M": 83968, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.9995 + }, + { + "M": 84992, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 117.35950000000001 + }, + { + "M": 86016, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.39949999999999 + }, + { + "M": 87040, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 119.99950000000001 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 121.11950000000002 + }, + { + "M": 89088, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.2795 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 123.71950000000001 + }, + { + "M": 91136, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 125.19950000000001 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 125.55949999999999 + }, + { + "M": 93184, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.75949999999995 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 118.3195 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 130.35950000000003 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
130.95950000000005 + }, + { + "M": 97280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.15949999999998 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.5195 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.71950000000001 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.67950000000002 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.1195 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.71950000000001 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.55950000000004 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.55975000000004 + }, + { + "M": 105472, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 143.63950000000003 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 142.8795 + }, + { + "M": 107520, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 146.03949999999998 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.3595 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 147.7195 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.7995 + }, + { + "M": 111616, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.43949999999998 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.1595 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.39975 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.7595 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 153.59950000000003 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.11949999999996 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 144.27975 + }, + { + "M": 118784, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 147.43949999999998 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.6395 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.95975000000004 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 162.07975000000002 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.4395 + }, + { + "M": 123904, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 167.19975000000002 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.51974999999996 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.87950000000004 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.11949999999996 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 156.1995 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.43974999999998 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.4794999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.95950000000002 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.95974999999999 + }, + { + "M": 133120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.31975 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.51950000000002 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.71975000000003 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.75949999999997 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.11975000000004 + }, + { + "M": 138240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.47975000000005 + }, + { + "M": 139264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 187.03975000000003 + }, + { + "M": 140288, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 188.31975000000003 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.19975 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.23974999999993 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 172.7197499999999 + }, + { + "M": 144384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 193.27974999999992 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 186.67975 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.47974999999997 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.79975000000002 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.23975000000002 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.55975 + }, + { + "M": 150528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 201.23999999999995 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.91975000000002 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.15975000000003 + }, + { + "M": 153600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.23975000000002 + }, + { + "M": 154624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.60000000000002 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 198.43975 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.0397499999999 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.27999999999997 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.43975000000006 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 213.11974999999995 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 195.68 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
196.87999999999994 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 198.03975000000003 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.39975000000004 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 211.40000000000003 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.72000000000003 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.83974999999992 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.23975000000002 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 205.2 + }, + { + "M": 169984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 226.44 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.63975000000005 + }, + { + "M": 172032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 228.67999999999995 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.67975 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.80000000000007 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.07975 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 224.2800000000001 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 214.48000000000002 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.48000000000002 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.59974999999997 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.04000000000002 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.2397499999999 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 231.52000000000004 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 232.36 + }, + { + "M": 
184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.9600000000001 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.8 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 244.0000000000001 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.31999999999994 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.51999999999998 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.71974999999992 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.39999999999998 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 242.19999999999993 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.3599999999999 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.87999999999994 + }, + { + "M": 194560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.88 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.8800000000001 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.19999999999993 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 238.24000000000007 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.39999999999984 + }, + { + "M": 199680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.51999999999998 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.3999999999998 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.20025000000007 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 255.32 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.5200000000001 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.2399999999999 + }, + { + "M": 205824, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 269.91999999999985 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 249.04025000000013 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 258.1999999999999 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.56000000000006 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.1200000000002 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.7600000000001 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 277.0802499999999 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.2400000000001 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 265.47999999999985 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.87999999999994 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.0000000000001 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.0399999999999 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.4399999999999 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.48025 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 286.44024999999993 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.7202499999999 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 277.84024999999997 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.32000000000005 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.20025 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.0402500000001 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.7602499999998 + }, + { + "M": 227328, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 283.92024999999995 + }, + { + "M": 228352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.04025000000007 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.28024999999997 + }, + { + "M": 230400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.48025000000007 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.6402500000001 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 289.68025000000006 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 290.96000000000015 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.2002499999999 + }, + { + "M": 235520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.5202500000001 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.56000000000006 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.92025 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.60025000000013 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.80025 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 304.28024999999997 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 297.00025 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 301.72024999999996 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.08025 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 290.1599999999998 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.52025 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 302.96025000000014 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 314.68025 + }, + { + "M": 248832, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 320.00025000000005 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.52 + }, + { + "M": 250880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 300.84024999999986 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.8002499999999 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.28049999999985 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 301.4402499999999 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 305.48025000000007 + }, + { + "M": 256000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.8002499999999 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 315.44025 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 309.2004999999999 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 317.1602499999998 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.6402499999999 + }, + { + "M": 261120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 323.2802499999999 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.32025 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.16025 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.5602500000002 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.96025 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.8802499999998 + }, + { + "M": 267264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 329.96025000000003 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.5602500000001 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.2002500000001 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
334.6002500000001 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 347.0005 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.56049999999993 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.36024999999995 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.64025000000004 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.3204999999999 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.2405000000001 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.16025000000013 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 343.40025 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 351.20050000000015 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 356.7605000000002 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.4402499999999 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.48050000000023 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 338.32025 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.92050000000006 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 340.64025000000015 + }, + { + "M": 286720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.8404999999998 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.1205000000001 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.4005000000001 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.0005000000001 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.7202500000003 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
342.20050000000003 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.20050000000003 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.44025 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.7204999999999 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.1605000000002 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 348.16025 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.4804999999999 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.40049999999985 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 368.4805000000001 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 369.64049999999975 + }, + { + "M": 302080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.76049999999987 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 361.0005000000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.3204999999998 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.32050000000015 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 375.56050000000005 + }, + { + "M": 307200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.6005000000001 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.68050000000005 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 380.08050000000003 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.8805000000002 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.2405000000002 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.72075000000007 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
384.84050000000013 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 391.8405 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 372.2805000000001 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.4005000000003 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 372.16049999999996 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.5607500000001 + }, + { + "M": 319488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.92049999999995 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.04050000000007 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 411.8405 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.52049999999974 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.6005 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 404.60075000000006 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.2007500000003 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 401.52049999999974 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.92050000000006 + } + ], + "256": [ + { + "M": 1, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000086 + }, + { + 
"M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.639000000000003 + }, + { + "M": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 3.5589999999999975 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 5.439 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 7.879000000000004 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 9.759 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 11.399250000000002 + }, + { + "M": 5120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 12.719000000000001 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 13.799 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 15.039000000000001 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 16.279000000000003 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 17.318999999999996 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 18.638999999999996 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 19.599000000000004 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 20.75925 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 22.039 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 23.039 + }, + { + "M": 15360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 24.198999999999998 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 25.119000000000014 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 26.398999999999994 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 27.159000000000006 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 28.358999999999988 + }, + { + 
"M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.559000000000005 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 30.799000000000014 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 31.879250000000006 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 36.79900000000001 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 33.91899999999999 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.479 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.83899999999999 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 37.11925000000001 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 38.19924999999999 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.75925 + }, + { + "M": 30720, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.679249999999996 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 41.51925 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.63925 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.67925 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.71925000000001 + }, + { + "M": 35840, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.639 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 53.79925000000001 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 47.99925 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.03900000000002 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 57.59924999999999 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 51.27925000000001 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.519249999999985 + }, 
+ { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 61.55925000000002 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 62.99924999999998 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.39925 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 56.71925 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.83925 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.03924999999999 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 59.959249999999976 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 61.11925000000002 + }, + { + "M": 51200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.279250000000026 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.31925000000001 + }, + { + "M": 53248, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.87949999999998 + }, + { + "M": 54272, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 76.31925000000001 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.63924999999998 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 67.71924999999999 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.67924999999998 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.91925 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.83925000000002 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.51925 + }, + { + "M": 61440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.99925000000002 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.15924999999997 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 88.23925000000003 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 87.59949999999999 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.39949999999999 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.95925 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 90.91950000000001 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 80.59925 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.83924999999999 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 90.11950000000002 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.83924999999999 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.95925000000001 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.43925 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.35925000000002 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 88.23925000000001 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.35925 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 106.4795 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.51925000000001 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.63925000000002 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.95925 + }, + { + "M": 81920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 94.83924999999998 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 95.91925 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.03925 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.11925000000002 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.27925000000002 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 111.67949999999999 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.07925000000003 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.43925000000002 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 103.51925000000003 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.07949999999997 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 105.75925000000004 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.7595 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.31950000000003 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.95950000000005 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.55950000000001 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.71950000000001 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 123.5995 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 113.35949999999997 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.83975 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 115.63949999999997 + }, + { + "M": 102400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.75950000000003 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 117.71950000000004 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.15975000000003 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.95975000000004 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 132.4795 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 145.87950000000004 + 
}, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.23950000000002 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.23950000000004 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 137.7595 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 127.39974999999995 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.03950000000003 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.55949999999996 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.51950000000002 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 143.3195 + }, + { + "M": 117760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.67975 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 134.0795 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 135.1195 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 136.19974999999988 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.59975000000003 + }, + { + "M": 122880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.07950000000005 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.51950000000005 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.5995 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.11950000000002 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 142.7595 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 155.19974999999994 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.43975 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.35975 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 147.23949999999996 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.15975000000003 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.5195 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.79975000000002 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.15974999999997 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 152.39950000000005 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.6395 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.6397499999999 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.95975000000004 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.39975000000004 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 168.95974999999999 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 180.47975000000002 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.9595 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.87975000000006 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.91974999999996 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 163.19950000000006 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.1995 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 165.27975000000004 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.19975 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.19975 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 168.35974999999996 + }, + { + "M": 
153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.23974999999996 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.39974999999998 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 182.91974999999996 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.71974999999998 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 173.95975000000004 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.95975000000004 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 175.91975000000008 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 177.11975 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.19975 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 190.51975000000004 + }, + { + "M": 163840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 191.71999999999997 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 204.11975000000007 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.11975 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 183.51975 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 184.59974999999997 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 185.75974999999988 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 186.75974999999994 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.1599999999999 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 188.35975000000002 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.8400000000001 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.99975 + }, + { + "M": 175104, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 191.47974999999997 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.67974999999996 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 193.67999999999995 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 206.19974999999994 + }, + { + "M": 179200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.07975000000005 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 208.31975 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.95974999999993 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.0000000000001 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 200.15974999999997 + }, + { + "M": 184320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.2 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 213.56000000000006 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 214.83975000000015 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.88000000000005 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 205.59974999999997 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 206.63975000000016 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.8 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 208.87975000000017 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 221.51999999999987 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.83975000000015 + }, + { + "M": 194560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.07999999999987 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 213.16000000000014 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 214.11999999999995 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 215.31975 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 216.39974999999998 + }, + { + "M": 199680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.4000000000001 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.40000000000003 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.63975000000005 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.51975000000022 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 221.75974999999988 + }, + { + "M": 204800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 222.83974999999998 + }, + { + "M": 205824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 223.75999999999988 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.07999999999998 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.60000000000014 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.12000000000023 + }, + { + "M": 209920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.03975000000003 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.7199999999999 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 230.12000000000006 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 231.32 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.87999999999988 + }, + { + "M": 215040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 244.84000000000003 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.44000000000005 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.60000000000002 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 236.68 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.83999999999992 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.28 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.24024999999995 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 241.19999999999993 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.80000000000007 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 243.19999999999993 + }, + { + "M": 225280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 244.12 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 245.36000000000013 + }, + { + "M": 227328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 246.36 + }, + { + "M": 228352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.24 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.56000000000006 + }, + { + "M": 230400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 261.12 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 250.67999999999995 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.24 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.24 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 253.88000000000022 + }, + { + "M": 235520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.9200000000001 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.1200000000001 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 257.12025000000006 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.9602500000002 + }, + { + "M": 239616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 270.5600000000002 + }, + { + "M": 240640, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 260.6 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 261.4800000000001 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 285.19999999999993 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 263.55999999999995 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.55999999999995 + }, + { + "M": 245760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.6800000000002 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 267.0 + }, + { + "M": 247808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 290.6402499999998 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.44000000000005 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 282.1199999999999 + }, + { + "M": 250880, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 271.9200000000001 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.51999999999987 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 273.9599999999999 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 274.9599999999999 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 276.12 + }, + { + "M": 256000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.52025000000003 + }, + { + "M": 257024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 278.27999999999986 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.28 + }, + { + "M": 259072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 279.6400000000001 + }, + { + "M": 260096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.55999999999995 + }, + { + "M": 261120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 281.76 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 282.8799999999999 + }, + 
{ + "M": 263168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.9200000000001 + }, + { + "M": 264192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.91999999999985 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 297.36024999999984 + }, + { + "M": 266240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.15999999999997 + }, + { + "M": 267264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 287.99999999999994 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.40025 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 292.36000000000024 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.60024999999996 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 294.7202500000001 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.76025000000004 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 296.84000000000003 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 297.8402500000001 + }, + { + "M": 275456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.68025 + }, + { + "M": 276480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 299.7602499999999 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.4402500000001 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 302.12024999999994 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.0802500000001 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 315.60024999999985 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.8805 + }, + { + "M": 282624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.72050000000024 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.7602499999998 + }, + { + "M": 284672, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 319.88025000000005 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 309.36024999999995 + }, + { + "M": 286720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.52025000000003 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 323.2002500000001 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.08025 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.04025 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.8805 + }, + { + "M": 291840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 316.28025 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.3202500000001 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.3202499999999 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 330.84024999999997 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.8805000000001 + }, + { + "M": 296960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 321.68025 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 322.8002499999999 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 335.16025 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.6802500000001 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.72024999999974 + }, + { + "M": 302080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 326.8802499999998 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 327.9202500000001 + }, + { + "M": 304128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.92049999999995 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 330.08025 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
342.56025 + }, + { + "M": 307200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.72024999999996 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 345.96025 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6402499999999 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.16025 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.8402500000002 + }, + { + "M": 312320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.12025000000017 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.84024999999997 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.88025000000016 + }, + { + "M": 315392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.9202499999999 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 354.1205 + }, + { + "M": 317440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.2404999999999 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 367.3605 + }, + { + "M": 319488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.0402499999999 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 358.20050000000015 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 360.8805000000002 + }, + { + "M": 322560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.60024999999985 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.1205 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 364.04049999999984 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.76049999999987 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 355.2405000000001 + }, + { + "M": 327680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 356.2004999999999 + } + ], + "320": [ + { + 
"M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1992499999999993 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0790000000000077 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.478999999999992 + }, + { + "M": 1024, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.959249999999997 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.998999999999995 + }, + { + "M": 3072, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.839000000000006 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.03900000000001 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.719000000000015 + }, + { + "M": 6144, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 21.399249999999995 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.278999999999982 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 26.839000000000006 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.478999999999992 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.959 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.759249999999994 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
37.27925000000001 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.839000000000006 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 42.439 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.838999999999984 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 47.599249999999984 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.199249999999985 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.47924999999998 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.11925 + }, + { + "M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.47925 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.07925 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.759250000000016 + }, + { + "M": 23552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 64.79925 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.95925000000003 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.03925 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.55925000000002 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.55925000000002 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.39950000000002 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.83924999999999 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 82.43924999999996 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.75925 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.47925000000001 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.71949999999998 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 92.1995 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 94.23925 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 97.07924999999999 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.83950000000002 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 102.03925 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.75949999999997 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 106.55949999999997 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 109.87950000000002 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 112.15950000000001 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 114.1995 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 117.3195 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 118.91949999999999 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.9995 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 124.6795 + }, + { + "M": 49152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.99950000000001 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.5195 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 130.7595 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 134.27950000000004 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 136.67949999999996 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.3195 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 141.99975000000003 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 143.43949999999998 + }, + { + "M": 57344, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 146.63975000000002 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.03950000000003 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 151.3195 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 154.03949999999995 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 155.51950000000002 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 158.7994999999999 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.47975000000005 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 164.07975 + }, + { + "M": 65536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.59974999999991 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 167.71975000000003 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 171.55975 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.75974999999994 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 176.35974999999996 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.59974999999991 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 180.0797499999999 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.87975000000003 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 185.80000000000004 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 188.35975000000002 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 191.19975000000002 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 192.24 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.16 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 198.83975 
+ }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 201.03999999999996 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 203.6 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 204.20000000000005 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.04000000000002 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.03974999999997 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.07975 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.39975000000004 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.8 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 220.32 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.0399999999999 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 225.5200000000001 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 227.36 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.36 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 232.99999999999994 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 235.60000000000002 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.04000000000002 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 240.40000000000003 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.2399999999999 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.5200000000001 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 247.64000000000004 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.03999999999994 + }, + { + "M": 101376, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 252.39999999999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.28 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 257.4002500000001 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.80000000000007 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.88024999999993 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.9200000000001 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 265.88000000000005 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 269.8002500000002 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.24025 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.72025000000014 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 277.1602499999999 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 277.5602499999999 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 282.20025000000004 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 284.36024999999995 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.12025 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.3200000000001 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 290.24024999999995 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.96025000000003 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 296.9202499999999 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.4802500000001 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 302.0005 + }, + { + "M": 122880, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 302.0002500000001 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.72024999999996 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 308.88025000000005 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.56024999999994 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.9602499999999 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 313.92025000000007 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.3204999999998 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 321.5602499999999 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 323.92024999999995 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 326.0802500000002 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 326.60024999999996 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 331.6402499999999 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 334.20024999999987 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.8802499999998 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.60024999999985 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 339.00025000000016 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.76049999999987 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 345.6802500000001 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.8802499999998 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 349.68050000000017 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 349.52049999999986 + }, + { + "M": 
144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.8405 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.6004999999999 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.4404999999998 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 361.52049999999986 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 362.0804999999999 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.8805 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.1204999999999 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.6004999999999 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.7605 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.7605 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.6804999999998 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.08050000000003 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.7205 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 386.2805000000001 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 385.68050000000017 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.92050000000006 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.16049999999996 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 396.1205 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.12049999999977 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.04050000000007 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 403.4805000000001 + }, + { + "M": 165888, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 405.04050000000007 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 407.7607499999999 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 411.00075000000004 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.24075000000005 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.40075000000013 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 417.60074999999995 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.88075000000015 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 421.8807499999998 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 421.96074999999985 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.52075000000013 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 429.32074999999975 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.48074999999983 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.44074999999987 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.08100000000024 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.601 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 442.32075 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.16075 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.8007499999999 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.24074999999993 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.8810000000001 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.3610000000001 + }, + { + "M": 187392, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8810000000003 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.36075000000017 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 457.72074999999984 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 464.16100000000006 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 466.08100000000013 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.00099999999986 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.361 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.081 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 476.2009999999998 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.6809999999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 481.1210000000001 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 483.361 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.9210000000003 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.28100000000006 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.80099999999993 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.40099999999995 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.8810000000002 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.6410000000001 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 499.9610000000001 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 503.4409999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 505.9609999999998 + }, + { + "M": 208896, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 507.6809999999999 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.2810000000003 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 512.4810000000001 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.4810000000001 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.7609999999997 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 519.8410000000003 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 519.0810000000001 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 525.1210000000001 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.3209999999997 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.3210000000004 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 532.68125 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.32125 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 537.0412500000001 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.0812499999998 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 542.0812500000002 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 544.0412500000001 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.3212500000001 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.6412499999999 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 551.7612500000001 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 554.3212499999998 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 557.00125 + }, + { + "M": 230400, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 555.80125 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 562.0012499999998 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.9612500000001 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.6412500000001 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.5212500000002 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.9212500000001 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.0412499999998 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.2012499999998 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 578.9212499999999 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 581.4012499999999 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.8812499999999 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 586.4412500000004 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 588.84125 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.8415 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.7214999999999 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 592.0415000000003 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.3615 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.8015000000003 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.8414999999998 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 606.6814999999999 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 604.8415 + }, + { + "M": 251904, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 612.1614999999999 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 614.0815 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 616.8015000000003 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.5615000000003 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 617.0015000000003 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.7214999999999 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.4415000000001 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 627.6815000000004 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.2815 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 628.3214999999998 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.3215 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.8815 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.8415000000005 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.3215 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 640.4815000000003 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 646.8815 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 652.08175 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.68175 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 656.6017499999998 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.8417499999998 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.5617499999998 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.5617500000001 + }, + 
{ + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.40175 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 668.8817499999998 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 667.5217500000001 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.0417499999999 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.9617499999997 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 678.4817499999999 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.0417500000001 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.0417499999999 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.8417499999998 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 688.5617499999998 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.0817499999998 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.5217500000001 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.8817499999998 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 697.88175 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 700.8417499999998 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 702.9617499999999 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 705.4017499999998 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.36175 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.8017499999996 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.962 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 716.0419999999999 + }, + { + "M": 295936, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 718.0419999999999 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 716.0420000000001 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 722.762 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 725.3619999999999 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 728.0420000000001 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.4019999999998 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.6020000000001 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.6020000000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.4819999999997 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 740.0820000000003 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 742.1220000000001 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.2019999999998 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.5620000000001 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.242 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.5219999999999 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 756.0019999999997 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.7220000000002 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 761.0420000000001 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.9220000000007 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.5219999999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.3620000000005 + }, + { + "M": 317440, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 764.8020000000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.3220000000006 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0420000000001 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 777.5219999999999 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 780.9222500000001 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.3220000000001 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 787.0422500000004 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 788.3622500000001 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 791.1222500000003 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 793.9622500000003 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 790.202 + } + ], + "384": [ + { + "M": 1, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000015 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.679000000000002 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, 
+ "time_us": 6.879000000000005 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.959250000000011 + }, + { + "M": 3072, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.799 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 15.918999999999988 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 18.718999999999994 + }, + { + "M": 6144, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 21.439249999999994 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.159000000000006 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 26.718999999999994 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.438999999999993 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.638999999999985 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 34.55900000000002 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.03900000000001 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.639250000000004 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 42.159000000000006 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.47900000000001 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 47.39925 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 49.67924999999999 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 52.15925 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.799249999999994 + }, + { + "M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 57.23925000000002 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 59.87924999999998 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.279250000000005 + }, + { + "M": 
23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.67925 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 66.95924999999997 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 69.19925 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 71.79925 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 74.59924999999998 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.0795 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 79.47925000000002 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 81.79925000000003 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 84.59924999999998 + }, + { + "M": 32768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 86.71925000000002 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 89.11925 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.43949999999995 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 93.71949999999997 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 96.43950000000002 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 99.07950000000002 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.3995 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.11949999999997 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 105.99925000000002 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.99949999999997 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.5995 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 113.9995 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.3595 + }, + { + "M": 
46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 117.95950000000002 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.35950000000003 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 123.67949999999996 + }, + { + "M": 49152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 126.19949999999999 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 128.55949999999999 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.9195 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.47975 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 135.75974999999997 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.07975000000002 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 140.75975 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 142.55975 + }, + { + "M": 57344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.63949999999997 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 148.15949999999998 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 150.27974999999998 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 153.51950000000005 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 155.19949999999997 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.71975000000003 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 160.43949999999998 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 162.75974999999997 + }, + { + "M": 65536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 165.15975 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 166.95975000000004 + }, + { + "M": 67584, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 170.03975 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.03975000000003 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 175.03975 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.11975 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 178.71974999999998 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 182.31974999999994 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 184.92000000000004 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 187.03975000000003 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.55975 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 190.9197499999999 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.59975000000003 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 197.19975 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.31975 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.23975000000002 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.99975 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.19975 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 209.03974999999997 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 211.59975000000003 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.9197499999999 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.99974999999995 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 219.35974999999996 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 221.27975000000004 + }, + { 
+ "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 223.79999999999995 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.4400000000001 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 226.87999999999994 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 231.03999999999996 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.48000000000008 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 236.36000000000007 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.88 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 239.35999999999996 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 243.1599999999999 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.9999999999999 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 248.96025000000014 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.64 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.23999999999984 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 255.43999999999994 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 258.08000000000015 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.51999999999987 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.0400000000001 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.6802499999999 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 267.7599999999998 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 269.9199999999998 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.52 + }, + { + "M": 111616, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.91999999999985 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.4002500000001 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 279.8402500000001 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 282.5600000000001 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 285.24025 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.3202499999998 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.8002500000001 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 291.8402499999999 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.56025 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 296.4402500000001 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 299.3202500000001 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.5602499999999 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 304.64025000000004 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.24024999999995 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 309.72024999999996 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.16049999999996 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 311.76025000000004 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 316.6802500000001 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.24025000000006 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.0802500000001 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 323.88025000000016 + }, + { + "M": 
133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 324.20025 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 328.6802500000001 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.84024999999997 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 333.2802499999999 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 336.08025 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 336.40025000000026 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 340.48024999999984 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.32025 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 344.88024999999993 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.2802499999999 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.3605000000001 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.16050000000007 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.80050000000006 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.7605 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.3605 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 359.6405000000001 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 364.72050000000013 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.7205 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.08050000000014 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.20050000000003 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 371.52049999999986 + }, + { + "M": 154624, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 376.2805000000001 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.88049999999987 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.44049999999993 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.80049999999983 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.80050000000017 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.72050000000013 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.52049999999997 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.68050000000005 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.72050000000024 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.6007500000002 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.64075000000025 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 402.96074999999996 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 405.72075000000007 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 408.12075000000004 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 407.80049999999994 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 412.6007500000002 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.12074999999993 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 417.9607500000003 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 420.08074999999985 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.6807500000002 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
424.64075000000025 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.32074999999986 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 429.88049999999976 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.9607500000001 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.2807499999999 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 437.08100000000013 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.32075 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.4007499999999 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.4407500000001 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.0407499999999 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.16075000000046 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.9607500000002 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.4407500000003 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.7207499999997 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.2810000000003 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.48075000000006 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 465.24075000000016 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 465.92075000000034 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 468.7207500000002 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.80074999999977 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 473.56100000000015 + }, + { + "M": 196608, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 475.9209999999998 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.6010000000002 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 480.3209999999999 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 479.961 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 485.3609999999999 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.4409999999998 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.64099999999985 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.24099999999976 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 492.2409999999999 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 497.6009999999999 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 500.0409999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 503.08100000000053 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 504.9609999999998 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 504.44100000000003 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 510.9210000000003 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 513.0010000000002 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 514.681 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.2410000000002 + }, + { + "M": 215040, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 538.6012499999999 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 522.1209999999999 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.4809999999998 + }, + { + "M": 218112, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 526.8810000000001 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.4412500000001 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.8009999999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.6412499999999 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.8412500000002 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 539.6812500000001 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.2012499999998 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.6012499999999 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.7212500000003 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.04125 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 550.7612499999998 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 553.4412499999999 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 552.56125 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 559.08125 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.84125 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.4412499999999 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.9612499999998 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.04125 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.04125 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 573.6412499999999 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.8012500000002 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
578.1612500000001 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.7612500000005 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.8412500000002 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 584.5612500000004 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 587.6412499999997 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.5612500000002 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 589.2414999999999 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 594.7615000000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 597.6815000000001 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.2015000000001 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.9214999999999 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 601.7614999999998 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.6815000000001 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.0815000000002 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.6814999999997 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.0014999999999 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 614.3614999999998 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.8815000000002 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.0414999999998 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 624.8814999999997 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.3214999999998 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 625.4015000000002 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 631.4814999999999 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.4814999999999 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.6415 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 638.8415000000002 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.6014999999998 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 643.6015000000001 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.8015000000003 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 649.7215000000001 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 652.5215000000001 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.8815 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 657.8417500000003 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.4417500000002 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.7617499999999 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 665.0417500000001 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 662.64175 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.3617499999998 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 672.12175 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.2017500000002 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.5217500000006 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.8417499999998 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
681.2817499999999 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 684.0017499999999 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.8817499999998 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.1617499999998 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.92175 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.6417500000002 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 696.20175 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 698.3217500000003 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.9617500000002 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.5617499999998 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.6417500000005 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 708.6417499999998 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.0417500000001 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 713.4817499999999 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.9219999999998 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.92175 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 720.9219999999998 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 722.8819999999998 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.8820000000003 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 723.4419999999998 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.6819999999998 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
733.0419999999997 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.0419999999999 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.7219999999998 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.922 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.2419999999997 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.28225 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 747.8419999999996 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.642 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.4420000000002 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 755.6020000000001 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.5622500000004 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.0422499999997 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.4422500000007 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.6022500000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.402 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 769.8020000000001 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 773.0020000000006 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0820000000001 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.6822499999998 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 780.2422499999998 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.4022500000012 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 785.5222499999998 + 
}, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 787.6819999999998 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 784.8822500000001 + } + ], + "480": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 4.919000000000004 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 7.0390000000000015 + }, + { + "M": 2048, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 10.158999999999992 + }, + { + "M": 3072, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 12.918999999999997 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.359250000000003 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.119000000000007 + }, + { + "M": 6144, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 21.799249999999994 + }, + { + "M": 7168, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.599249999999984 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 27.358999999999988 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 29.839 + }, + { + "M": 10240, + "rows_per_block": 5, + 
"vec_size": 2, + "time_us": 32.319 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.27900000000001 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 37.79925 + }, + { + "M": 13312, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 40.59899999999999 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.23900000000002 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.398999999999994 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.31925 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 50.71925000000002 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.119249999999994 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 55.759250000000016 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 58.679249999999996 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 60.75925 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.35925000000002 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 65.83925000000002 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 68.27924999999999 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 70.83950000000002 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.55925000000002 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 75.95925 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 78.47925 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.11925 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 83.67925 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.07925000000003 + }, + { + "M": 32768, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 88.4795 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.39949999999999 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.59949999999999 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 96.03949999999999 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 98.67925 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 101.11950000000003 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.23949999999999 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.15924999999996 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 108.83924999999998 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 111.47949999999999 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.03950000000003 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 116.4795 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.03950000000003 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.15950000000001 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.11975000000001 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 126.71949999999995 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 129.4795 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.83950000000004 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 133.3995 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 136.23950000000002 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 139.07950000000002 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 141.87975 + }, + { + "M": 
55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 144.19975000000002 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.23949999999996 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 149.07974999999996 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 151.63949999999997 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 154.47950000000003 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 156.9995 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 158.83974999999995 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 161.83975 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.43975000000003 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 167.07974999999993 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.55975 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 170.95974999999999 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 174.39974999999998 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 177.15974999999992 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 179.39975000000004 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.83975000000004 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 183.55974999999995 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.07975000000005 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.67975000000007 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 192.35974999999996 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.39974999999998 + }, + { + "M": 76800, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 195.60000000000002 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.71974999999998 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 202.19975 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 204.71974999999998 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.31975 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 208.27974999999998 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 212.27999999999992 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 214.95999999999998 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.31975000000006 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 219.68 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 220.99999999999994 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 224.96000000000015 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 227.64 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.72000000000014 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 232.5599999999999 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.44000000000005 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 237.16000000000003 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 239.79999999999995 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 242.07999999999993 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.1599999999999 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.75999999999993 + }, + { + "M": 98304, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 250.00000000000017 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.6402499999999 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 254.07999999999987 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 256.91999999999996 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 257.59999999999997 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.5199999999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.44024999999993 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.8400000000001 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.76 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 268.9602500000002 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.56025 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 275.84024999999997 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 278.6802500000001 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.19999999999993 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.8002500000001 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 286.20000000000005 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.00025000000016 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 290.92025 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 293.52025000000003 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 294.2802499999999 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.6802500000001 + }, + { + "M": 119808, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 301.24024999999995 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 303.80025 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 305.9602500000001 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.24024999999995 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 310.68025 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 312.92025000000024 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 315.88024999999993 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.0402500000001 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.84024999999986 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 322.8002500000001 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 325.76025000000004 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 327.96025 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.40025 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.6405000000001 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 335.88024999999993 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.36024999999995 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 340.96025 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 342.9202499999999 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.0802500000002 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 348.2004999999999 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.2805000000001 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 351.0005 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 353.3605000000001 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 353.3605 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.0005000000001 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 360.56050000000016 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.3204999999999 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 365.5604999999998 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 365.92049999999995 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.8404999999998 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 373.1205000000001 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 375.24049999999977 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.04049999999995 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.6807499999999 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.96050000000014 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 385.2805000000003 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.2404999999999 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 390.2805000000001 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.9604999999999 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 395.32050000000004 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.1205000000001 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 399.9604999999999 + }, + { + "M": 162816, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 402.8805000000002 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 402.60075000000006 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 408.2805000000001 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.1607499999999 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 412.52075000000025 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.24074999999993 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 414.7207500000002 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 420.0807500000004 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.60074999999995 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 424.72074999999995 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.3607499999998 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 426.80074999999977 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 432.0410000000004 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.5207499999999 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 437.20074999999986 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.2807499999999 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 438.8007499999999 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 444.60074999999995 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.6407499999999 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.1207499999998 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 452.12075000000004 + }, + { + "M": 184320, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.32075 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8407500000002 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.40075 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.80074999999965 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 464.60074999999983 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 463.92075 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.32074999999963 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.4410000000005 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.3610000000001 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 476.40099999999984 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 475.5609999999999 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 481.6010000000001 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 484.2809999999997 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.241 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.6809999999998 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 487.9609999999998 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.00099999999975 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.36100000000033 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 498.8810000000001 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 501.36100000000005 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 500.28099999999995 + }, + { + "M": 205824, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 506.4010000000003 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 508.9209999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.2409999999997 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 514.1609999999998 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 512.3209999999997 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 518.4009999999998 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 521.3209999999997 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 523.6410000000001 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 526.001 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.761 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.1612499999999 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.08125 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 535.6812500000001 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 538.0812500000002 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.6012499999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.3212499999997 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 545.2012500000001 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 548.7212499999998 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 550.6012500000002 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 549.4812499999998 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 555.72125 + }, + { + "M": 227328, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 558.0412500000002 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.6012499999997 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.6012499999997 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.3612500000004 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.00125 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.6812499999999 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 573.2412499999998 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.4812499999998 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.04125 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 580.84125 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.9612499999998 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 585.6412500000001 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 587.48125 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 586.4812499999998 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.2415000000003 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 595.3215000000002 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2415000000001 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.4415000000004 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2415000000001 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 605.1614999999999 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.4415000000004 + }, + { + "M": 248832, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 609.8415 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 613.4015000000002 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.4815000000003 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.2415000000001 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 620.6814999999999 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.2014999999999 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 625.4815000000001 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.5615000000005 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.4415000000001 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.2414999999996 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 634.6815000000001 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.4415000000001 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.2414999999996 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.2815 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.6815000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.1614999999997 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 649.3615 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.6814999999999 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 654.1215 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.2017499999997 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.2017500000002 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 668.4817499999999 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.84175 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 673.92175 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.7617500000001 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 678.60175 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 680.8017499999999 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.9617500000002 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.1617499999998 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.64175 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.92175 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.1617499999998 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 692.2817499999996 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.4817499999997 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 702.2017500000002 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.8017500000003 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.8417499999998 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 704.5617500000003 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 711.8017500000005 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.4017500000002 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.002 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.6019999999999 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.3619999999999 + }, 
+ { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.3220000000003 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.0420000000004 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.5620000000004 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 731.9620000000002 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.682 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.5220000000004 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 739.3220000000003 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.8420000000001 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.8419999999996 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.5219999999999 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 749.1220000000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.0419999999999 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 754.4020000000003 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1220000000001 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 754.6019999999996 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 762.402 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.3619999999999 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.002 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 769.8420000000001 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.202 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.0420000000004 + }, + { + "M": 314368, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 777.242 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 779.4422499999998 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.8822500000003 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.9222500000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.8822499999999 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 789.0422499999997 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.28225 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 797.28225 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.2822500000004 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 802.5222500000004 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.6022499999999 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.9622499999998 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.3622500000001 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.1222499999999 + } + ], + "512": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119250000000001 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 128, + "rows_per_block": 1, + 
"vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 4.559000000000012 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 6.319000000000003 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 8.918999999999997 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 11.558999999999997 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 14.039000000000001 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 16.038999999999994 + }, + { + "M": 6144, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 18.238999999999997 + }, + { + "M": 7168, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 20.358999999999988 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 22.51899999999999 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 24.398999999999987 + }, + { + "M": 10240, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 26.67924999999999 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 28.75925 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 30.15925 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 32.879 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 34.99925000000002 + }, + { + "M": 15360, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 37.07925000000001 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 38.67925000000002 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 41.15924999999999 + }, + { + "M": 18432, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 43.159 + }, + { + "M": 19456, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 45.159 + }, + { + "M": 20480, + 
"rows_per_block": 4, + "vec_size": 4, + "time_us": 47.159249999999986 + }, + { + "M": 21504, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 49.23924999999999 + }, + { + "M": 22528, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 51.15900000000001 + }, + { + "M": 23552, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 53.199250000000006 + }, + { + "M": 24576, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 55.199250000000006 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 55.759249999999994 + }, + { + "M": 26624, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 59.15924999999999 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 59.35925 + }, + { + "M": 28672, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 63.31925000000001 + }, + { + "M": 29696, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 65.15924999999999 + }, + { + "M": 30720, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 67.31925000000001 + }, + { + "M": 31744, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 69.23925000000001 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 68.91925 + }, + { + "M": 33792, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 73.15925000000001 + }, + { + "M": 34816, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 75.23924999999998 + }, + { + "M": 35840, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 77.23924999999998 + }, + { + "M": 36864, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 79.15924999999996 + }, + { + "M": 37888, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 81.15925 + }, + { + "M": 38912, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 83.19949999999996 + }, + { + "M": 39936, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 85.23924999999996 + }, + { + "M": 40960, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 85.31924999999998 + }, + { + "M": 41984, + "rows_per_block": 4, + "vec_size": 4, + 
"time_us": 89.15950000000002 + }, + { + "M": 43008, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 91.19924999999998 + }, + { + "M": 44032, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 93.03950000000003 + }, + { + "M": 45056, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 95.07925 + }, + { + "M": 46080, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 97.27950000000001 + }, + { + "M": 47104, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 99.15924999999999 + }, + { + "M": 48128, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 101.19950000000003 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 99.35950000000005 + }, + { + "M": 50176, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 105.15950000000004 + }, + { + "M": 51200, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 107.11950000000002 + }, + { + "M": 52224, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 109.07925 + }, + { + "M": 53248, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 110.95950000000005 + }, + { + "M": 54272, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 113.11949999999996 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 111.47925000000001 + }, + { + "M": 56320, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 114.27949999999996 + }, + { + "M": 57344, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 118.91950000000003 + }, + { + "M": 58368, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 120.79950000000002 + }, + { + "M": 59392, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 122.87950000000004 + }, + { + "M": 60416, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 124.67950000000002 + }, + { + "M": 61440, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 126.87974999999999 + }, + { + "M": 62464, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 128.8795 + }, + { + "M": 63488, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 130.83950000000002 + }, + { 
+ "M": 64512, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 129.03950000000003 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 129.4795 + }, + { + "M": 66560, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 134.03950000000003 + }, + { + "M": 67584, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 138.75975 + }, + { + "M": 68608, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 140.75975000000005 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 139.83950000000004 + }, + { + "M": 70656, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 144.87975 + }, + { + "M": 71680, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 146.59975000000003 + }, + { + "M": 72704, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 148.6395 + }, + { + "M": 73728, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 150.5195 + }, + { + "M": 74752, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 149.47975000000002 + }, + { + "M": 75776, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 154.51975 + }, + { + "M": 76800, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 156.47975000000002 + }, + { + "M": 77824, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 158.63975 + }, + { + "M": 78848, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 160.27975000000004 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 158.27975000000004 + }, + { + "M": 80896, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 164.35975000000002 + }, + { + "M": 81920, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 166.35974999999996 + }, + { + "M": 82944, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 168.31975 + }, + { + "M": 83968, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 170.39975000000004 + }, + { + "M": 84992, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 172.27975000000004 + }, + { + "M": 86016, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 174.31975 + 
}, + { + "M": 87040, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 176.27974999999998 + }, + { + "M": 88064, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 178.31975 + }, + { + "M": 89088, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 180.15975000000003 + }, + { + "M": 90112, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 182.19975 + }, + { + "M": 91136, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 184.15974999999997 + }, + { + "M": 92160, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 186.23974999999996 + }, + { + "M": 93184, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 188.15974999999992 + }, + { + "M": 94208, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 189.99975000000012 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 187.31975 + }, + { + "M": 96256, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 194.03975000000003 + }, + { + "M": 97280, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 192.1997500000001 + }, + { + "M": 98304, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 193.20000000000005 + }, + { + "M": 99328, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 199.84000000000003 + }, + { + "M": 100352, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 201.80000000000007 + }, + { + "M": 101376, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 203.63974999999994 + }, + { + "M": 102400, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 205.80000000000007 + }, + { + "M": 103424, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 207.47999999999996 + }, + { + "M": 104448, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 209.64 + }, + { + "M": 105472, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 211.60000000000008 + }, + { + "M": 106496, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 213.60000000000002 + }, + { + "M": 107520, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 215.43974999999995 + }, + { + "M": 108544, + 
"rows_per_block": 4, + "vec_size": 4, + "time_us": 217.44 + }, + { + "M": 109568, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 219.55999999999983 + }, + { + "M": 110592, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 221.4799999999999 + }, + { + "M": 111616, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 223.43999999999983 + }, + { + "M": 112640, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 225.32000000000005 + }, + { + "M": 113664, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 227.43999999999983 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 219.11974999999995 + }, + { + "M": 115712, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 231.39999999999986 + }, + { + "M": 116736, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 233.08000000000004 + }, + { + "M": 117760, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 235.0797500000001 + }, + { + "M": 118784, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 237.16000000000008 + }, + { + "M": 119808, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 239.0000000000001 + }, + { + "M": 120832, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 241.0 + }, + { + "M": 121856, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 243.03999999999996 + }, + { + "M": 122880, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 245.0402499999999 + }, + { + "M": 123904, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 247.19999999999993 + }, + { + "M": 124928, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 248.9999999999999 + }, + { + "M": 125952, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 251.00000000000023 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 240.9202499999999 + }, + { + "M": 128000, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 255.19999999999993 + }, + { + "M": 129024, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 257.0799999999998 + }, + { + "M": 130048, + 
"rows_per_block": 4, + "vec_size": 4, + "time_us": 258.9202499999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 248.64 + }, + { + "M": 132096, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 262.96000000000015 + }, + { + "M": 133120, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 264.9200000000001 + }, + { + "M": 134144, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 266.80025 + }, + { + "M": 135168, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 260.84000000000003 + }, + { + "M": 136192, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 270.88024999999993 + }, + { + "M": 137216, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 272.84024999999997 + }, + { + "M": 138240, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 266.5999999999999 + }, + { + "M": 139264, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 269.55999999999995 + }, + { + "M": 140288, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 278.48025000000007 + }, + { + "M": 141312, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 280.28 + }, + { + "M": 142336, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 282.43999999999994 + }, + { + "M": 143360, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 284.28 + }, + { + "M": 144384, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 286.24 + }, + { + "M": 145408, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 288.15999999999997 + }, + { + "M": 146432, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 290.24024999999995 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 275.80000000000007 + }, + { + "M": 148480, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 294.2002500000001 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 289.2402500000002 + }, + { + "M": 150528, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 298.00024999999994 + }, + { + "M": 151552, + "rows_per_block": 4, + "vec_size": 4, + 
"time_us": 299.9602500000001 + }, + { + "M": 152576, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 302.00025000000005 + }, + { + "M": 153600, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 304.12025000000006 + }, + { + "M": 154624, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 305.88024999999993 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 290.8802499999998 + }, + { + "M": 156672, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 298.9202500000001 + }, + { + "M": 157696, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 301.9202500000001 + }, + { + "M": 158720, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 313.92025 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 304.24024999999995 + }, + { + "M": 160768, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 317.72024999999996 + }, + { + "M": 161792, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 319.7605000000001 + }, + { + "M": 162816, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 321.0002499999998 + }, + { + "M": 163840, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 305.72024999999996 + }, + { + "M": 164864, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 325.80049999999983 + }, + { + "M": 165888, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 326.1602499999999 + }, + { + "M": 166912, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 329.5602500000001 + }, + { + "M": 167936, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 321.3605 + }, + { + "M": 168960, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 333.7202500000001 + }, + { + "M": 169984, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 324.88025000000016 + }, + { + "M": 171008, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 337.4402499999999 + }, + { + "M": 172032, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 338.1605000000002 + }, + { + "M": 173056, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 
341.40025 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 333.8802499999998 + }, + { + "M": 175104, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 343.64025000000004 + }, + { + "M": 176128, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 336.9202499999998 + }, + { + "M": 177152, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 349.3204999999999 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 350.96050000000014 + }, + { + "M": 179200, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 343.2805000000001 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 335.0804999999999 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 347.00025000000005 + }, + { + "M": 182272, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 359.28049999999996 + }, + { + "M": 183296, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 361.2404999999999 + }, + { + "M": 184320, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 350.00049999999976 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 363.92049999999983 + }, + { + "M": 186368, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 355.1205 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 357.44049999999993 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 349.8002499999999 + }, + { + "M": 189440, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 371.80050000000006 + }, + { + "M": 190464, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 375.04049999999995 + }, + { + "M": 191488, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 375.8805000000001 + }, + { + "M": 192512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 378.8807499999997 + }, + { + "M": 193536, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 378.12049999999977 + }, + { + "M": 194560, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 
381.6004999999999 + }, + { + "M": 195584, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 384.76075000000003 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 364.6405 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 386.2805000000003 + }, + { + "M": 198656, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 390.7204999999999 + }, + { + "M": 199680, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 389.2007500000002 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 391.88049999999976 + }, + { + "M": 201728, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 395.24074999999993 + }, + { + "M": 202752, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 384.52049999999963 + }, + { + "M": 203776, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 400.64075 + }, + { + "M": 204800, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 399.20074999999974 + }, + { + "M": 205824, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 404.44075 + }, + { + "M": 206848, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 406.4805000000001 + }, + { + "M": 207872, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 408.44074999999975 + }, + { + "M": 208896, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 406.8807500000005 + }, + { + "M": 209920, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 412.3607500000003 + }, + { + "M": 210944, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 414.48074999999994 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 412.1207499999998 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 414.1205 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 416.2405000000001 + }, + { + "M": 215040, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 422.32074999999963 + }, + { + "M": 216064, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 411.00075000000015 + }, + { + 
"M": 217088, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 426.36075000000005 + }, + { + "M": 218112, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 428.16075000000023 + }, + { + "M": 219136, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 427.2007500000002 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 417.5607500000003 + }, + { + "M": 221184, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 418.6007500000003 + }, + { + "M": 222208, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 436.1207500000005 + }, + { + "M": 223232, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 438.1607499999998 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 424.7607499999997 + }, + { + "M": 225280, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 438.5607500000001 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 438.40075 + }, + { + "M": 227328, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 429.8407500000003 + }, + { + "M": 228352, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 447.8810000000003 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 424.2807499999999 + }, + { + "M": 230400, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 451.80099999999993 + }, + { + "M": 231424, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 453.76075000000014 + }, + { + "M": 232448, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 455.68074999999976 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 451.68074999999976 + }, + { + "M": 234496, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 445.44075 + }, + { + "M": 235520, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 461.721 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 456.92074999999977 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 458.64099999999985 + }, + { + "M": 238592, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 450.84074999999984 + }, + { + "M": 239616, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 462.721 + }, + { + "M": 240640, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 468.20100000000025 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 466.5609999999999 + }, + { + "M": 242688, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 469.6009999999999 + }, + { + "M": 243712, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 477.44075 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 472.2810000000004 + }, + { + "M": 245760, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 454.24075000000016 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 475.60075000000006 + }, + { + "M": 247808, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 481.24099999999976 + }, + { + "M": 248832, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 487.4010000000005 + }, + { + "M": 249856, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 485.001 + }, + { + "M": 250880, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.32100000000037 + }, + { + "M": 251904, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 476.1210000000001 + }, + { + "M": 252928, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 495.4409999999998 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 489.001 + }, + { + "M": 254976, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 499.12099999999987 + }, + { + "M": 256000, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 492.721 + }, + { + "M": 257024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 484.4010000000003 + }, + { + "M": 258048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 498.1610000000003 + }, + { + "M": 259072, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 507.0412500000002 + }, + { + "M": 260096, + "rows_per_block": 4, + "vec_size": 4, + 
"time_us": 508.96100000000024 + }, + { + "M": 261120, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 511.0012500000001 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.8410000000001 + }, + { + "M": 263168, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 515.0410000000002 + }, + { + "M": 264192, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 497.44100000000003 + }, + { + "M": 265216, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 513.681 + }, + { + "M": 266240, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 514.8812500000006 + }, + { + "M": 267264, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 522.8812500000003 + }, + { + "M": 268288, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 525.6812499999999 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 524.1212499999999 + }, + { + "M": 270336, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 515.8812500000001 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 528.1212499999997 + }, + { + "M": 272384, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 533.28125 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 531.7612499999998 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 533.04125 + }, + { + "M": 275456, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 539.2412499999998 + }, + { + "M": 276480, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 537.6812500000005 + }, + { + "M": 277504, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 543.52125 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 531.7612500000005 + }, + { + "M": 279552, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 534.6812499999996 + }, + { + "M": 280576, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 549.56125 + }, + { + "M": 281600, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 551.3612499999999 + }, + { + "M": 
282624, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 539.56125 + }, + { + "M": 283648, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 555.2412500000003 + }, + { + "M": 284672, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 557.2412499999998 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 554.6012500000004 + }, + { + "M": 286720, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 546.0812500000002 + }, + { + "M": 287744, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 552.4412500000001 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 560.72125 + }, + { + "M": 289792, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 566.9612500000001 + }, + { + "M": 290816, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 569.2012499999998 + }, + { + "M": 291840, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 571.1212499999999 + }, + { + "M": 292864, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 562.2412500000005 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 569.2812500000002 + }, + { + "M": 294912, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 563.00125 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 573.4012500000001 + }, + { + "M": 296960, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 575.5212500000002 + }, + { + "M": 297984, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 583.0812500000002 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 578.84125 + }, + { + "M": 300032, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 587.04125 + }, + { + "M": 301056, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 574.2812500000002 + }, + { + "M": 302080, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 579.4012499999997 + }, + { + "M": 303104, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 592.9612499999998 + }, + { + "M": 304128, + "rows_per_block": 6, + "vec_size": 
4, + "time_us": 580.2412499999998 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 589.5612499999997 + }, + { + "M": 306176, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 598.6415 + }, + { + "M": 307200, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 596.6414999999997 + }, + { + "M": 308224, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 602.6815000000001 + }, + { + "M": 309248, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 604.6414999999997 + }, + { + "M": 310272, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 606.6014999999998 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 582.1214999999997 + }, + { + "M": 312320, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 610.5614999999998 + }, + { + "M": 313344, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.7215000000001 + }, + { + "M": 314368, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 614.3615000000009 + }, + { + "M": 315392, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 615.3615 + }, + { + "M": 316416, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 614.2014999999999 + }, + { + "M": 317440, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.7614999999998 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 605.1614999999997 + }, + { + "M": 319488, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 608.5215000000001 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 618.4417500000002 + }, + { + "M": 321536, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 628.4814999999996 + }, + { + "M": 322560, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 625.4817500000008 + }, + { + "M": 323584, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 623.2014999999999 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 629.08175 + }, + { + "M": 325632, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 
634.0014999999999 + }, + { + "M": 326656, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 638.5217499999999 + }, + { + "M": 327680, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 634.5617500000001 + } + ], + "576": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000106 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 128, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9990000000000023 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919250000000005 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.279000000000003 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.558999999999997 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599249999999998 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.639000000000003 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.519000000000005 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.71900000000002 + }, + { + "M": 7168, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 44.75899999999999 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.319250000000004 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.19925000000001 + }, + { + "M": 10240, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 53.63925000000002 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 60.759250000000016 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.91925000000001 + }, + { + "M": 13312, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 74.67925 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.1995 + }, + { + "M": 15360, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 81.27925000000002 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.0395 + }, + { + "M": 17408, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 96.63925 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.19950000000003 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 107.55950000000001 + }, + { + "M": 20480, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.39925000000001 + }, + { + "M": 21504, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 110.35950000000001 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.3195 + }, + { + "M": 23552, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 120.35950000000001 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.15949999999994 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.03950000000003 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.07975000000005 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 140.43975 + }, + { + "M": 28672, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.19975 + }, + { + "M": 29696, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 150.3995 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.47975 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.91975000000002 + }, + { + "M": 32768, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 158.27975 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 166.39975 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 172.51975 + }, + { + "M": 35840, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 184.27974999999998 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 181.07975 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 190.71975000000003 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 197.99975000000003 + }, + { + "M": 39936, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 200.72 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.07974999999985 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 210.51999999999992 + }, + { + "M": 43008, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 215.88 + }, + { + "M": 44032, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 220.84000000000003 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 221.83974999999992 + }, + { + "M": 46080, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 229.2 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.27999999999997 + }, + { + "M": 48128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 236.68 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.76000000000005 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 239.4000000000001 + }, + { + "M": 51200, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 254.24024999999978 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.52 + }, + { + "M": 53248, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 271.5999999999999 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 269.5200000000001 + }, + { + "M": 55296, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 269.16 + }, + { + "M": 56320, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 276.0 + }, + { + "M": 57344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 280.99999999999994 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.48024999999996 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 304.56050000000005 + }, + { + "M": 60416, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 307.44025000000005 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.8002500000002 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 307.5602500000001 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 310.6402500000001 + }, + { + "M": 64512, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 324.8802499999999 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.28025 + }, + { + "M": 66560, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 328.92025 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48025 + }, + { + "M": 68608, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 340.88049999999987 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 339.9604999999999 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 345.4804999999999 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 350.0005 + }, + { + "M": 72704, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 354.9604999999999 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 348.84050000000013 + }, + { + "M": 74752, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 364.56049999999993 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.5205000000001 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.0405000000003 + 
}, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 368.04075 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.04075 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.9605 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.12075000000016 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.4007499999999 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.9207500000002 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 414.04075 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 401.2404999999999 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 417.20074999999997 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.28075 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 428.76075000000003 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 442.4010000000002 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 453.001 + }, + { + "M": 91136, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 443.4809999999999 + }, + { + "M": 92160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 445.241 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 461.3607499999998 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.16100000000006 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 455.56074999999987 + }, + { + "M": 96256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 468.1610000000002 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.40100000000007 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.20100000000036 + }, + { + "M": 99328, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 483.0010000000001 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.2812499999999 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 472.4409999999999 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.20100000000014 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 497.081 + }, + { + "M": 104448, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 503.0009999999998 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.56100000000004 + }, + { + "M": 106496, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 516.2010000000001 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.5609999999999 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 520.6809999999999 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.2812500000002 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.4010000000002 + }, + { + "M": 111616, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 541.0012499999998 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.8412500000001 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.5612500000002 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 538.0010000000001 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 558.7212499999999 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 542.4012500000001 + }, + { + "M": 117760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 552.5212499999999 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 579.3612499999999 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.9612499999998 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 578.00125 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.6412500000004 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.32125 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 580.7212499999998 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 585.5215000000003 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 590.2414999999999 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.9614999999999 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.0415 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 630.0815 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 631.4414999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 625.0414999999998 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 633.6814999999999 + }, + { + "M": 133120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 623.6014999999998 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 632.4815000000001 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 655.1617500000002 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 662.92175 + }, + { + "M": 137216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 663.5217500000001 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 636.9614999999999 + }, + { + "M": 139264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 673.44175 + }, + { + "M": 140288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 678.4417500000004 + }, + { + "M": 141312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 677.7217500000002 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.44175 + }, + { + "M": 143360, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 659.44175 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.12175 + }, + { + "M": 145408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 702.12175 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.5617500000003 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 689.2817499999999 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 713.922 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 720.9219999999998 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.7617499999997 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 700.6817500000002 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 713.1219999999998 + }, + { + "M": 153600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 728.722 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.762 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.1220000000003 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.2819999999997 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 728.922 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.6019999999999 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 768.3219999999997 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.922 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 777.6822499999998 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 773.4422499999996 + }, + { + "M": 163840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.1619999999998 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.76225 + }, + 
{ + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.8422499999999 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.0822499999997 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.4822500000002 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 796.6822500000001 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.5222499999998 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.7622499999998 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 814.1222499999997 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.7222499999998 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.56225 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.4024999999999 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.1222499999999 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.9224999999999 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.1624999999999 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 843.1225 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 832.0024999999998 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 856.8024999999998 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 872.6824999999997 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 877.2825000000003 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 866.4424999999997 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.3625000000002 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 891.1225 + }, + { + "M": 187392, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 895.8427500000003 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 889.6424999999999 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 894.6027499999998 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.5627499999996 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 893.7627500000001 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 897.7627499999999 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 924.6827499999999 + }, + { + "M": 194560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 918.2027500000004 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 912.3627500000002 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 916.7227499999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.92275 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 947.64275 + }, + { + "M": 199680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 941.92275 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 946.8827500000007 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 951.7627500000001 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 945.60275 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 960.6030000000005 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 938.4827500000001 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 969.963 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 986.2430000000004 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 979.3229999999999 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 984.8030000000003 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.0029999999997 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 982.9230000000005 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.8030000000006 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 992.883 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.6829999999997 + }, + { + "M": 215040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1024.48325 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.44325 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.7632500000002 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1026.08325 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.5632500000004 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.7632499999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1029.6832499999998 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632499999997 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.84325 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.72325 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.68325 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.5632499999997 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1068.1232500000006 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.2832500000004 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1068.3632500000003 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1086.2034999999996 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.3235000000004 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.3635 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.7635 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1103.4035000000003 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.9635000000003 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.3634999999995 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1106.5635000000002 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6835000000005 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.6035000000004 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1133.44375 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1137.0434999999998 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.7635000000005 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1146.1637499999997 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1148.2837499999996 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.3235 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1168.1637500000002 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.28375 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1166.8837500000004 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1163.5237499999998 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1159.0837500000002 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1181.24375 
+ }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1177.84375 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.4837499999999 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.8437500000005 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.80375 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.6437500000002 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1224.004 + }, + { + "M": 259072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1206.3639999999996 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.0839999999998 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1205.1639999999998 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1220.6840000000002 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.7240000000002 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1247.8039999999996 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1245.7640000000006 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1218.2440000000001 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1251.1240000000005 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1274.1242499999998 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1264.5642500000004 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.4040000000005 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.00425 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.6442499999998 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.5642499999994 + 
}, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1277.2842499999997 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.2442500000002 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1280.0042499999995 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.2042500000002 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1311.2442500000002 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6442500000003 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.7644999999998 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.4442500000005 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.2042500000002 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.2845000000002 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6045 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.2044999999998 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.04425 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1342.8844999999997 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.3245000000006 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.6045000000004 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.2844999999993 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1339.5645000000004 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1389.2047499999999 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1368.1245000000004 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
1387.5247500000005 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1392.4447499999997 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.5645000000004 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1397.4047500000006 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1395.4047500000001 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.16475 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1415.6847500000003 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1407.2447499999998 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1420.8047499999998 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1435.68475 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.4847499999996 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1428.5247500000005 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.0847499999995 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1455.045 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1459.6849999999995 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.52475 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1452.645 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.8047500000002 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1462.245 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1477.5650000000005 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.7649999999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1481.9649999999997 + }, 
+ { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1456.565 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1496.8049999999998 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.6449999999995 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1500.685 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1501.5250000000005 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.2850000000003 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1505.8449999999998 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1515.6052500000005 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1526.2052499999995 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1540.965250000001 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.08525 + } + ], + "640": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.879000000000005 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + { + "M": 1024, + 
"rows_per_block": 3, + "vec_size": 2, + "time_us": 9.479 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.558999999999997 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599250000000005 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.519000000000005 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.399000000000015 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.279250000000005 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 39.43924999999997 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 43.71925 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 48.71924999999999 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.27924999999999 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 60.39925 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.27925000000001 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.67925 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 74.55924999999999 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 78.99950000000001 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 83.27924999999998 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 86.35925 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 92.31925000000003 + }, + { + "M": 19456, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 99.51924999999997 + }, + { + "M": 20480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 102.9195 + }, + { + "M": 21504, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 109.35950000000001 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 112.55949999999996 + }, + { + 
"M": 23552, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 119.39949999999996 + }, + { + "M": 24576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 119.07950000000002 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 124.1995 + }, + { + "M": 26624, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 134.35975 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 139.27950000000004 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 138.31950000000003 + }, + { + "M": 29696, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 152.4795 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 146.07975000000002 + }, + { + "M": 31744, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 159.31950000000003 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 157.23975 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 172.99975000000003 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.04000000000002 + }, + { + "M": 35840, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 176.23975000000004 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 179.79975000000005 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.87975000000006 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 196.56 + }, + { + "M": 39936, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.27999999999997 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 192.43975000000006 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 210.68 + }, + { + "M": 43008, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 214.07975000000005 + }, + { + "M": 44032, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 219.15975000000003 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + 
"time_us": 220.24 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.75999999999993 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 229.92000000000002 + }, + { + "M": 48128, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 234.88000000000005 + }, + { + "M": 49152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.16000000000008 + }, + { + "M": 50176, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 254.56 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.28000000000014 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 252.79999999999998 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 262.4799999999999 + }, + { + "M": 54272, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 264.3202499999999 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 267.44 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 272.88000000000005 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 278.9202499999998 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 293.28025 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.48024999999984 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 295.72025000000025 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 306.32024999999993 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 303.00025000000005 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 319.5202500000001 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.6002500000001 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.1202499999999 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 324.0002499999999 + }, + { + "M": 67584, 
+ "rows_per_block": 8, + "vec_size": 2, + "time_us": 317.56049999999993 + }, + { + "M": 68608, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 338.4005000000001 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.3605000000001 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.0804999999999 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 347.3605 + }, + { + "M": 72704, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.9604999999998 + }, + { + "M": 73728, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 363.76049999999987 + }, + { + "M": 74752, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 377.1205000000001 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 366.80050000000017 + }, + { + "M": 76800, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 376.2007500000001 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 376.56050000000005 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.9607500000001 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 383.80075 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.52049999999997 + }, + { + "M": 81920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 384.0005000000002 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.64075 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 400.20074999999997 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 405.00075000000004 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.96074999999996 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 418.68075 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 425.48075000000006 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 434.0407499999998 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 421.84074999999973 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 426.64075000000014 + }, + { + "M": 92160, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 442.0407499999998 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 458.32074999999986 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.0407500000001 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 456.80100000000004 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 461.121 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.1610000000002 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 459.80100000000004 + }, + { + "M": 99328, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 488.52099999999984 + }, + { + "M": 100352, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 483.7209999999998 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 484.28099999999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 470.5210000000002 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 482.64099999999974 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.44100000000003 + }, + { + "M": 105472, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 508.081 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.04100000000017 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.721 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.76100000000014 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 533.2412499999998 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.8012499999998 
+ }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.0412499999998 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.48125 + }, + { + "M": 113664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 543.24125 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.76125 + }, + { + "M": 115712, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 556.72125 + }, + { + "M": 116736, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 557.9212500000001 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.4012500000003 + }, + { + "M": 118784, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 571.4012499999999 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 569.2012500000001 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.4012499999999 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.6012499999999 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 563.1212500000001 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.3615 + }, + { + "M": 124928, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 600.6815000000001 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.6415000000002 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 602.4815000000001 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 607.4014999999999 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.1614999999999 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.6015 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 610.2415000000001 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 618.8815000000002 + }, + { + "M": 133120, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 630.6415000000002 + }, + { + "M": 134144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 644.6814999999999 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 643.2414999999999 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.8014999999998 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.64175 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 653.8817500000002 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.1215 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.9617500000002 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 660.5217500000003 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 672.5617499999998 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.1217500000005 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.3217500000001 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 689.5617499999998 + }, + { + "M": 146432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 703.12175 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 685.44175 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 701.12175 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.88175 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.8020000000004 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 707.202 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.922 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 733.402 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 729.2820000000002 + }, + { + "M": 
155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 734.0420000000004 + }, + { + "M": 156672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 746.8419999999999 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.5620000000004 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 746.0820000000001 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 752.9220000000003 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.922 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.48225 + }, + { + "M": 162816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 775.96225 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.3219999999999 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.4419999999998 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 781.2822499999997 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 797.0422500000002 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 790.96225 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.2822500000004 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 801.4822500000002 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 795.48225 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 798.3222499999999 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.52225 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.8822499999997 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 823.6822500000001 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 828.4822500000002 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 822.0422499999997 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 838.5625 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.0024999999998 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 836.2025000000001 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 851.9224999999999 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 856.6025 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 861.8825000000002 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.2424999999998 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 870.8025 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.4825000000001 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 890.9625000000001 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 884.8824999999997 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 900.5227499999999 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.4027500000002 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 888.6024999999997 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.6427499999998 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.1227500000002 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 907.9627500000006 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 906.6827500000002 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 922.4427500000004 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.7227499999999 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
910.9627500000001 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 920.9627500000004 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 952.6029999999998 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.123 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.88275 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 944.8827500000004 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.6427500000002 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 964.4030000000005 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 969.0429999999997 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 963.5629999999996 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 967.7630000000001 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.0029999999999 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 988.443 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.7230000000005 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 987.2429999999997 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2030000000001 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 979.9230000000002 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1011.0430000000001 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1004.843 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1032.76325 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1014.2032499999998 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1023.6432499999996 + }, + { + 
"M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632500000002 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1038.5632500000002 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.4032500000003 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.1232499999996 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.64325 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1048.32325 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.8032500000004 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.84325 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1062.4432499999998 + }, + { + "M": 230400, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1089.2835 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1083.1235000000001 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.6834999999996 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1090.0434999999998 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1105.0435000000002 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1082.7635 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1106.6835 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1100.1635 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1127.0835000000002 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.5235 + }, + { + "M": 240640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1114.6435000000001 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1116.8835 + }, + { + "M": 242688, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1135.0037499999999 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1139.76375 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1151.84375 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.2034999999996 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.6837500000001 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.0834999999997 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1163.4437500000004 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.36375 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.1637499999997 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.80375 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1182.36375 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1187.1237499999997 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.2040000000002 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4039999999995 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.5637499999998 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1191.8837499999995 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.6440000000002 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1211.1640000000002 + }, + { + "M": 261120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1219.924 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.8439999999996 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.6440000000002 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1240.2440000000006 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1234.484 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1211.6039999999998 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.004 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1240.0839999999998 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1259.364 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.1242499999998 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.7240000000002 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1273.4842499999995 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.0842500000003 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.6442500000003 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.96425 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1270.40425 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.88425 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1291.2042500000002 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1302.5242500000004 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1311.56425 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1314.7642499999997 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.0842499999999 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.96425 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1319.40425 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1335.1644999999999 + }, + { + "M": 
286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.8042499999997 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1339.5645 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.6444999999994 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.4445 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1343.1644999999999 + }, + { + "M": 291840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1363.5645 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.4044999999996 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1373.0445000000004 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1367.0445 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1377.0845 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1363.5645 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.8047500000002 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1380.4845 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1395.44475 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1400.12475 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1386.7247500000003 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1404.564750000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1414.2847499999998 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1435.96475 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.0447499999996 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.68475 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1443.8449999999998 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.3647499999997 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.1647499999995 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.565000000001 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1444.0847500000004 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1463.165 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1460.4849999999997 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1464.8850000000007 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.2449999999994 + }, + { + "M": 317440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.165 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.5250000000005 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.605 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1484.4850000000001 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.4449999999997 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1511.1252499999996 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1513.8452500000003 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1515.12525 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1512.76525 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.68525 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1503.605 + } + ], + "768": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999856 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + 
"M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 1.9990000000000026 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 128, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.0789999999999935 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.839000000000006 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 9.599000000000004 + }, + { + "M": 2048, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.478999999999992 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 19.799 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.599000000000018 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.599249999999998 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.358999999999995 + }, + { + "M": 7168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 39.47924999999999 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.07924999999999 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 49.75924999999998 + }, + { + "M": 10240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 53.479250000000015 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 59.83925000000002 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 62.95950000000001 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 68.91925000000002 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + 
"time_us": 74.83924999999999 + }, + { + "M": 15360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 77.3195 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 81.79924999999997 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.11924999999998 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 94.87925000000001 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 98.27925 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 100.03949999999999 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 107.1995 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 114.83950000000002 + }, + { + "M": 23552, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 119.95950000000002 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 119.71950000000001 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 127.71949999999998 + }, + { + "M": 26624, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 134.83975 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 139.83975000000004 + }, + { + "M": 28672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 138.15949999999998 + }, + { + "M": 29696, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 149.95950000000002 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 148.27974999999995 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 164.03975000000003 + }, + { + "M": 32768, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 164.95974999999993 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 165.83950000000002 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 178.23975000000004 + }, + { + "M": 35840, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 178.63975000000005 + }, + { + "M": 
36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 180.43975 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 186.63975 + }, + { + "M": 38912, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 198.92000000000002 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 201.99975 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.35974999999996 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.3597499999999 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 209.83975000000004 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 215.95974999999999 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 213.31974999999994 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.43999999999994 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 230.76000000000005 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 228.95999999999998 + }, + { + "M": 49152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.76 + }, + { + "M": 50176, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 249.9199999999999 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.35999999999999 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 258.3602499999999 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 252.04000000000005 + }, + { + "M": 54272, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 275.32025000000004 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 267.6 + }, + { + "M": 56320, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 277.64 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 281.92025000000007 + }, + { + "M": 58368, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 
289.56025000000005 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 280.56025000000005 + }, + { + "M": 60416, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 293.80025 + }, + { + "M": 61440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 289.88025000000016 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 303.7605000000001 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 308.72024999999985 + }, + { + "M": 64512, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 311.5202499999999 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.96024999999986 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.6402499999999 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 318.44025 + }, + { + "M": 68608, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 339.52049999999986 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 337.96025000000003 + }, + { + "M": 70656, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 341.4005000000001 + }, + { + "M": 71680, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.1204999999999 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 353.9604999999998 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 355.4805 + }, + { + "M": 74752, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 363.28049999999996 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 357.16049999999984 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.2007500000002 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 377.4805 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 382.40049999999997 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.76075000000014 + }, + { + "M": 80896, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 392.0007499999999 + }, + { + "M": 81920, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 385.48050000000035 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 390.40049999999997 + }, + { + "M": 83968, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 407.44050000000016 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 406.8407500000003 + }, + { + "M": 86016, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.44075 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 431.40099999999984 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.12049999999977 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 429.8007499999999 + }, + { + "M": 90112, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 445.64099999999985 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 450.5207499999999 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.7207499999997 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.8807499999999 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 470.081 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 458.36099999999976 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 458.7207499999997 + }, + { + "M": 97280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 471.1610000000002 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 472.84100000000024 + }, + { + "M": 99328, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 481.0010000000002 + }, + { + "M": 100352, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 484.7209999999999 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.72100000000023 + }, + { + "M": 102400, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 472.001 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 489.92100000000005 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.8412500000001 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 515.6809999999998 + }, + { + "M": 106496, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 514.6010000000003 + }, + { + "M": 107520, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 515.6409999999998 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 502.84099999999967 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 523.4812499999998 + }, + { + "M": 110592, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 530.1612500000001 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.1612500000001 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 548.6012500000004 + }, + { + "M": 113664, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 544.6812499999999 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 535.8812499999997 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 551.6812500000001 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 550.6812500000001 + }, + { + "M": 117760, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 568.04125 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.28125 + }, + { + "M": 119808, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 585.7615000000003 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.7212499999998 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.24125 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.7612499999998 + }, + { + "M": 123904, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
597.6014999999998 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 605.7615000000003 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.1215 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 604.0815 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.9214999999997 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 617.4814999999999 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 607.1614999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 611.9214999999999 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 627.6415 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 632.6415 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 640.8415 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.8015000000003 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.9215000000002 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.2414999999999 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.84175 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.7617499999997 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.5217499999999 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.6017500000003 + }, + { + "M": 142336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 685.2017500000004 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 667.92175 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 683.7617499999997 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 688.5217499999997 + }, + { + "M": 146432, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 704.2417500000001 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 697.9617499999999 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.7617500000006 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.3620000000005 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 712.1219999999996 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.5620000000008 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 724.1219999999998 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.402 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.1620000000003 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.6820000000002 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.1219999999998 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 745.0420000000001 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 761.162 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.5220000000004 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.1622499999996 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2020000000002 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.0419999999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.5619999999999 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 789.56225 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 783.24225 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.8022499999997 + }, + { + "M": 167936, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 794.5622499999997 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.6022499999999 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 802.0422499999997 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.80225 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.1622500000003 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.9222500000001 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 820.6822499999998 + }, + { + "M": 175104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 835.8825000000004 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 831.3224999999998 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 835.8824999999999 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 819.8822500000001 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 840.8425000000004 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 837.8825000000002 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 853.6824999999999 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 869.3625000000002 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.1224999999997 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 843.2825 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 872.4824999999998 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 878.0025 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 871.0425 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 886.8425 + }, + { + "M": 189440, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
891.3225000000004 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.16275 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 900.8827500000002 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.7227499999999 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 899.2427499999997 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.92275 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.60275 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.4827500000006 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 918.12275 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.7627500000003 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.1227500000009 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.16275 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 937.2027499999999 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 953.0027499999997 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 957.6030000000001 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 946.7627500000003 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.3629999999998 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 971.9229999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 976.2830000000004 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.2829999999999 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.2429999999999 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.8030000000003 + }, + { + "M": 
211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 984.2829999999999 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1000.203 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1004.7230000000005 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 993.0029999999997 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1024.0832499999997 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.2832499999998 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1023.6432500000001 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1038.0832500000004 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.80325 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1027.64325 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.0832500000001 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1067.7632500000004 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1041.20325 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1029.16325 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.0032500000002 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.1635 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1070.2832500000004 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1076.2835 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1073.0432500000002 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1104.6035000000002 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1088.4835000000003 + }, + { + "M": 233472, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 1093.2035000000005 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1099.4034999999994 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1096.2034999999996 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1096.6835000000005 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1102.5234999999998 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1107.2835 + }, + { + "M": 239616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.1235 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6834999999996 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1121.6034999999997 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.0435000000002 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.44375 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.6037499999998 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1122.0034999999998 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1164.4837500000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1148.2437499999996 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1166.0437499999998 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1168.2437499999996 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1176.9237499999995 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1177.6037499999998 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1182.24375 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1189.8037499999996 + }, + { + "M": 254976, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1183.6437499999997 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.7637500000005 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.1640000000002 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.364 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.884 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1225.2439999999997 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.5239999999994 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1216.1639999999998 + }, + { + "M": 263168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1220.8039999999996 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1222.8439999999996 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1241.3240000000005 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.7639999999997 + }, + { + "M": 267264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1250.4839999999997 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1247.8040000000005 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.6842500000007 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.1242500000003 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1262.1242499999998 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.96425 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.0442500000004 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1304.96425 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.36425 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1285.8442499999996 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.2442500000002 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.3242499999997 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1310.7642499999997 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1301.40425 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1309.0842499999994 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.7644999999998 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.6844999999998 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1345.5245 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.2045000000003 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.3242500000001 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.0045000000005 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1364.3245000000002 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.3645000000001 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1358.6045 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1335.9644999999996 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1378.7245000000003 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1352.1644999999999 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1370.9244999999996 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1386.6447500000004 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1378.9645000000005 + }, + { + "M": 297984, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1390.7647500000003 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1395.2047499999994 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1400.0447500000005 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.9247500000006 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1390.8447500000002 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.8047500000002 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1428.9647499999996 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1417.2447499999994 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.6447500000004 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1415.4847499999996 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1443.8447499999997 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1437.2847499999998 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.245 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.8450000000007 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1459.565 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1456.2849999999999 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.8450000000003 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.005 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1485.3250000000007 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.8850000000007 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1478.8050000000012 + }, + { + "M": 319488, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 1478.3250000000003 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.0849999999996 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1494.045 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.7249999999995 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1514.80525 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1524.24525 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1524.165250000001 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1522.8852500000003 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.9250000000002 + } + ], + "800": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999893 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9989999999999957 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.679249999999996 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.638999999999989 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 19.959000000000003 + }, 
+ { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 24.839000000000013 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 29.839250000000014 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.71925000000001 + }, + { + "M": 7168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 40.079249999999995 + }, + { + "M": 8192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.63925000000001 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 49.479249999999986 + }, + { + "M": 10240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 53.999249999999996 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 60.31925000000001 + }, + { + "M": 12288, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.43924999999999 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 71.4795 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 74.55925 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 79.99925 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.63950000000003 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 92.15949999999998 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.8795 + }, + { + "M": 19456, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 102.5595 + }, + { + "M": 20480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 104.47950000000003 + }, + { + "M": 21504, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 112.9595 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 116.07949999999998 + }, + { + "M": 23552, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 121.27950000000001 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.1995 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 129.1995 + }, + 
{ + "M": 26624, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 136.51950000000002 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 141.5195 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 155.79950000000002 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 148.99949999999993 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 148.35950000000003 + }, + { + "M": 31744, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 164.7595 + }, + { + "M": 32768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 163.87949999999998 + }, + { + "M": 33792, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 175.03975000000003 + }, + { + "M": 34816, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 176.87975 + }, + { + "M": 35840, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 185.67974999999996 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 182.31975000000006 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 194.71975000000003 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 199.55975 + }, + { + "M": 39936, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 202.47975000000002 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.59974999999997 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 212.19975000000005 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.67974999999996 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 212.35999999999996 + }, + { + "M": 45056, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 227.72000000000003 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 229.80000000000007 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.48000000000008 + }, + { + "M": 48128, + "rows_per_block": 6, 
+ "vec_size": 2, + "time_us": 237.75999999999993 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.68000000000006 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 251.04000000000005 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.72024999999996 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.96 + }, + { + "M": 53248, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 261.92000000000013 + }, + { + "M": 54272, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 266.92024999999995 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 269.68 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 285.52025 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 284.64025000000004 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.28025000000025 + }, + { + "M": 59392, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 297.32025 + }, + { + "M": 60416, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 302.48050000000023 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.44025 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 308.40025 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 311.40025 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 306.88024999999993 + }, + { + "M": 65536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 319.40025 + }, + { + "M": 66560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 326.16050000000007 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 328.4805000000001 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.08025 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 340.8405 + }, + { + "M": 70656, + "rows_per_block": 6, + 
"vec_size": 2, + "time_us": 345.00025000000016 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.32050000000004 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 357.9604999999999 + }, + { + "M": 73728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 359.44049999999993 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.7204999999998 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 361.4005000000002 + }, + { + "M": 76800, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 374.44074999999987 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 378.4805 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.72074999999984 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 389.0004999999999 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.04049999999995 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 384.08050000000026 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 391.7605000000001 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.72075000000007 + }, + { + "M": 84992, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 417.08074999999985 + }, + { + "M": 86016, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 418.68075 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.2805000000001 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 432.04075 + }, + { + "M": 89088, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 433.3207500000001 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 439.4409999999997 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 440.36075000000005 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
441.6807499999999 + }, + { + "M": 93184, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 466.0409999999998 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.76099999999985 + }, + { + "M": 95232, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 462.96100000000035 + }, + { + "M": 96256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 471.72100000000023 + }, + { + "M": 97280, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 476.76099999999997 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.9609999999999 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 488.7610000000002 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.08100000000013 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.001 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.08124999999984 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.40100000000035 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 499.0809999999997 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.8410000000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 524.72125 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 518.441 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 538.5612500000002 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 516.761 + }, + { + "M": 110592, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 534.32125 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 531.8812499999999 + }, + { + "M": 112640, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 555.1212499999999 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.2812500000002 + }, + { + "M": 114688, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 540.6012499999997 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.9212500000001 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 550.3612500000002 + }, + { + "M": 117760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 577.2012500000003 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.1212499999999 + }, + { + "M": 119808, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 578.2012500000001 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.28125 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.6012499999999 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.4812499999998 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.9214999999999 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.7215000000003 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 598.2014999999999 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.8814999999997 + }, + { + "M": 128000, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 630.5615000000003 + }, + { + "M": 129024, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.8415 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 634.8415 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.4414999999999 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 626.4815000000001 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2414999999999 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 656.9617499999997 + }, + { + "M": 135168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 652.2417499999999 + }, + { + "M": 136192, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 655.9217499999997 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.4017499999998 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.3217499999996 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 666.6017500000003 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 682.7217500000002 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 688.4817499999997 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 690.92175 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 672.0817499999998 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 692.0017499999999 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 686.2017499999997 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 701.1617500000002 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 692.5617500000001 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.2017500000002 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 724.1619999999998 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 718.1220000000001 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 711.4020000000003 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.5619999999999 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.1619999999996 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 737.0420000000001 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 730.6020000000005 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.2419999999997 + }, + { + "M": 157696, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 753.3619999999999 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.5220000000004 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 768.6020000000003 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1220000000001 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 772.2422499999998 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 766.642 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 755.4020000000005 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 773.6022499999999 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.52225 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.2022499999998 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 800.4022499999999 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 803.6822499999996 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.6422499999996 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 814.52225 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.9622499999998 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 812.8022500000004 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 811.9622500000003 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 832.5224999999998 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 847.5624999999998 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.3625000000004 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 846.6424999999997 + }, + { + "M": 179200, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 845.7224999999999 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.5224999999998 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.3625 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 865.4425000000008 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 870.2425000000003 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 848.4024999999997 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 879.5624999999998 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 883.8425 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.3627499999998 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 893.2027500000002 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 882.1225000000002 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 913.4027499999997 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 918.2827499999999 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 922.9227500000006 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 917.2427499999999 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.4827499999997 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 926.5227500000005 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 920.2427499999994 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 935.9627499999997 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.64275 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.1627500000004 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 960.3229999999999 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 955.2429999999999 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.9627500000001 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 964.6030000000001 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.0027499999997 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 973.6030000000001 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 989.9629999999997 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 983.6830000000004 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.0030000000002 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.6429999999999 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.9229999999998 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 991.5630000000001 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1007.4830000000002 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1011.0830000000002 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.3229999999999 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1021.8032499999998 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1026.4832500000002 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1020.20325 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.4432500000005 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1030.80325 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1055.8832499999999 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1051.32325 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1065.5632499999997 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.5632500000006 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.3632499999999 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1070.4832499999998 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1084.2835 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1078.4434999999999 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1073.8432500000004 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1079.4435000000003 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.8034999999995 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.6834999999996 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1092.0834999999997 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1094.6835000000005 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1102.8035 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1135.40375 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1122.0035000000003 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.1234999999997 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1118.6035000000002 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1126.6035000000002 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1141.72375 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1143.4837499999999 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1148.32375 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1152.9637500000003 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.0435000000002 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.5237499999998 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1157.2437499999996 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1164.6037499999998 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1169.40375 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.0037499999999 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1176.163750000001 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1205.724000000001 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1199.484 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1204.364 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.364 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1220.524 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1225.2440000000001 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.2040000000006 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.924 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.3639999999996 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1236.884 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.0039999999995 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1252.6039999999998 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1239.9240000000004 + }, + { + "M": 
266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1222.484 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.4042500000007 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.1242499999998 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.8442499999996 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1270.0042500000009 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1274.2442500000002 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.8842499999996 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.0842499999999 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1297.4042499999996 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.5642500000004 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.7242499999998 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.9644999999996 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1307.9642499999995 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.0845 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.324500000001 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.0845 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1345.2044999999994 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1349.8445000000002 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.1245 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.7245000000003 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.4045000000006 + }, + { + "M": 
287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1356.1644999999999 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1376.2044999999998 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.3245000000002 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.2044999999994 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1359.0845000000008 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1384.5647499999995 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1374.6045 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1382.9245 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1394.1247500000004 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.2047499999999 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.084750000001 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1424.2847500000003 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.3247500000002 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.56475 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1422.6447499999995 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1431.8447500000002 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1442.4447499999997 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.8849999999993 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1430.8447500000016 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.4447499999997 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.8849999999998 + }, + { + 
"M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1447.5650000000005 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.645 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.8450000000003 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1472.165 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.245 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1489.4050000000002 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.125 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1488.645000000001 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1503.5249999999996 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.165 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.4849999999997 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.005250000001 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1509.5652499999997 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.8849999999998 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1530.3252499999994 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.1650000000009 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.6452499999996 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1549.08525 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.0852499999996 + } + ], + "896": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.03899999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 4, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000086 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000015 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.039000000000009 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.838999999999999 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.238999999999997 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 14.519000000000013 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 19.599000000000004 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 24.47924999999998 + }, + { + "M": 5120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 29.55899999999999 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 34.399250000000016 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 39.83899999999999 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.27925000000001 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 49.99925 + }, + { + "M": 10240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 54.27924999999999 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 58.599250000000026 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.71924999999999 + }, + { + "M": 13312, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 69.95924999999997 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
74.03950000000002 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 78.47924999999998 + }, + { + "M": 16384, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 86.47950000000002 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 90.1995 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.83950000000002 + }, + { + "M": 19456, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 100.39925000000001 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 100.55950000000001 + }, + { + "M": 21504, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 112.1995 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.35950000000003 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 118.39949999999996 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.43950000000001 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.47975000000005 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 129.6395 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 140.5595 + }, + { + "M": 28672, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 145.55975000000004 + }, + { + "M": 29696, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 153.47949999999994 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.59975000000006 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 153.95950000000002 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.75949999999997 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 173.19975000000005 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 172.63975 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 181.43999999999994 + }, + { + "M": 36864, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 181.15975000000003 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 190.87974999999994 + }, + { + "M": 38912, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 195.87974999999994 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.75975 + }, + { + "M": 40960, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 204.51975000000004 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.64 + }, + { + "M": 43008, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 215.63975000000005 + }, + { + "M": 44032, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 224.75999999999993 + }, + { + "M": 45056, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 225.44000000000005 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.4799999999999 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.59999999999985 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 238.24 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.2800000000001 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 249.95999999999992 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 250.79999999999995 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 253.72 + }, + { + "M": 53248, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 260.48025000000007 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 266.60000000000025 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 260.7600000000002 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 267.43999999999994 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 280.1199999999999 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 288.08024999999986 + }, + { + "M": 59392, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 289.92025 + }, + { + "M": 60416, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 294.80025000000023 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.20025 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 314.6002500000001 + }, + { + "M": 63488, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 315.6802499999999 + }, + { + "M": 64512, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 320.6802500000001 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.1602499999999 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.8805 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.08050000000003 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 342.6405000000001 + }, + { + "M": 69632, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 345.96049999999997 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 335.4002499999999 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.40049999999997 + }, + { + "M": 72704, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 366.5205000000002 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 367.88049999999976 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 361.88049999999987 + }, + { + "M": 75776, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 369.56049999999993 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 364.08050000000003 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 379.56049999999993 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 373.8007500000001 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 389.6807499999999 + }, 
+ { + "M": 80896, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 394.32074999999975 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 392.6004999999999 + }, + { + "M": 82944, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 412.28075 + }, + { + "M": 83968, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 420.6807500000001 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 413.24075000000005 + }, + { + "M": 86016, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 427.44074999999987 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 416.1607500000001 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 427.7207500000002 + }, + { + "M": 89088, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 430.6407499999999 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 437.2007500000002 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.96074999999985 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 446.88075000000015 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 440.5207499999998 + }, + { + "M": 94208, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 470.001 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.0007500000004 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.3609999999999 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 463.24099999999976 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 464.44100000000003 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 475.201 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.0410000000004 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 476.7209999999998 + }, + { + "M": 102400, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 474.12099999999987 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 492.20100000000025 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 507.24124999999975 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.8410000000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 511.56100000000015 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 527.3612500000002 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 509.961 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 530.7612499999998 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 530.5612499999997 + }, + { + "M": 111616, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 540.2412499999998 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 540.0012499999998 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 556.1212499999999 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 538.56125 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 565.6812500000001 + }, + { + "M": 116736, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 561.1612500000003 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.2812500000005 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 568.8812499999997 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.3212499999997 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 589.4414999999997 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.2015000000001 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 567.4012500000006 + }, + { + "M": 123904, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 581.7612500000002 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 590.7615000000001 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 602.1614999999999 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 595.7615000000003 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.9215000000002 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.1614999999999 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.1614999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 614.6414999999997 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.9614999999999 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 635.1215000000004 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.8814999999997 + }, + { + "M": 135168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 648.7617499999999 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 642.4415000000001 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.4015000000002 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.2817500000003 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.88175 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.8017499999999 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.36175 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.5217499999999 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.9617500000004 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.44175 + }, + { + "M": 145408, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 691.2817500000001 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.1217500000009 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 689.7217500000002 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.36175 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 710.3620000000003 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 703.9617499999999 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.6819999999998 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 724.2820000000002 + }, + { + "M": 153600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 735.7220000000004 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.5219999999999 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 727.7619999999997 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 735.1219999999998 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.8019999999997 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 764.1619999999998 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 765.202 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 784.9222499999998 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 767.402 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.0422499999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.8819999999998 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 783.52225 + }, + { + "M": 165888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 794.4022499999996 + }, + { + "M": 166912, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 791.2422499999998 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.1222500000001 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 800.52225 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 816.6022499999999 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 799.1222500000001 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 803.4822500000005 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.6022500000004 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.48225 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 808.96225 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.6425000000004 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 849.0824999999998 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 843.0025 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.3625 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 841.4824999999996 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.1224999999995 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.6025000000004 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 856.8425000000011 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 867.4825000000001 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 897.8427500000003 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 871.1225 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.0825000000004 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 890.5225 + }, + 
{ + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.5227500000001 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 900.3627500000002 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 915.5227500000001 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 909.7227500000004 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 924.8027499999998 + }, + { + "M": 194560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.60275 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.64275 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 917.2827499999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.3230000000003 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.9227499999997 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 926.8827500000007 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.0027500000001 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 941.16275 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 956.9229999999998 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 972.9229999999998 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.4027500000002 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.2830000000004 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.0429999999997 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.0830000000005 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 985.163 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.723 + }, + { + "M": 210944, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 994.0429999999997 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.9229999999998 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 993.4429999999998 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 997.723 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.2829999999999 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1017.5232500000006 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1022.1632499999995 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1037.4432499999998 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1021.1232499999999 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.80325 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1044.5632499999997 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1058.12325 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1053.76325 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1057.32325 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1034.6032500000006 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.6032499999997 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1071.4032499999998 + }, + { + "M": 228352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1077.5234999999998 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1080.5634999999997 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.7635 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2435 + }, + { + "M": 232448, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1092.6435000000001 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1107.9635000000003 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.9235000000003 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1089.9634999999998 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.0834999999997 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1118.5234999999998 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1131.5634999999997 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.5637499999996 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.5234999999998 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1145.7237499999997 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.3635 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.88375 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1151.72375 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1126.4435000000003 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.1637499999997 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.2837499999996 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.3237499999996 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.6037500000002 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.6837499999997 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1185.0837499999998 + }, + { + "M": 252928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1179.2037499999997 + }, + { + "M": 253952, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1183.76375 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1186.0437500000003 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1204.524 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.444 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1221.0040000000004 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.884 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.2039999999997 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.8439999999996 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.9640000000004 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1223.2439999999997 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1238.2440000000001 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1232.7640000000001 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.6439999999993 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.44425 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.0042499999995 + }, + { + "M": 269312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1270.2042500000002 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1271.52425 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.5642499999994 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1280.7242500000002 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.6842500000002 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.2842499999997 + }, + { + "M": 275456, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 1298.8442499999996 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1269.2442499999993 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.84425 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.4842500000004 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.1242499999998 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.8042500000001 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1324.2045000000003 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1326.3244999999997 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.2842499999997 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1335.7645000000007 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1350.924500000001 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.0842500000003 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1349.5645000000004 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1354.3645000000001 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1363.8444999999997 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1368.7645000000002 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.3244999999997 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.2844999999998 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.1247500000004 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1376.9645 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1387.4847499999996 + }, + { + "M": 296960, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.4045000000015 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.7647500000003 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1391.12475 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1411.484750000001 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.68475 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1417.44475 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1425.7647499999994 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1424.9247499999997 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1435.4047499999997 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.4449999999993 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.2847499999998 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.2447500000012 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1444.205 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.1250000000005 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.5249999999996 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.725 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.4049999999997 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.5649999999996 + }, + { + "M": 315392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.4849999999997 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1486.2049999999995 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.925000000001 + }, + { + "M": 318464, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1468.6849999999995 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.525000000001 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.5652499999997 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1512.3252499999999 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1501.3250000000003 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.6452500000005 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1537.60525 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1542.2052500000013 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.8452500000012 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.165 + } + ], + "960": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.398999999999994 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.119250000000008 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.919000000000004 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + { + "M": 2048, + "rows_per_block": 4, + 
"vec_size": 2, + "time_us": 14.759000000000015 + }, + { + "M": 3072, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 20.159000000000006 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.11925 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 30.199250000000013 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.03900000000002 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 39.959000000000024 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.67924999999998 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 50.599249999999984 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.39925000000001 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 60.679249999999996 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.59925 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 71.83950000000002 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.11924999999998 + }, + { + "M": 15360, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 85.2795 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.31949999999998 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 89.99924999999998 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.27924999999999 + }, + { + "M": 19456, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 101.43925000000002 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.9595 + }, + { + "M": 21504, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 118.11950000000002 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 116.67950000000002 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 119.91949999999994 + }, + { + 
"M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.11949999999993 + }, + { + "M": 25600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 131.03975 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.59949999999998 + }, + { + "M": 27648, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 144.39949999999996 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 144.87974999999997 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 145.99949999999998 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 151.11975 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 159.87975 + }, + { + "M": 32768, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 170.31975 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 168.4395 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 170.31975 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 186.16000000000003 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.35975000000002 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 193.11975 + }, + { + "M": 38912, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 197.79974999999996 + }, + { + "M": 39936, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 206.35974999999996 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.27975000000004 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 208.43975 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.39999999999992 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 218.47975000000002 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.35999999999984 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
229.27999999999986 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.71999999999997 + }, + { + "M": 48128, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 242.9200000000002 + }, + { + "M": 49152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 241.31999999999994 + }, + { + "M": 50176, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 248.1599999999999 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.71999999999977 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 259.2800000000001 + }, + { + "M": 53248, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 262.8800000000001 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 260.2399999999999 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 270.88025000000005 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.48000000000013 + }, + { + "M": 57344, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 288.6402499999999 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 285.48024999999996 + }, + { + "M": 59392, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 292.7202500000001 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 297.08025 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.28025 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 307.72024999999996 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 312.5602499999999 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 305.64025000000004 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.9602500000001 + }, + { + "M": 66560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48024999999996 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 333.5604999999998 + }, + 
{ + "M": 68608, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 337.44050000000004 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 354.1204999999998 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 337.8805 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.3605 + }, + { + "M": 72704, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 366.32050000000004 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 352.00049999999976 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 369.56050000000016 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 363.3604999999999 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.48074999999994 + }, + { + "M": 77824, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 384.32050000000015 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 388.8805000000001 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 402.8807499999999 + }, + { + "M": 80896, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 407.12074999999993 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 387.7205 + }, + { + "M": 82944, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 406.16075 + }, + { + "M": 83968, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 422.2807500000001 + }, + { + "M": 84992, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 419.3207500000001 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 409.84074999999996 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.1607499999999 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 432.5207499999998 + }, + { + "M": 89088, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 447.5207499999999 + }, + { + "M": 90112, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 450.0010000000002 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 454.72074999999995 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.40075 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 478.80099999999993 + }, + { + "M": 94208, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 464.28100000000063 + }, + { + "M": 95232, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 478.24099999999976 + }, + { + "M": 96256, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 474.12099999999987 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.08100000000013 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 469.6009999999999 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.0010000000002 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 493.2810000000002 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 491.92100000000005 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.481 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 512.6412499999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 511.9210000000003 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 506.56100000000015 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 516.4810000000002 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 520.9610000000002 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.1612500000001 + }, + { + "M": 109568, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 535.8812499999999 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 535.5212500000002 + }, + { + "M": 111616, + "rows_per_block": 6, + "vec_size": 2, 
+ "time_us": 545.8812500000001 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.48125 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 550.0812500000002 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 554.9212499999999 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.2012499999998 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 553.28125 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.2812500000002 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 574.1212499999999 + }, + { + "M": 119808, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 581.1212500000001 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 594.4014999999999 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.3215000000002 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.9614999999999 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.9614999999999 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 596.5615 + }, + { + "M": 125952, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.0015000000003 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 623.1615000000002 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 606.9214999999999 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.5617499999998 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 626.2014999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 620.1214999999997 + }, + { + "M": 132096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 639.8814999999997 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
620.0415000000003 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 656.5217499999999 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.6417499999998 + }, + { + "M": 136192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 664.6017499999996 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.9217500000002 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 674.92175 + }, + { + "M": 139264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 679.5617500000001 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.2817500000003 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 677.44175 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.1617499999998 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.8017499999999 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 692.0417500000001 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 696.8417499999996 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.2417499999997 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.16175 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 698.8417499999998 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 727.002 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.5620000000001 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 736.5219999999999 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 730.242 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 732.6019999999996 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 740.002 + }, + { + "M": 155648, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 733.4419999999998 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.0020000000006 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.002 + }, + { + "M": 158720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 748.242 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 755.8019999999999 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.3622499999999 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 775.9222500000001 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 778.6022499999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.1619999999996 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 788.0022499999995 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 794.6822500000001 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 809.96225 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.20225 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.8822499999997 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.602250000001 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.7622500000002 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.7222499999998 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 837.4425000000003 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 815.7222499999998 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.96225 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.6422499999999 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 855.6424999999999 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 849.5625 + }, + { + "M": 179200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.0825 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.0025 + }, + { + "M": 181248, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 873.8824999999997 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 859.4424999999997 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 884.4825000000001 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.4024999999997 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 871.8824999999997 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 898.6827500000004 + }, + { + "M": 187392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 892.4427500000002 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 886.0025 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 896.8027499999998 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 907.7627499999999 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 901.8827499999998 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 927.4827500000001 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.4427499999997 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.2827499999999 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.08275 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 935.6827499999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 930.16275 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 945.0027499999997 + }, + { + 
"M": 199680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 949.6427500000009 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 954.3627499999998 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 970.0029999999997 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 974.7629999999999 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 968.723 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.6829999999995 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 988.6430000000005 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 993.4830000000002 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.8030000000008 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1003.3630000000003 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 990.9229999999998 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.1629999999999 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1017.28325 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1001.2430000000003 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1026.80325 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1024.6832499999996 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1015.3632499999999 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1031.0432500000002 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.0832499999997 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.4832500000002 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1047.7232499999996 + }, + { + "M": 
221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.76325 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1067.32325 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1072.20325 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1056.0032500000007 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.8832499999999 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1065.5232499999997 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1090.9634999999998 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.4834999999998 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1079.5634999999997 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.4035 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1087.5634999999997 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.3635 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1116.8035 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.8435000000004 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1108.0835000000006 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.9634999999998 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1125.2435 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.9234999999999 + }, + { + "M": 239616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1136.92375 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1132.0434999999998 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1157.6037500000007 + }, + { + "M": 242688, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1149.2837500000005 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1153.9237499999995 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1161.0037500000003 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.6837499999997 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.4837499999999 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.24375 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1156.76375 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1196.32375 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.7637500000005 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1192.2037499999997 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4440000000004 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.5239999999994 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.6840000000007 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1202.6840000000002 + }, + { + "M": 257024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1218.324 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1234.444 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1235.1639999999993 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1229.4840000000004 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.2039999999993 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1230.924 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.6840000000002 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1248.1640000000007 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1256.364 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.444 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.2042499999998 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1277.9642499999995 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.84425 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1286.6842499999998 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1261.3242499999997 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1297.0442499999995 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.3242500000001 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.2042500000002 + }, + { + "M": 275456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1322.8045000000002 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.52425 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1329.8445000000002 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1312.7242499999998 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1317.6442499999998 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1333.6045000000008 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1338.3245000000002 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1340.6444999999994 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1324.6445000000003 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1352.4445000000005 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1355.6445000000003 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1327.8845000000001 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1373.0845 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1359.0044999999996 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1385.8047500000002 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1377.2044999999998 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1388.8847500000006 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.1647499999995 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.2047500000003 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1389.3647500000002 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.4447500000015 + }, + { + "M": 296960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1410.1647500000008 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1415.00475 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.2047500000012 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1411.6447500000004 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.6847500000008 + }, + { + "M": 302080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1421.2047499999999 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1412.9647499999996 + }, + { + "M": 304128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1430.96475 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1446.8050000000003 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1451.565 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1426.04475 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.3250000000003 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1469.085 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1474.0049999999997 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1467.5249999999996 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1460.045000000001 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1499.4449999999997 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1490.0449999999996 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.125 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1485.2450000000003 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.3650000000011 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.64525 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1520.8452500000003 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1530.3652499999994 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.2052499999995 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.5252500000001 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1535.9652499999997 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1537.2052500000004 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1556.4052499999998 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1557.5652499999997 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.5252500000001 + } + ], + "1024": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, 
+ "time_us": 2.1189999999999927 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999864 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 2.7989999999999995 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 3.9589999999999965 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 5.559000000000005 + }, + { + "M": 1024, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 8.238999999999997 + }, + { + "M": 2048, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 12.719000000000008 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 17.55924999999999 + }, + { + "M": 4096, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 22.23899999999999 + }, + { + "M": 5120, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 26.599000000000004 + }, + { + "M": 6144, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 30.719250000000002 + }, + { + "M": 7168, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 34.919000000000004 + }, + { + "M": 8192, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 38.99925000000001 + }, + { + "M": 9216, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 43.159000000000006 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 47.279 + }, + { + "M": 11264, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 51.559250000000006 + }, + { + "M": 12288, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 55.439250000000015 + }, + { + "M": 13312, + 
"rows_per_block": 5, + "vec_size": 4, + "time_us": 59.35925 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 63.75925 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 67.23924999999998 + }, + { + "M": 16384, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 71.83925000000002 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 75.71949999999998 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 79.31925000000003 + }, + { + "M": 19456, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 83.91925 + }, + { + "M": 20480, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 87.79949999999998 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 91.43950000000001 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 95.7995 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 99.83924999999999 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 103.47950000000003 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 107.51949999999997 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 111.83949999999999 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 115.47950000000003 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 119.71950000000004 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 123.83975000000004 + }, + { + "M": 30720, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 127.07950000000001 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 131.55949999999999 + }, + { + "M": 32768, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 135.51975 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 138.67950000000005 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 143.31975 
+ }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 147.67949999999996 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 149.8395 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 153.83974999999998 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 156.6395 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 160.35975000000002 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 163.99975 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 168.03974999999997 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 171.99975000000006 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 175.91975000000002 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 179.59975000000003 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 183.11975 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 187.0797500000001 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 190.67975000000013 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 195.3197500000001 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 198.9597500000001 + }, + { + "M": 51200, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 202.35974999999996 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 206.4797499999999 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 210.36000000000018 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 213.99975000000006 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 218.27974999999998 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 222.32000000000005 + }, + { + "M": 57344, + "rows_per_block": 8, 
+ "vec_size": 4, + "time_us": 226.27999999999986 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 229.72000000000014 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 233.79999999999995 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 237.24 + }, + { + "M": 61440, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 241.20000000000005 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 245.04025000000013 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 248.72000000000014 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 253.08000000000004 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 256.40025000000026 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 260.19999999999993 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 264.2800000000001 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 267.75999999999976 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 272.3202500000001 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 280.7602499999999 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 283.76025000000004 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 287.7202500000002 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 291.2402500000002 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 295.88025000000005 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 300.52025000000015 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 303.56049999999993 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 308.04025 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 
311.96024999999986 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 315.5202499999999 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 319.0002500000004 + }, + { + "M": 81920, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 322.6005 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 326.72024999999985 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 331.1205 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 334.5202499999998 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 339.04050000000007 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 343.4005000000002 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 346.1205000000002 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 350.40049999999985 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 354.32050000000004 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 358.2405000000001 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 362.32050000000004 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 365.7605000000002 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 369.68050000000005 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 373.68050000000005 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 377.64049999999986 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 381.9205000000002 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 385.32074999999986 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 389.5205000000001 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 387.6007500000003 + }, + { + 
"M": 101376, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 391.7605000000003 + }, + { + "M": 102400, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 395.6004999999998 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 399.3605 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 403.9207500000002 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 407.76075000000014 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 411.3207500000003 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 415.40075 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 418.3207500000001 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 423.44075 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 425.84075000000007 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 430.84075000000007 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 434.28075000000035 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 438.3207500000001 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 441.8810000000001 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 445.48075000000017 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 449.5609999999999 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 453.4409999999998 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 456.76099999999997 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 460.84075000000007 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 464.88099999999986 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 468.3209999999999 + }, + { + "M": 122880, + 
"rows_per_block": 10, + "vec_size": 4, + "time_us": 471.9209999999998 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 475.9209999999998 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 479.3212500000002 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.20100000000025 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 486.88099999999986 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 490.8409999999999 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 495.1610000000003 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 499.4409999999998 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 502.64100000000013 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 506.6009999999999 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 510.04125 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 514.3612499999999 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 517.8412499999997 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 522.1210000000001 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 526.4812500000003 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 529.4412499999999 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 533.2812500000002 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 537.6012500000002 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 538.9612499999998 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 542.4412500000003 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 546.24125 + }, + { + "M": 144384, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 550.3612500000004 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 554.2012500000003 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 557.9612500000001 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 561.8415000000002 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 565.9212500000003 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 569.5615000000003 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 573.1612500000001 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 576.9615000000001 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 581.0814999999998 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 584.24125 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 588.4812499999998 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 591.9615000000001 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 596.6012499999999 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 600.8015000000003 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 603.3615000000002 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 608.0015000000001 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 611.9615000000001 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 615.5217500000003 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 619.3615000000004 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 622.3215000000005 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 627.3217500000007 + }, + { + "M": 165888, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 630.7217500000002 + }, + { + "M": 166912, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 634.8817499999998 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 638.6415 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 641.6015000000007 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 645.88175 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 649.9217499999997 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 652.7217499999999 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 656.8017500000003 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 659.0017500000001 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 664.3217500000005 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 667.9617499999999 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 671.3619999999999 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 675.7617499999999 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 678.2820000000002 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 682.9217500000004 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 687.5217499999999 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 690.5217499999997 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 694.8820000000001 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 696.8819999999998 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 702.5217500000003 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 705.5219999999999 + }, + { + "M": 187392, + "rows_per_block": 
8, + "vec_size": 4, + "time_us": 710.2820000000002 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 713.8020000000001 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 715.7219999999998 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 721.2020000000002 + }, + { + "M": 191488, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 726.2419999999997 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 729.002 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 732.482 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 735.8422500000011 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 741.3620000000005 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 745.002 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 748.8420000000006 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 751.8420000000001 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 755.2819999999997 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 759.6022499999999 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 763.2419999999997 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 768.00225 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 771.8422500000006 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 773.1222499999999 + }, + { + "M": 205824, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 779.3222500000002 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 782.7622500000002 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 786.4822499999996 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 
790.0822499999999 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 792.9222500000001 + }, + { + "M": 210944, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 797.56225 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 801.56225 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 806.4822499999996 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 809.3222500000006 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 811.7622500000002 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 817.1222500000003 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 820.6422499999999 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 825.1624999999999 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 828.0825 + }, + { + "M": 220160, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 834.5224999999996 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 838.4025000000001 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 842.4424999999997 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 846.2824999999998 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 850.2424999999998 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 852.8827500000002 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 858.4025000000001 + }, + { + "M": 227328, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 861.64275 + }, + { + "M": 228352, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 864.9625000000001 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 869.60275 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 868.4024999999997 + }, + { + "M": 231424, 
+ "rows_per_block": 8, + "vec_size": 4, + "time_us": 873.7225000000003 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 877.2427500000008 + }, + { + "M": 233472, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 882.2424999999998 + }, + { + "M": 234496, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 885.6424999999999 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 887.042750000001 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 894.2027499999999 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 897.8827500000002 + }, + { + "M": 238592, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 901.2427499999999 + }, + { + "M": 239616, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 905.4827500000004 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 906.3627500000002 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 913.5627499999996 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 917.16275 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 920.7227499999999 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 924.9227499999997 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 925.6429999999996 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 932.0827500000005 + }, + { + "M": 247808, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 935.6429999999996 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 940.16275 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 944.9229999999998 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 946.2427500000008 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 951.643 + }, + { + "M": 252928, + "rows_per_block": 8, + 
"vec_size": 4, + "time_us": 955.9629999999997 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 959.5629999999996 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 963.2027499999995 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 965.683 + }, + { + "M": 257024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 970.723 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 974.6829999999995 + }, + { + "M": 259072, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 977.203 + }, + { + "M": 260096, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 981.203 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 983.20325 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 989.3229999999999 + }, + { + "M": 263168, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 992.4430000000002 + }, + { + "M": 264192, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 996.7230000000004 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1001.1632499999996 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1001.6432499999996 + }, + { + "M": 267264, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1007.8029999999998 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1020.3632499999995 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1030.0432500000002 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1029.2032499999996 + }, + { + "M": 271360, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1031.12325 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1035.3232500000004 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1039.6034999999997 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1043.80325 + }, + { 
+ "M": 275456, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1046.8835000000004 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1047.84325 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1053.4032499999994 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1056.4432500000007 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1058.6035000000002 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1063.3232500000008 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1067.20325 + }, + { + "M": 282624, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1070.5234999999998 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1080.283500000001 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1078.2034999999996 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1082.5234999999998 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1086.2035 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1090.4035000000003 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1094.7634999999996 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1097.9235000000008 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1101.2035 + }, + { + "M": 291840, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1112.04375 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1110.1234999999992 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1114.7237500000006 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1118.4835000000003 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1122.9232500000003 + }, + { + "M": 296960, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 1125.5634999999993 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1130.0434999999998 + }, + { + "M": 299008, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1132.5237500000003 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1136.0434999999998 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1141.0437499999998 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1143.9635000000012 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1147.1635000000006 + }, + { + "M": 304128, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1151.6837499999997 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1155.9237499999995 + }, + { + "M": 306176, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1159.6037500000002 + }, + { + "M": 307200, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1168.6437500000002 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1172.1637499999997 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1176.4037500000004 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1181.44375 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1183.44375 + }, + { + "M": 312320, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1187.0437500000003 + }, + { + "M": 313344, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1192.6437500000002 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1190.0037500000003 + }, + { + "M": 315392, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1193.7637499999996 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1198.2837500000014 + }, + { + "M": 317440, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1201.24375 + }, + { + "M": 318464, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 1206.2839999999997 + }, + { + "M": 319488, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1209.1239999999998 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1219.004 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1221.4840000000004 + }, + { + "M": 322560, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1224.6439999999998 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1229.4839999999995 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1232.7240000000002 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1236.5639999999994 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1240.364 + }, + { + "M": 327680, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1255.8442500000006 + } + ], + "1120": [ + { + "M": 1, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999893 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5189999999999912 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.918999999999997 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879000000000005 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.478999999999992 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.799 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + 
"time_us": 25.278999999999982 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.999 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 45.75924999999999 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 55.15925 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.5595 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.87925000000003 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.11925000000002 + }, + { + "M": 9216, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 97.4795 + }, + { + "M": 10240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.31949999999996 + }, + { + "M": 11264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 111.51949999999995 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.47975000000002 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 130.9595 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.11950000000004 + }, + { + "M": 15360, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 159.5995 + }, + { + "M": 16384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 167.31975 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 174.75975 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.23975000000002 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 194.71974999999995 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.56000000000003 + }, + { + "M": 21504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.63975000000005 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 228.47974999999997 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 234.5999999999999 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 236.87999999999994 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.72024999999994 + }, + { + "M": 26624, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 269.04024999999996 + }, + { + "M": 27648, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 279.08025 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.08000000000004 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 281.92025000000007 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.20025 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.36024999999995 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.6005 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 333.7202500000001 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 343.84049999999996 + }, + { + "M": 35840, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 356.92050000000006 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.4805 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 373.40049999999997 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 370.20074999999997 + }, + { + "M": 39936, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 400.60074999999995 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 392.6807500000001 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 413.20074999999986 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 424.76099999999997 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 423.76075000000014 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 422.9610000000001 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.28075 + }, + { + 
"M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 446.68075000000033 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.7610000000001 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 465.9610000000001 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 486.5609999999999 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.7209999999999 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.0412499999999 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.0412500000001 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 538.88125 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 523.3612500000003 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 522.9612500000001 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 542.5612499999997 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 579.2415000000001 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 555.0814999999998 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 574.7212499999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.9614999999999 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 604.0415 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 603.2414999999999 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 609.6414999999997 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.1215000000002 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2817499999999 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 649.0817499999998 + }, + { + "M": 68608, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 649.9617499999999 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 658.8417500000002 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 679.7217499999999 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 676.3217499999998 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.88175 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.6020000000001 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 718.242 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 719.2820000000004 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 726.8422500000001 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 736.24225 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 747.5619999999999 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.1222499999997 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 766.6022500000006 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.6822500000001 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.0022499999998 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.8822499999997 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 804.4425000000001 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 834.6025 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 817.8024999999998 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 832.1624999999999 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 863.2025000000003 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 852.0425 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 871.64275 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 875.1227500000005 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 880.4027499999997 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 900.1627499999997 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.7227500000006 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 909.1227499999998 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 940.4029999999996 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 927.8027499999998 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 947.6829999999998 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 943.1630000000002 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 954.123 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.8430000000001 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.6830000000004 + }, + { + "M": 104448, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 1011.3232499999998 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.7632499999996 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.4432499999998 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.7232500000003 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1042.6832499999998 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1052.0832499999997 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1039.8832500000015 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1057.12325 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1069.3635 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1081.9234999999999 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1080.3235000000004 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1087.4835000000007 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1117.4035 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1118.8835 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.40375 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1145.84375 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.5637499999998 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1152.8037500000003 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1132.2437499999996 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1191.324 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1159.2837499999996 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.364 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.2037499999997 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.324 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1234.4840000000004 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1216.6440000000002 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1246.8842499999996 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1245.88425 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.4039999999995 + }, + { + "M": 134144, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1260.36425 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.6442500000007 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.2842500000002 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1288.92425 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.5642499999994 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1319.2444999999998 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1330.0045 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1325.5645000000004 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1335.2444999999998 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1317.7644999999998 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1357.3244999999997 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.7644999999998 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1373.4047500000001 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.0847499999995 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.00475 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1394.0047500000005 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1392.9247500000001 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1443.2849999999994 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.045 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1442.525 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1441.0049999999997 + }, + { + "M": 155648, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1440.4049999999993 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1450.2849999999999 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.5650000000005 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1469.165 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.5252500000001 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1518.605250000001 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.2052499999995 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.4852499999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.8852500000003 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.4852500000002 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.8055 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.0455000000006 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1564.4854999999998 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.4455000000007 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1603.6055000000001 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.20575 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1612.1657499999997 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.4055000000003 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.8857500000004 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1629.24575 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1638.80575 + }, + { + "M": 177152, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1648.2857500000005 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1667.9257499999994 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.326 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.5660000000003 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1706.4860000000003 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1718.7659999999996 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1715.0460000000003 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.1659999999997 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.0862500000012 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1742.9662499999995 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.6462499999998 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1772.1262500000003 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.5662499999994 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1759.92625 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.0062500000008 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1799.4065 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1809.0065000000004 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1837.886500000001 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1816.5664999999995 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1836.4465000000005 + }, + { + "M": 198656, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1871.44675 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.12675 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.6067500000017 + }, + { + "M": 201728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1889.0867499999995 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1904.04675 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1913.486750000001 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1888.9667500000005 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.487 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1942.007000000001 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1930.6070000000004 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1950.8869999999997 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1967.4070000000002 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.4472500000002 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1969.0070000000005 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1978.527 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1988.24725 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.3272499999994 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2037.96725 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2016.3272500000003 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2036.1672500000006 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2063.4475 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 2052.9275 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2056.2475000000004 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2086.2874999999995 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.2077500000005 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.7675 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.2477500000005 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2123.0077499999998 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.848 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.84775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2154.2077499999996 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2178.4879999999994 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2177.648 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2176.968 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2186.2080000000005 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2180.2080000000005 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2215.4880000000003 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2224.2880000000005 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2224.00825 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.24825 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.92825 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2242.84825 + }, + { + "M": 
242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2283.2885000000006 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.60825 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2291.8085 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2254.2082499999997 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2300.1285000000007 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.768499999999 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.5285000000003 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2339.2884999999997 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2311.4885000000004 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2368.20875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.3287499999997 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2367.04875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.80875 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2369.00875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.5287499999995 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2416.8089999999997 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2413.209 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.049 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2415.4089999999997 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.929 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2461.5289999999995 + }, + { + "M": 264192, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2450.6890000000003 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2459.4889999999996 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.4489999999996 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2487.2492500000003 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2516.169249999999 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2504.84925 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.7292500000003 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.7292499999994 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2544.049499999999 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2553.089499999999 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2552.4094999999998 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2582.6095000000005 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2538.8095000000003 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2607.6097499999996 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2596.36975 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.7297499999995 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.6497499999996 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2626.8097500000003 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.28975 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.969750000001 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2652.12975 + }, + { + "M": 285696, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 2661.5699999999997 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2642.04975 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2680.6099999999988 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2700.0899999999992 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2699.5299999999997 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2719.210250000001 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.8100000000004 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2719.450249999999 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2718.21 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.5302500000007 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.0902499999993 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2737.610249999999 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2776.6904999999997 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.4105 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.130500000001 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2813.2105 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.6505000000006 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2832.0504999999994 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2831.370500000001 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2830.4105 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2847.7707499999997 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 2831.330499999999 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2881.7707500000006 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.7307499999997 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2890.290750000001 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2899.410749999999 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2898.7307499999997 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2918.251000000001 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2912.7709999999997 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2942.610999999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.7310000000034 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.171000000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2950.330999999999 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2970.1312499999985 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.1712500000003 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.4112499999983 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2980.9312500000005 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3022.331250000001 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3031.65125 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3051.491499999999 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.3714999999993 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.411249999999 + } + ], + "1152": [ + { + "M": 1, + 
"rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.959000000000003 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.959000000000003 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.7989999999999995 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.359249999999989 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.678999999999995 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.918999999999983 + }, + { + "M": 3072, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 35.038999999999994 + }, + { + "M": 4096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.55925000000001 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 54.55924999999999 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.79925 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.07925000000003 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.31925 + }, + { + "M": 9216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.91950000000001 + }, + { + "M": 10240, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 106.6795 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 116.9595 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.39950000000003 + }, + { + "M": 13312, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 129.71949999999995 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 147.71974999999998 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 151.63950000000003 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.27975 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 178.43975 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 185.6 + }, + { + "M": 19456, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 198.76000000000002 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 193.44000000000008 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 210.4 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 221.88000000000005 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 224.56 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 234.32000000000005 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 251.64 + }, + { + "M": 26624, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 270.52025000000003 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 270.60024999999996 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.24025 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 292.68024999999983 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 291.28024999999985 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 310.5202499999998 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.2402499999999 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 327.48049999999984 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 339.92050000000006 + }, + { + 
"M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 349.7205 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.4805 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 369.4404999999998 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 367.2405 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.4805000000001 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.76049999999987 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 401.3207500000002 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 410.48075000000017 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 425.7207500000002 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.72100000000023 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 444.68100000000015 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 448.48074999999983 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 447.24074999999993 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 462.3207500000002 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 493.5609999999998 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 482.9210000000004 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 490.7210000000001 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 500.36124999999987 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 534.7612500000001 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.96125 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 539.6412500000001 + }, + { + "M": 57344, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 549.2012499999998 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 551.08125 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.0815 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 580.5214999999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.4815000000001 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 588.6014999999998 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 606.2015000000001 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 615.92175 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 624.9214999999999 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 622.2815 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 643.9617499999997 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 642.16175 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 651.8417500000002 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 664.4017500000002 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 659.2817499999996 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 703.8819999999998 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 701.5219999999999 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 721.8019999999999 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.6419999999996 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 726.722 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 739.3222500000004 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.3222499999997 + }, + { + "M": 
79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 758.6022499999997 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.0822499999999 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 752.5622500000002 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 776.8422500000004 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.48225 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 806.0824999999998 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 805.1222500000001 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 836.1225000000002 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.0824999999998 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.0425 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.8824999999999 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.1225 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 861.6424999999999 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.3227499999998 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 890.2427499999999 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 889.6427499999998 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.4827499999999 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 902.7227500000004 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 917.5627499999998 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.7227500000001 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 947.643 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 946.403 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.6029999999998 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 973.3229999999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 992.8829999999996 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1002.4032499999995 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 990.9629999999995 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1003.1232499999998 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.1232499999998 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.20325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.0832500000004 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1049.1232499999996 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.2832499999995 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1068.0434999999998 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1070.283500000001 + }, + { + "M": 115712, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 1150.4837499999999 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1089.5235000000007 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1078.0834999999997 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.9234999999999 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.8835 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1123.2037500000001 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1142.0437499999998 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1133.9237500000004 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1160.5637499999998 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.7637499999996 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.0037500000003 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1183.7240000000002 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.0437499999998 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1207.4039999999995 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1206.1239999999998 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1233.0040000000004 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1224.8439999999996 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1237.1239999999998 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.1642500000003 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1281.56425 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1263.0042500000004 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1282.84425 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1294.6842499999998 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1297.92425 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1318.6444999999999 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.48425 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1317.8845000000006 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1329.6845000000003 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1346.9645000000005 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1354.4045 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.2845000000002 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1364.6444999999994 + }, + { + "M": 148480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1393.5247499999996 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1393.6447499999995 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1403.0847500000004 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1401.924750000001 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1420.7247499999999 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1401.8847500000002 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.3650000000002 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1449.8450000000003 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1458.725 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.3249999999994 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.5649999999996 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.3649999999998 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1497.0452499999997 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1526.6452499999996 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.76525 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1495.0452500000001 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.4452500000002 + }, + { + "M": 165888, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1533.2052500000004 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1542.4052499999993 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1551.8855000000003 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.1655 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1570.3654999999994 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.4854999999998 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1589.0054999999998 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.9257500000003 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1608.6055000000001 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1637.4457500000008 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1657.1657500000001 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.5657500000002 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.4857500000003 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1644.5257500000002 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1674.2060000000001 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1683.1660000000002 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1682.3659999999995 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.9260000000008 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.0060000000003 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1731.0459999999998 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1733.806 + }, + { + "M": 187392, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.3662500000005 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.4862500000004 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.0062499999995 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1777.6462499999993 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1776.7262499999997 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1775.4862500000008 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.7265000000007 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.2065000000007 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1828.6464999999998 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.1664999999998 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.886500000002 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1830.4465 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.52675 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1870.2867499999993 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.7267500000003 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.3267499999997 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.8467499999992 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1907.7667499999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.607 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1916.0867500000004 + }, + { + "M": 
208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1936.2069999999999 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1944.127 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.9669999999996 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.087 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1973.487 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1982.9272500000006 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1990.927250000001 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.6472499999998 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.64725 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2020.5672499999991 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.7272500000006 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.0872500000007 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2049.7675 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.8475000000008 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.6475 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2086.2474999999995 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2053.7275 + }, + { + "M": 226304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2116.1677500000005 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.887750000001 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.2077500000005 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.1277500000006 + }, + { + "M": 230400, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 2119.72775 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.0877499999997 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2151.3277499999995 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2160.4077500000003 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2169.5280000000002 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2155.567750000001 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.1680000000015 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.968 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2207.0880000000006 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.0482500000016 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2213.2080000000014 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2235.4082499999995 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.608250000001 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2254.2482499999996 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2253.4082500000004 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.728249999999 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2292.7285 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2291.768499999999 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2291.0885 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2311.0485 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2296.3685000000005 + }, + { + "M": 251904, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2350.2887500000006 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.9685 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2359.04875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.5687500000004 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2352.84875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.5687500000013 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2375.0887500000003 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2394.7287499999993 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2403.80875 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.889 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.888999999999 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.4489999999996 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.6889999999994 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2450.929 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2455.8489999999993 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2479.16925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2492.4492499999997 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2512.08925 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.4492499999997 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.16925 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2519.6492499999995 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.20925 + 
}, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.2095 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2558.129499999999 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.4092500000006 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2564.7295000000004 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2563.5695000000005 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.6095000000005 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2602.9297499999993 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2565.8495000000003 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2621.6497500000005 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2612.16975 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.4497499999998 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2649.5297499999997 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2611.7297499999995 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.13 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2677.6500000000005 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2676.6899999999996 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2696.369999999999 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.3699999999994 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2705.49 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2715.09 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.25025 + }, + { + "M": 295936, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2743.8502499999995 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.8902499999995 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2762.13025 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2760.8502500000013 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.37025 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2778.450249999999 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2771.17025 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2807.7704999999996 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.4505 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.9305000000004 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2825.2105 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2798.5305 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2845.8107500000015 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2865.370750000002 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2874.8107499999987 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2884.21075 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2886.410750000001 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.1307500000003 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.611000000001 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2909.571000000001 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.490999999998 + }, + { + "M": 317440, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2910.531000000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.9709999999986 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.171000000001 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2959.2109999999993 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.0512499999986 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.491000000002 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.211250000001 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.491249999998 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.0512500000004 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3034.491499999999 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.0112499999996 + } + ], + "1280": [ + { + "M": 1, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 2.158999999999992 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9589999999999965 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 5.878999999999998 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.039000000000001 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 
2, + "time_us": 14.679000000000002 + }, + { + "M": 2048, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 24.87925 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.639 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 44.67899999999999 + }, + { + "M": 5120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.43925000000002 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.439250000000015 + }, + { + "M": 7168, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 76.03924999999998 + }, + { + "M": 8192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 81.99950000000003 + }, + { + "M": 9216, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 100.31925000000001 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.55925000000002 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 113.75949999999999 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 120.3595 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 137.71949999999998 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 145.7995 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 154.35974999999996 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 158.47975000000005 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 184.39975000000004 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 181.39974999999998 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 192.79974999999996 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.07974999999993 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 210.75975 + }, + { + "M": 22528, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 230.2 + }, + { + "M": 23552, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 225.15999999999997 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 245.63999999999993 + }, + { + "M": 25600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 260.64025000000004 + }, + { + "M": 26624, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 271.20025000000004 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 271.0400000000001 + }, + { + "M": 28672, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 286.52025000000003 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.5602500000001 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 307.60024999999996 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.0802500000001 + }, + { + "M": 32768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 307.28024999999997 + }, + { + "M": 33792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 336.56050000000016 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 329.68050000000017 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 343.4805 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 355.04049999999995 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 376.5205000000001 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.7204999999998 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.44075 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 379.9604999999999 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 407.36075000000017 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 415.84075000000007 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.241 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 435.6407499999999 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 445.2007500000001 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 454.60074999999983 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 458.2009999999998 + }, + { + "M": 49152, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 474.64099999999996 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.40100000000007 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 483.56100000000026 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 502.24099999999993 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 511.8412499999999 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.321 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 533.28125 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.28125 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9615000000002 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 541.4012500000001 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.0015000000001 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 578.3215000000002 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.8815000000002 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.0014999999999 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 608.5614999999998 + }, + { + "M": 64512, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 620.6814999999999 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 625.60175 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.7217500000002 + }, + { + "M": 
67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.5217499999999 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.36175 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 663.5619999999998 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 665.5617500000001 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 670.3217500000001 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 694.762 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 702.8020000000001 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 712.3619999999999 + }, + { + "M": 75776, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 735.7220000000002 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 731.4022500000001 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 741.1222499999997 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.6022500000001 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 768.7222499999998 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.4822500000002 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 753.6422500000003 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 778.2422500000007 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 797.80225 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.9222500000005 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.4422500000001 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 820.9624999999996 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.1225000000002 + }, + { + "M": 89088, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 834.7624999999998 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 854.1225 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.8024999999998 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.4824999999998 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 882.6027499999998 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.6827499999997 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 901.2827499999999 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 899.9227500000002 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 904.0427499999998 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.1227499999998 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 928.60275 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.0430000000001 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.7629999999997 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 949.8429999999998 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.6429999999998 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 995.2029999999997 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.9629999999997 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1003.72325 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1017.2032500000001 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1037.16325 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.8832499999999 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1045.2832499999995 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1060.8035 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1053.5632500000006 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1059.2035 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1072.3235 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1155.5637500000003 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1091.2035 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1100.0034999999998 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1121.3237499999996 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1130.7637499999996 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1145.4037500000004 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1142.76375 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.0037500000003 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.6437500000006 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1161.8837499999995 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.9637500000003 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1180.2037500000001 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1195.7239999999997 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1199.1239999999998 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1214.484 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1223.8039999999996 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1247.6842499999998 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1218.844 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1256.3242500000001 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1265.5642500000004 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.2842499999997 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1281.2442499999997 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1275.3642499999996 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1292.84425 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1309.3642500000005 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.3245000000002 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1319.0045000000005 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.4845000000005 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1347.8844999999997 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1347.0045 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1356.4845000000005 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1385.44475 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.8045000000002 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.60475 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.2447500000003 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.64475 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1432.9250000000006 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.045 + }, + { 
+ "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1441.3250000000003 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1450.565 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.9650000000001 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1469.8849999999998 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.7249999999995 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1478.4449999999997 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.0452500000001 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.64525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.0052500000002 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1496.4052499999993 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.72525 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.5255000000002 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1544.0052499999997 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1553.2855 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.5254999999997 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1572.0855000000001 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.0855000000001 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1602.3255 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.0455000000002 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1599.4055000000003 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1639.0857499999993 + }, + { + "M": 
176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1651.36575 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1647.4457499999999 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1659.5657499999998 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.5657500000002 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1675.5659999999998 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.4060000000004 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1704.406 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1714.0059999999999 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.3659999999995 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1732.8859999999995 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1721.5659999999993 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1741.0462499999999 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.32625 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1759.48625 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1769.00625 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1778.6862500000002 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1798.2065000000002 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1797.4062500000005 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1806.1264999999999 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1816.0064999999995 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1825.2464999999997 + }, + { + "M": 
197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1845.0465000000008 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.9264999999996 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1852.7265000000002 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1873.0867499999995 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1861.6467499999999 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1902.1267500000004 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1897.1667499999994 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1868.08675 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1909.7267499999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1918.9669999999996 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.7269999999999 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1948.647 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1935.8869999999997 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.4470000000001 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.1669999999995 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.9270000000006 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.1672499999995 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.0072499999997 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.8072499999998 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2021.0072500000008 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2022.6872500000002 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2031.24725 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2019.8472499999998 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2041.2075000000004 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.5675 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.2074999999995 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.6875 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2055.4875 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.567499999999 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2107.807750000001 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2096.5274999999992 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2126.84775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.0477499999997 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2144.0077500000007 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.888000000001 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2152.60775 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.5280000000002 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.648000000001 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.4479999999994 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2210.928000000001 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.608 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2229.04825 + }, + { + "M": 
240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.888 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.5682499999994 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.4882500000003 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2258.3282499999996 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2288.2884999999997 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2242.4482500000004 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2296.6085000000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.3685000000005 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2305.1685000000007 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2312.2484999999997 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2318.1285 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2351.76875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.96875 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.16875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.4884999999995 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2333.2485000000006 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2378.16875 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2377.2887499999997 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.928749999999 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2396.4887500000004 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2401.6087500000003 + }, + { + "M": 262144, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2415.3289999999997 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2455.6089999999995 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2465.049 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2453.5690000000004 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.4890000000005 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.9692500000006 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2494.129249999999 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2513.6092500000013 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2502.32925 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2485.5692499999996 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.2492500000008 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2530.4092500000006 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2539.7695000000003 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2538.9294999999993 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2542.7295000000013 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2557.6495000000014 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2577.6094999999996 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.2095 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.6097499999996 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.4895000000015 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.28975 + }, + { + "M": 283648, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.09 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.36975 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2643.16975 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.329750000001 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.5297499999997 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2681.249999999999 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2680.6099999999988 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.05 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2674.5299999999993 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.7300000000005 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2731.290250000001 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.0902499999993 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.370250000001 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2742.170249999999 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.0502500000002 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2768.0502499999993 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2793.1305 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.370249999999 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2775.8902500000013 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2802.370500000002 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2801.5705000000007 + }, + { + "M": 305152, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2821.0105000000012 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.9704999999994 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.9704999999994 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.450750000001 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2859.210750000001 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2858.21075 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2878.45075 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2869.7307499999997 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2896.8507499999996 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.570999999999 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.7709999999997 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2945.291 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2895.0107499999986 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2953.5710000000017 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2942.570999999998 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2984.8512499999997 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.9712500000014 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2952.3309999999983 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.611250000001 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2989.691249999999 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.3712499999992 + }, + { + "M": 
326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3018.61125 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.61125 + } + ], + "1344": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3190000000000026 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.958999999999996 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.959000000000003 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.959000000000003 + }, + { + "M": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.59924999999999 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.71925000000001 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.039250000000003 + }, + { + "M": 3072, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 35.439000000000014 + }, + { + "M": 4096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 44.91924999999999 + }, + { + "M": 5120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 55.079250000000016 + }, + { + "M": 6144, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.83925 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.99924999999999 + }, + { + "M": 8192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 85.03950000000003 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.35950000000001 + }, + { + "M": 10240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
105.11950000000003 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 118.39949999999999 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.1195 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 139.07950000000002 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 144.9995 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 153.91949999999994 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.95975000000004 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 174.87975 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.39975 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 189.07975 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 205.99975 + }, + { + "M": 21504, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 218.68 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 224.84000000000003 + }, + { + "M": 23552, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 242.8002499999999 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 248.04000000000002 + }, + { + "M": 25600, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 254.88024999999988 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 256.15999999999997 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.72 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 285.9202500000001 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 305.20025000000004 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 288.92000000000013 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 313.80025 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 323.88024999999993 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 329.4805000000001 + }, + { + "M": 34816, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 350.0804999999999 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.08025 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 360.72050000000013 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.80075 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 381.4805000000001 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 390.28075 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.48075000000017 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 413.5210000000001 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 419.84074999999984 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 418.2007500000002 + }, + { + "M": 45056, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 451.6009999999999 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 439.84075000000007 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 452.3209999999998 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.8009999999998 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.3610000000002 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 491.0010000000001 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.7209999999999 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 508.76100000000025 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 526.6412500000001 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 536.0412500000001 + }, + 
{ + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 527.8412500000002 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 543.8412500000002 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 562.4014999999997 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.4012500000001 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 585.4414999999997 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 592.5214999999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 579.4815000000001 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.9214999999999 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.8815 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.8014999999998 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 618.5617500000001 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 616.1614999999999 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.68175 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 658.2417499999999 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 667.8017499999999 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 680.8017499999999 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.8017499999999 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 691.6817500000002 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.6419999999998 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 710.6019999999999 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.5220000000002 + }, + { + "M": 76800, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 745.1622500000001 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 739.0020000000004 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 746.722 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 779.0422500000004 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.9622499999996 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 761.1222499999999 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.1222500000003 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 805.6422499999999 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 815.4024999999999 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 824.8825000000006 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.1225000000004 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 833.1225 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 843.4825000000001 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.5224999999998 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 852.2424999999998 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.12275 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.8027500000001 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 912.7627500000001 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.8027499999998 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 931.0029999999997 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 913.0027499999997 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 929.4027499999997 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 938.4830000000002 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 956.8029999999999 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 967.9230000000009 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.0430000000001 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 998.0832500000001 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 996.76325 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.3229999999999 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1005.1632500000005 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1015.6832500000002 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.80325 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1033.76325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1051.4032500000003 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.6032499999997 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1052.3632500000008 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1072.1234999999997 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1092.5234999999998 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1102.1234999999997 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1108.1235000000006 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.0034999999998 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1127.0037499999999 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1136.4837499999999 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.0837499999998 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1172.4037499999997 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1135.0037500000003 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1173.2037499999997 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1172.1237500000002 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1208.0839999999998 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.2440000000001 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.7239999999997 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1225.6839999999997 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.0040000000004 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.804 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1238.6040000000003 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.56425 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1261.5242500000004 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1275.6042500000003 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.1642499999998 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.4042500000005 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1305.2842499999997 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.3242500000001 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6844999999998 + }, + { + "M": 141312, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1350.1645000000003 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1348.9245 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1328.5245 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1365.7644999999993 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1364.2845000000002 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1376.4047499999997 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1383.5647500000005 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1386.0047499999996 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1413.2047500000003 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.00475 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.68475 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1422.60475 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.5249999999996 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.1250000000014 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1471.5249999999996 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.6449999999995 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.7250000000004 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.565 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1499.08525 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.7249999999995 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.76525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1527.8452499999999 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.4852499999997 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1536.5652499999997 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.12525 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.5654999999997 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.2455 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1583.9654999999998 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1593.8454999999994 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.9254999999998 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1601.5655000000002 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1631.8457499999995 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1619.84575 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1630.1657499999997 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1649.6457499999997 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1659.1257499999997 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1678.8859999999995 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.4857500000003 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1700.5660000000012 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1707.1659999999997 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1706.2859999999996 + }, + { + "M": 183296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1729.0860000000002 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1713.6460000000002 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1734.5260000000003 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1733.5259999999994 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.5662499999999 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.36625 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1760.1262499999998 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.0462499999999 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1811.8465 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.8064999999997 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1789.48625 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1817.3664999999996 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1818.5264999999995 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1838.366500000001 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1837.4064999999996 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1846.9264999999996 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1853.8864999999996 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1886.52675 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1906.0067499999996 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1899.8467499999997 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1925.1670000000004 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.1667500000003 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 1943.8469999999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1942.9269999999997 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1931.8469999999998 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1941.9670000000006 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.9670000000006 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1960.8869999999997 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1991.0872499999996 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1990.1272500000005 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2006.1672500000002 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.64725 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.1672500000004 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.1274999999994 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.6475 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.52725 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2055.0074999999997 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.407500000001 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2077.6875 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2087.2874999999995 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2103.52775 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2080.9674999999997 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.5277499999993 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2124.80775 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.0477499999997 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2154.1277500000006 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2135.60775 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2149.3677499999994 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2179.5279999999993 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.1279999999997 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.0879999999997 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2171.848 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2217.2879999999996 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2225.72825 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2235.96825 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.96825 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.6082499999993 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.92825 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.80825 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2277.4482500000004 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2297.2884999999997 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2259.4882500000012 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.4084999999995 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2305.0085 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.6085000000003 + }, + { 
+ "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.0485 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2322.4085000000005 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.88875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.2487499999997 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.7287499999993 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2355.2487500000007 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2369.04875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2384.72875 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.3289999999997 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2416.4489999999996 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.4490000000005 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.2090000000007 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.7290000000003 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2443.4490000000005 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2463.5289999999995 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2483.16925 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2444.089 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2489.96925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.5292500000005 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2514.5292500000014 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2513.8092500000002 + }, + { + "M": 271360, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.249249999999 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2543.2095000000018 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.9695 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2562.0495 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.4495000000006 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2541.6494999999995 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.8895 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2599.28975 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2608.8097499999994 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2628.409749999999 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.32975 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.00975 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2649.16975 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.9299999999994 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.249999999999 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2645.6497500000005 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.370000000001 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.41 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.369999999999 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.33 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2698.0499999999997 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2739.290249999998 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2769.290249999999 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.010250000002 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2767.17025 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.7302499999987 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.6105000000016 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.130500000001 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2807.570499999999 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2817.170500000001 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.8505000000014 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.450500000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2855.9307499999995 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.5705 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.7707499999997 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.490499999999 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2884.290749999999 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.7307500000015 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.0507499999985 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2902.690999999998 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.8107500000006 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.9709999999995 + }, + { + "M": 314368, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2941.0910000000003 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.7709999999997 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2949.450999999999 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.7310000000007 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.091250000002 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.3312499999975 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.1312500000004 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2997.6912500000017 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.1712500000003 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3016.1712500000003 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3025.9712500000014 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3045.811499999999 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.8515000000007 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3016.571250000001 + } + ], + "1408": [ + { + "M": 1, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000015 + }, + { + "M": 128, + "rows_per_block": 
1, + "vec_size": 2, + "time_us": 3.919000000000004 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999998 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.479 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.679249999999982 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.038999999999987 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.47900000000001 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.03925000000001 + }, + { + "M": 5120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 54.59925 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 63.999249999999996 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.23950000000002 + }, + { + "M": 8192, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 85.3995 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.43949999999995 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 104.8395 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 115.63950000000001 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 122.91949999999999 + }, + { + "M": 13312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 139.83950000000004 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.11950000000002 + }, + { + "M": 15360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.63950000000006 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.07950000000002 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 173.35974999999993 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 186.35975000000005 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.59975 + }, + { + "M": 
20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.03975000000003 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 217.48000000000002 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 222.83999999999992 + }, + { + "M": 23552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 233.92000000000002 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 240.52024999999998 + }, + { + "M": 25600, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 261.2799999999999 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 261.84000000000003 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 269.71999999999997 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 272.76025 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.7602500000001 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 287.20025000000015 + }, + { + "M": 31744, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 316.8402500000001 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 310.88025000000005 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.4404999999998 + }, + { + "M": 34816, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 347.2405000000001 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 354.16049999999984 + }, + { + "M": 36864, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 367.20050000000003 + }, + { + "M": 37888, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 370.2805000000002 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 363.9207500000001 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 394.0407499999998 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.2407499999998 + }, + { + "M": 41984, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 407.56074999999976 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 416.32075 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.6007500000003 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 441.08074999999985 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 434.6007500000002 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 449.4409999999998 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 464.7209999999998 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.3609999999999 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 487.9212500000002 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 494.44100000000014 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.36099999999976 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.2410000000001 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 521.6812499999999 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 524.5212500000002 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.28125 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9215000000002 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 562.48125 + }, + { + "M": 59392, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 577.1214999999997 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.6015 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 576.4412500000003 + }, + { + "M": 62464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 599.8015 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 609.2015000000001 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.5217500000006 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 615.1217500000002 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 644.0017499999999 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.0817499999998 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 665.5617500000001 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.2019999999999 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.3617499999998 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 674.08175 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 694.4820000000002 + }, + { + "M": 73728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 711.4819999999997 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 714.8420000000001 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 734.0819999999999 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 729.402 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 742.3222500000002 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 751.80225 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.8022499999997 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 781.8022500000002 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 765.6022499999999 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 790.20225 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 799.4022500000001 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 808.8822500000003 + }, + { + "M": 86016, 
+ "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.6025000000002 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.8824999999997 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.7625 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 857.1225000000004 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 845.8025 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 865.6424999999997 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 859.0824999999998 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 884.4827499999997 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 893.9627499999997 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.40275 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.8827499999998 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.8427499999998 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 921.36275 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.2030000000002 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 949.2429999999997 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 948.3630000000003 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.4829999999997 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 979.8029999999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 997.3632499999999 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.0830000000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 997.6030000000001 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 1017.8032500000005 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1014.0432499999996 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.1232500000006 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1046.4432499999998 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1062.9234999999999 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1034.80325 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1064.2435 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1080.8034999999995 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1083.2835000000005 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.7635 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1081.6835 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1111.8435 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1138.2037500000001 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1127.0837499999998 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1149.96375 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.5637500000003 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1180.8839999999996 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.1240000000003 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.6039999999998 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1197.884000000001 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1193.924 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1210.6439999999998 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1226.284 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1225.004 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1249.3242500000001 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.2439999999997 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.8042499999997 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.9642499999995 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.7642499999997 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1304.1242499999998 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.2442499999997 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.4045000000006 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1314.2845000000002 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.2845000000002 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.8844999999997 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1311.4045000000006 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1347.3245000000002 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1348.6045000000008 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1377.3647499999997 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1375.8047499999993 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1388.6047499999995 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1385.96475 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1405.6447499999995 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.7647500000003 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.00475 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1445.7250000000017 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.685 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1452.525 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.2850000000003 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1451.2850000000003 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1472.2850000000008 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.7652499999995 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.2452500000004 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.8852500000003 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.9652499999993 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.4852499999997 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1537.485250000001 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.9652500000002 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.8852500000003 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1586.3254999999995 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.8055 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1573.9654999999993 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1573.0054999999998 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1602.4454999999994 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1591.0855000000001 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.8855000000008 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.8857500000008 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.0457500000002 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1649.4457499999999 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1658.6057500000002 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1678.6060000000002 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.8057500000004 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1687.0459999999998 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1696.5659999999998 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1705.6459999999997 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1684.0460000000003 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1713.7659999999996 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.1262499999998 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1732.766 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1731.726 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1751.1262500000007 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1771.0462499999999 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.8862499999996 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1779.52625 + }, + { + "M": 193536, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1803.6064999999999 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.32625 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1828.6465000000007 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1817.5265 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1836.8864999999996 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1866.8467499999997 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1823.3265000000001 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.9267499999996 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1884.52675 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.2067499999994 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1903.60675 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.526749999999 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.4069999999992 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1910.7667499999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1920.2067499999994 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1947.0070000000005 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1948.0869999999995 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1959.407000000001 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1958.607 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1967.607 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.8069999999998 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1984.68725 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2006.0872499999998 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2033.96725 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2043.2874999999995 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2044.1675000000005 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2042.7675000000002 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2062.1674999999996 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2081.7275 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2062.0075000000006 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2092.0075000000006 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2057.3675000000003 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.8075 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2099.5275 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2140.1677499999996 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2108.0077499999998 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2124.0877500000006 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2156.88775 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2156.0077499999998 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2165.2080000000005 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2174.6079999999993 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.568 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2193.648 + }, + 
{ + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2192.6479999999992 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2202.048 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2210.9279999999994 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2209.728 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.5282500000003 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.848250000001 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2262.2482500000006 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.68825 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.3682500000004 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2290.1285 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2289.0485 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2298.7685 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.888499999999 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2310.0085 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2333.8885 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2353.2487499999997 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.96875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2372.2487499999997 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2366.88875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2390.96875 + }, + { + "M": 258048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2403.3287499999997 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2411.4089999999997 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2420.7690000000002 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2394.4087499999996 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.4489999999996 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.929 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.7289999999994 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.209 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.7290000000003 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2464.6089999999995 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2477.7692500000003 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2497.6492499999995 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2506.92925 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.129249999999 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2546.6895000000013 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.289499999999 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2544.6095000000005 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2564.4895000000006 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2536.049499999999 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.0895 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.329499999999 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2601.289749999999 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2620.6897500000005 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.289499999999 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2618.8097500000003 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.7297499999995 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.7300000000005 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.0097499999993 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2619.209749999999 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.7299999999996 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.329999999998 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.970000000001 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2694.05 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2679.4500000000003 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2717.290249999999 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2726.650249999998 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2751.650249999999 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2745.37025 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2757.8102499999986 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2764.4102500000017 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.8905000000004 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2788.7704999999996 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2788.0904999999993 + }, + { + "M": 302080, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2779.8902499999995 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.8105000000014 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2826.450499999999 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.0905000000002 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2845.290750000001 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.6905000000015 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2854.41075 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2873.9707500000004 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2883.3307499999983 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.8507500000005 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2883.9707499999995 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.8507499999996 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.490749999999 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.3709999999983 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.1710000000003 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.410999999999 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.331 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.651 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.651249999999 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.291250000001 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8512499999997 + }, + { + "M": 323584, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 3005.8912500000006 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.0112499999987 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3015.6112500000027 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.1714999999976 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3004.69125 + } + ], + "1440": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3590000000000018 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.9990000000000023 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.7989999999999995 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.199000000000005 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.759 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.319249999999997 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.87899999999999 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.159 + }, + { + "M": 5120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 54.23925000000001 + }, + { + "M": 6144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 64.75924999999998 + }, + { + "M": 7168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 73.59925 + }, + { + 
"M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.35924999999999 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.67924999999998 + }, + { + "M": 10240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 101.91949999999999 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 117.19949999999997 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 124.35950000000005 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 139.7995 + }, + { + "M": 14336, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 158.35950000000003 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 156.55975000000004 + }, + { + "M": 16384, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 168.35975000000005 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 178.51975000000002 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 188.80000000000004 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.83975000000004 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.27975000000004 + }, + { + "M": 21504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.72000000000003 + }, + { + "M": 22528, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 233.3599999999999 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 227.92000000000002 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.96000000000004 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 253.9999999999999 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 274.92025 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.36 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.80024999999995 + }, + { + "M": 29696, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 295.72025 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 289.4802499999999 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.7605000000001 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.52049999999997 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 334.08024999999986 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.0005000000002 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.3604999999999 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 361.56049999999993 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 372.44074999999987 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 377.5205000000002 + }, + { + "M": 39936, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 401.7607499999999 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 393.68075 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.56074999999976 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 420.4407500000002 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.2807499999999 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 434.36075000000017 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 450.92075 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 464.68075 + }, + { + "M": 48128, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 483.24099999999976 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.0409999999998 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 487.6410000000001 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
498.2410000000001 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.40100000000024 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.9612500000001 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.72125 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.7612499999998 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.28125 + }, + { + "M": 57344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.80125 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 574.2815 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 572.9615000000001 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 575.4814999999999 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.28125 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 590.8815000000002 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 593.1614999999999 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 610.0417500000003 + }, + { + "M": 65536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 612.4014999999999 + }, + { + "M": 66560, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 652.9217500000002 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.2417500000001 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 650.8017499999999 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 657.84175 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 693.402 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.0017499999999 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 713.0420000000001 + }, + { + "M": 73728, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 693.2019999999998 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.962 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.922 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 736.9622499999998 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.9219999999998 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.2822499999997 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 780.5222500000002 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 779.1624999999999 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.2822499999997 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.2822499999997 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 797.0422499999997 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.80225 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 827.2825 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 825.9624999999999 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.4024999999999 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 845.1224999999997 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.8425 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.2827499999999 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 868.4424999999999 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 894.6827500000002 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 904.16275 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.9627499999997 + }, + { + "M": 96256, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 923.4427499999999 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 933.4029999999998 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 932.0027499999997 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.5629999999996 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 959.4030000000002 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 959.2829999999999 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.4830000000002 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 987.9229999999998 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 986.8832500000003 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.8832499999997 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1005.8432500000004 + }, + { + "M": 107520, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1038.4432499999998 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.28325 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1035.84325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1043.8832499999999 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1055.32325 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.6032500000006 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1082.9634999999998 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1084.0434999999998 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.8435 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1103.3635 + }, + { + "M": 117760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1113.1235000000001 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1122.9237499999995 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.88375 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1160.1237499999997 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1159.6037500000002 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1147.6437499999997 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.6839999999997 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.8839999999996 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1193.2440000000001 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1207.844 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1203.524 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1238.1239999999998 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1221.0839999999998 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1235.2839999999997 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1250.5242500000004 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1230.1640000000002 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1264.2442500000002 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1279.0442500000004 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1278.4042500000005 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.7242499999998 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.7242500000002 + }, + { + "M": 139264, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 1327.3645000000001 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1322.2044999999998 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.3244999999997 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1348.4044999999996 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.1245000000004 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1339.2845000000002 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1369.4445 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1375.60475 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1384.9247500000001 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.9245 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1396.68475 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1416.3647499999997 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1415.5647499999995 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1435.2849999999999 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1434.4450000000002 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.205 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1474.0049999999997 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.3650000000002 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.6050000000005 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1492.8452500000003 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.72525 + }, + { + "M": 160768, + "rows_per_block": 8, + "vec_size": 
2, + "time_us": 1510.8852500000003 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1521.3652499999994 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.24525 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.4452499999993 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.4052500000003 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1549.0052499999993 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1568.4854999999993 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1578.2454999999995 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1576.1655000000005 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1597.2055 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1616.7657500000005 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1625.2057499999996 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1624.5257499999993 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1632.4857500000003 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.40575 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1652.5257499999993 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1662.1257500000002 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1681.726 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.3657500000008 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1690.6059999999998 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.2059999999997 + }, + { + "M": 182272, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1719.6859999999997 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1728.966 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.406 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1747.9662500000004 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1757.48625 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.80625 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1776.2462500000001 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1773.1262500000012 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.7262500000006 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1784.366250000001 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1793.44625 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1814.0465000000004 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1799.9662499999995 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.0864999999994 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1842.0464999999995 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.2064999999993 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1864.0467500000004 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1877.2867500000002 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.2467500000002 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.2067500000003 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.2867499999993 + }, + { + "M": 203776, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1917.9667500000005 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1882.8867500000006 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1916.56675 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1935.8469999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1945.6870000000008 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.3269999999993 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.9270000000006 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1964.4470000000001 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.447250000001 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.0072499999997 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2003.1672499999997 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1988.1272499999995 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.8472499999998 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2031.2072500000004 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2051.0074999999997 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2060.2474999999995 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2046.1274999999994 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.0474999999997 + }, + { + "M": 222208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2085.8875000000007 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2106.4877500000002 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2088.4474999999993 + }, + { + "M": 225280, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2072.3675000000003 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2128.2877500000004 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.92775 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2126.4477499999994 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.3277500000004 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2138.76775 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2153.2877500000004 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2162.608 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2172.1279999999997 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2181.648 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2206.2879999999996 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2221.2479999999996 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2229.1282500000007 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2250.4882500000012 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2238.96825 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.4482499999995 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2243.2882499999996 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.72825 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.0085 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2292.2884999999997 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2284.96825 + }, + { + "M": 246784, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2321.6085000000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.6885 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.1285 + }, + { + "M": 249856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2343.2084999999997 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2325.6485000000002 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2352.04875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.16875 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2380.968749999999 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2401.04875 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.2887499999997 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.889 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2398.76875 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.249 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.969 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2432.8089999999993 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2449.7289999999994 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.928999999999 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2458.5689999999995 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2488.6092500000013 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.689000000001 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2505.20925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2512.2892500000007 + }, + 
{ + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.4092499999997 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2531.32925 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.7692500000003 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2550.2495 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2570.1695 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.169499999999 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.2895000000008 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.9295 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.489749999999 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.9297499999993 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2624.8497500000003 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.129749999999 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.00975 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.13 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.6900000000005 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.0499999999993 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2682.0899999999992 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2640.8097500000003 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.0499999999993 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.5699999999997 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2720.250250000001 + }, + { + "M": 290816, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2709.209999999999 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2735.57025 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.6502500000024 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2776.9302500000013 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2755.330250000002 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2774.9302499999985 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2742.7702500000005 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2804.4505 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2793.0904999999984 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2793.410499999998 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2823.8505000000014 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.410500000001 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2822.090500000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2841.530749999998 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.690749999998 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2860.65075 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2829.4905000000017 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2881.0107499999995 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2879.9307499999995 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2889.6107500000026 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.7307500000015 + }, + { + "M": 
312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.010750000002 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2919.411 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2948.4109999999973 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.8510000000015 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2946.611000000001 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.810999999997 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.8112500000016 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.491250000002 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3000.77125 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.1712499999994 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.0112499999996 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3036.2914999999985 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3045.2515000000003 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.771499999999 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3054.0514999999978 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3020.8912499999997 + } + ], + "1536": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0789999999999935 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 1.9990000000000026 + }, + { + 
"M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.438999999999993 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.079000000000008 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.918999999999997 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.159000000000006 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.758999999999993 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 24.999000000000002 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.27924999999999 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.07925 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 54.83925 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 63.71925 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 75.39950000000002 + }, + { + "M": 8192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 84.31924999999997 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 95.79924999999999 + }, + { + "M": 10240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 104.39950000000002 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 117.59950000000003 + }, + { + "M": 12288, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 125.87950000000004 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 133.95949999999996 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 148.47975000000002 + }, + { + "M": 15360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 147.5195 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.35950000000003 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 
176.43975000000003 + }, + { + "M": 18432, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 189.67975 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 187.87975 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.51975 + }, + { + "M": 21504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 215.67974999999996 + }, + { + "M": 22528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 225.0000000000001 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 233.11999999999995 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 235.2800000000001 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.24024999999995 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 262.44000000000005 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 271.2802499999998 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 282.08025000000015 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 293.64025 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.4002499999999 + }, + { + "M": 31744, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 317.44049999999993 + }, + { + "M": 32768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 318.44025 + }, + { + "M": 33792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.64025000000015 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 337.24024999999995 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 350.8405 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.08050000000003 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 381.08050000000014 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 379.6405 + }, + { + "M": 39936, + "rows_per_block": 8, 
+ "vec_size": 2, + "time_us": 389.08050000000003 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 380.8805000000003 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 403.0804999999998 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 417.24074999999993 + }, + { + "M": 44032, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 430.4007499999998 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 431.32074999999986 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 427.6407499999999 + }, + { + "M": 47104, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 460.12099999999987 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 465.3610000000001 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 463.76099999999997 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 467.7210000000001 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 474.121 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 514.0010000000002 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 512.3612499999997 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.1609999999998 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 531.3612500000004 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 530.1612499999999 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 539.6812500000001 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 571.2012499999998 + }, + { + "M": 59392, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 578.1214999999997 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 578.9615000000001 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 577.3614999999998 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 598.1615000000002 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 607.8014999999998 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 606.2417500000001 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 626.7617499999999 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 624.0815 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 645.7617500000001 + }, + { + "M": 68608, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 667.1617500000002 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 675.7617499999998 + }, + { + "M": 70656, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 683.60175 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 673.7617499999999 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.1617500000002 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.3220000000001 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.0819999999999 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 705.1619999999998 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 730.9622499999998 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.56225 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.3622499999999 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.6422499999999 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 761.28225 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 767.3222499999997 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 792.0822500000002 + }, + { + "M": 
83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 811.7624999999998 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 810.5224999999998 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.9225000000004 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 803.7622499999998 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.3625000000004 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 858.8424999999997 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 847.5625 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 857.1624999999999 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 850.5625000000002 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 896.5627499999998 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.6827499999999 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 905.2827499999999 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 914.3627499999998 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 918.2827500000003 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 923.8027499999998 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 932.4827500000001 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.0829999999996 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 962.3629999999998 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.3230000000008 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 968.7629999999999 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 980.2429999999999 + }, + { + "M": 105472, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 998.0832499999997 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.4032499999998 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1010.6832499999996 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1016.0032500000002 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1025.64325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.6432500000005 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1054.80325 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.6835 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1084.2035 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1075.7635000000005 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1092.7234999999996 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.7635000000005 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1102.6835 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.8435000000004 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.36375 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.7237499999997 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.12375 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1128.6037499999998 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.9237500000008 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.92375 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.7640000000001 + }, + { + "M": 126976, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1173.32375 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1185.40375 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1213.0040000000004 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1228.5240000000003 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1227.1239999999998 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1251.52425 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.7239999999997 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1266.48425 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.9242499999996 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1289.7642499999997 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.3245000000002 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1289.9242500000005 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1318.0045000000005 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1316.9645000000005 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1333.8044999999993 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1329.7244999999998 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.2844999999993 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1348.9245000000005 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1340.1245000000008 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1378.8847499999997 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1369.1644999999994 + }, + { + "M": 148480, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1369.7247500000003 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1398.12475 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.9247499999997 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1416.9247499999997 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1436.1650000000004 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1406.3247499999998 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1455.085 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1453.5649999999996 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.685 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.4049999999997 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.7649999999994 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1503.24525 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1501.9652499999997 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1511.3652499999994 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1521.0452500000006 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1510.28525 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1539.64525 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1548.72525 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1558.5254999999997 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1567.6854999999996 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1567.4855000000007 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1586.4854999999993 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1595.6055000000006 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.3257500000004 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1614.28575 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.5257500000002 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1653.6057500000002 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1632.0057499999998 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1661.9657500000008 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1671.36575 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1680.6059999999993 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.8060000000005 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1689.2459999999996 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1719.4459999999995 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1718.2459999999996 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.2859999999996 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1737.6062500000003 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1736.5262499999994 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.36625 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.1662499999993 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1754.0462500000003 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.286250000001 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 1783.52625 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1782.6462499999998 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.2462500000001 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.3265000000001 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.6464999999998 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.6865000000007 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1839.6464999999998 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1848.8865000000014 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1857.0865000000013 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1867.2067500000012 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.8067499999997 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1902.56675 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1911.8867499999997 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1872.2467500000002 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1924.8869999999997 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1923.7269999999999 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.367000000001 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1943.0869999999995 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.3270000000002 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1972.3270000000002 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.7672499999999 + }, + { + "M": 212992, + "rows_per_block": 8, 
+ "vec_size": 2, + "time_us": 1987.7672499999999 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.60725 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1987.0872499999996 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1998.68725 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2018.2072499999997 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.4472500000008 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2055.5275 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2044.8474999999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2046.0875000000005 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.3274999999994 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.8475 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2095.0074999999997 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.9275000000007 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2114.0077499999998 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2102.4877500000002 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.80775 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2131.76775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2116.2877499999995 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.1679999999997 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.96775 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.4080000000004 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2177.768000000001 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2173.6479999999992 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.728000000001 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2216.3680000000004 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2225.848250000001 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2224.448 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2223.0879999999997 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.2082499999997 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.0882500000007 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2265.4082499999995 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2264.2082499999997 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.92825 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2283.2082500000006 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.0085 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2312.0885 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2308.2084999999997 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.848500000001 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.7284999999993 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2346.80875 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2345.9285 + }, + { + "M": 254976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2396.16875 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2360.2487499999997 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.2487499999997 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2414.249 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.4887499999995 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2414.129000000001 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.8089999999993 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.2889999999998 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.4489999999996 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2441.129000000001 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2460.969 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.168999999999 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2468.209000000001 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2493.249249999999 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2502.4492500000024 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2501.7292500000003 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.8892499999993 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2544.089499999999 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.96925 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2549.8494999999994 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.6895000000004 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2540.4094999999998 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2576.049500000001 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2575.089500000001 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.4097500000016 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.4897500000006 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.7695000000003 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2623.129750000001 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.8497500000003 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2621.409749999998 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.969750000001 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2623.4097500000016 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2670.5699999999997 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2669.250000000001 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2678.8100000000013 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2708.7699999999986 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.0499999999997 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.7302500000005 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2720.6902499999997 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2740.6902499999987 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.0102499999994 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.490249999998 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2782.450500000001 + }, + { + "M": 299008, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.330500000001 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.6504999999997 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2791.250499999999 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.1304999999993 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.8904999999995 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2829.6105000000016 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.0105000000003 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2848.450749999998 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2820.570499999997 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2858.53075 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.770749999998 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.1307500000003 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2896.57075 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2856.6907500000016 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2905.411 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.651 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.571000000001 + }, + { + "M": 316416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2965.73125 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2905.9707500000013 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2954.5310000000018 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2974.731249999998 + }, + { + "M": 320512, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.770999999999 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2993.1712500000003 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2983.251250000002 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3011.7312500000016 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3000.37125 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.1712500000003 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3029.811249999999 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.0912499999995 + } + ], + "1600": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.9990000000000023 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.038999999999994 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999991 + }, + { + "M": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.758999999999993 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.318999999999996 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.27925 + }, + { + "M": 4096, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 45.199250000000006 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 56.39924999999998 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.63924999999999 + }, + { + "M": 7168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 75.23925 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.51925 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.71950000000002 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.79925000000001 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 117.1995 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 121.99950000000001 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 135.5595 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 157.27949999999993 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 156.59975 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 165.75975000000005 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 181.11975000000004 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 179.79975000000002 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 195.59975000000003 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 196.39999999999986 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 214.15974999999997 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 218.31975 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 235.2399999999999 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 243.04000000000013 + }, + { + "M": 25600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 256.96 + }, + { + "M": 26624, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 253.72000000000006 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 276.6 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 284.64025 + }, + { + "M": 29696, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 300.08025 + }, + { + "M": 30720, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 313.8805 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.96024999999986 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.52025000000003 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.16025000000013 + }, + { + "M": 34816, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 344.44025 + }, + { + "M": 35840, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.0804999999999 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 361.4005000000002 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 383.52075 + }, + { + "M": 38912, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 391.5207499999999 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 402.52049999999986 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.4007499999998 + }, + { + "M": 41984, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 414.3207500000001 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 415.5210000000002 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.44074999999975 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 439.6009999999999 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.20074999999986 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 447.8409999999998 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 468.5210000000001 + }, + { + "M": 
49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.0809999999998 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 498.08124999999984 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 486.9609999999999 + }, + { + "M": 52224, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 509.0009999999999 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 515.80125 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 525.4412499999999 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 534.9612499999998 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 533.9212500000001 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 543.2412499999998 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 563.7612499999998 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 584.2415000000001 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 604.6415000000004 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 591.0815000000002 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 591.0814999999998 + }, + { + "M": 63488, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 622.8815000000002 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.1217500000005 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 619.7617500000001 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 629.5615000000003 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 641.5217499999999 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 648.8417500000005 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.1220000000001 + }, + { + "M": 70656, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 675.3217500000001 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.4817500000004 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 702.682 + }, + { + "M": 73728, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 721.4820000000002 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 713.242 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 733.1220000000001 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.4022499999999 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.3620000000001 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 751.0822499999999 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 770.0022500000005 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 770.28225 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.6822499999998 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 809.5222499999998 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 798.8822499999999 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 818.8424999999997 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 838.8024999999998 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.4825000000003 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 857.9624999999996 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 856.8424999999997 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.7627500000003 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 865.0425000000007 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
879.4027500000002 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 895.08275 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 904.7227499999999 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.8427499999998 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.7227499999999 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 933.6030000000001 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 932.5227500000001 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 941.9229999999995 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 960.9229999999998 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.2829999999999 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 950.8429999999998 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 978.7629999999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1007.8832499999999 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1008.7632499999999 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.4432500000003 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8832499999997 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1046.5632499999997 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.0032500000002 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.8832499999999 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1061.6435000000006 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.3234999999995 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1083.6835000000005 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1082.4434999999999 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.4834999999998 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.5235000000002 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.5235000000002 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1120.9637499999994 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1141.2037499999997 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1157.4037499999995 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1144.56375 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.6837500000001 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1190.964 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1195.0439999999999 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1183.964 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1214.1639999999998 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1215.0439999999999 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1228.0839999999998 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.1240000000003 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1236.964 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1241.5640000000003 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1231.804 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1260.2842500000006 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1290.5642499999994 + }, + { + "M": 136192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1296.0842500000003 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1299.2442499999997 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.6845000000003 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1307.8442500000006 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.4044999999996 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6845000000003 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1341.5645000000013 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1320.8045000000002 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.6844999999998 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1370.1245 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1389.8847499999993 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1386.12475 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1398.9247500000001 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.2047499999999 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1407.9247500000001 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.3247500000007 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1434.6850000000009 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.4449999999997 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1466.1249999999995 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1474.2849999999994 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 1453.1650000000004 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1484.165 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1493.4852499999997 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1492.8852499999998 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1491.8852500000003 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1511.64525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1552.0454999999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1509.1252499999996 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.8855000000003 + }, + { + "M": 165888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1559.6054999999997 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1559.0055000000002 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1579.1254999999996 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1577.6055000000001 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1598.3654999999999 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1608.0454999999997 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1626.0457499999993 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1635.3257499999995 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1622.9657500000008 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.1257499999997 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.0857500000002 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1674.8460000000005 + }, + { + "M": 178176, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 1661.9657500000003 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1680.2060000000001 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1680.6460000000002 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1700.5660000000007 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1720.5660000000007 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.4860000000008 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1717.0859999999998 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1749.1262499999993 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1750.9262500000004 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.1662500000002 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1769.9262500000004 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1784.8462499999996 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.6062499999998 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1795.6862500000016 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1815.6864999999998 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1828.8064999999997 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1832.0065000000004 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.326500000001 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1821.6464999999998 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1862.0867500000004 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.7667499999998 + }, + { + "M": 
199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1847.0465000000004 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.9267500000005 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.9267500000005 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.0467500000004 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.0467500000004 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1883.60675 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.8069999999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.8869999999997 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.9270000000006 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1946.3270000000011 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1931.4470000000001 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1996.0472500000005 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.4072499999993 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.68725 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.0472499999996 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2009.32725 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2032.9672499999995 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2021.727250000001 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2052.2074999999995 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2050.8875000000003 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.3275 + }, + { + 
"M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2081.6475 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.9674999999997 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2096.9275 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.0475000000015 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.8475 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2108.2477499999995 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2117.4077500000003 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.2077499999996 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.72775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2149.88775 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2164.568 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2184.008 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2172.968 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.4079999999994 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.1680000000006 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2211.928 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2241.2882500000005 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2220.4079999999994 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.92825 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.4482499999995 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2274.3682500000004 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2263.1282499999998 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.3284999999996 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.00825 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2264.6082499999993 + }, + { + "M": 246784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2331.2884999999997 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.6085000000003 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2350.6887500000003 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2344.6484999999993 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.8085 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.00875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2363.20875 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2383.08875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.2887499999997 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2374.2087500000007 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2400.5287499999995 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.089 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.6890000000003 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2449.4090000000006 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2451.249 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2468.049000000001 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.009000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2476.7292499999994 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2475.5292500000005 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2467.089 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2516.4492499999997 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.7292499999994 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.089249999999 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.3695 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2503.92925 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2573.4094999999998 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.5295000000006 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2571.849500000002 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2591.6094999999987 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2578.8895 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.7297500000004 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.0097499999993 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.9697499999984 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.4897500000015 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.209749999998 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.0097499999983 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2656.09 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.45 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.209999999998 + }, 
+ { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2655.169999999999 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2704.8499999999995 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.45025 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2734.0902499999993 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.5700000000015 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.9699999999993 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2740.8502500000004 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2771.410250000001 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2759.6502500000006 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.7704999999996 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.410250000001 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2809.2905 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2787.5305 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2802.690499999997 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.3304999999973 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2791.1305 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2831.5305000000026 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2850.9307499999995 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2850.0107499999986 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2880.29075 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.050750000002 + }, + { + "M": 
308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.211000000001 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.6110000000017 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.9307500000014 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.8907499999978 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2897.2907499999983 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2917.410999999999 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.2110000000002 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2961.450999999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2970.8512499999997 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.451000000001 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.3712499999983 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8912499999997 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2991.451250000001 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.651499999997 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3005.61125 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3026.651249999997 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.8515000000016 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3055.491500000001 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3044.731500000001 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3032.6115 + } + ], + "1664": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 
2.0790000000000006 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000077 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.158999999999999 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0390000000000086 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.0789999999999935 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.878999999999991 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 9.318999999999996 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.679000000000002 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.079 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.59900000000001 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.23925000000001 + }, + { + "M": 5120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 53.91924999999999 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.35924999999999 + }, + { + "M": 7168, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 76.43950000000001 + }, + { + "M": 8192, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 86.87950000000001 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 93.95925 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 105.23924999999997 + }, + { + "M": 11264, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 117.7995 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 123.43950000000001 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, 
+ "time_us": 134.35950000000003 + }, + { + "M": 14336, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 144.27975 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 155.35949999999997 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 159.23950000000002 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 179.75975 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 187.07975 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 194.03974999999997 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 194.99999999999994 + }, + { + "M": 21504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 216.08000000000004 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 227.24 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.00000000000006 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 241.04000000000002 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.40025000000006 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 254.4802499999999 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 264.1999999999998 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 273.68025000000006 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 292.56025 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 303.8002499999999 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 312.1204999999999 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 311.9602500000001 + }, + { + "M": 33792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 338.16050000000007 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 352.80049999999994 + }, + { + "M": 35840, 
+ "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.80049999999994 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 360.9604999999999 + }, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 376.6405000000001 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 391.08050000000014 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 388.36075000000017 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 381.6405000000002 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 408.8407500000003 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 429.2007500000002 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 433.2407499999998 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 432.0007499999998 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 448.9207500000001 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 450.7207499999997 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.7209999999999 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 464.3610000000002 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 478.48099999999977 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 495.44100000000003 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 497.3610000000001 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 517.24125 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 526.5612500000002 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 521.4012500000001 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 541.9212499999999 + }, + { + "M": 57344, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 554.8015 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 564.4012499999997 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 559.7615000000001 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 580.1215 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.1212499999997 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.2415000000001 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 600.9615000000003 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 599.8015000000005 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 627.8417499999998 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 625.2014999999999 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 638.4017499999998 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.8017500000001 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 655.1217500000002 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 679.8417499999998 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 685.0417500000001 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 697.1220000000001 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 708.1220000000003 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 697.0419999999997 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.3619999999996 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 731.642 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 756.1622500000001 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 765.6022499999997 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 763.9222499999996 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 763.6822500000001 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 758.1622500000001 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 782.8422500000001 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 813.1225 + }, + { + "M": 84992, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 830.0825000000004 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 821.4424999999997 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 825.3624999999997 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.0824999999998 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.8824999999999 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.0025000000005 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 879.2027499999999 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 851.9624999999999 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 877.8427500000003 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.7627500000003 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 917.0027499999997 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.1227500000005 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 909.16275 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 924.4027499999997 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.5630000000001 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.123 + 
}, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 961.9630000000002 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 955.1629999999996 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 983.4029999999998 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 982.1229999999996 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 989.4429999999998 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1011.6832500000002 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1021.24325 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1037.0032500000002 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.9632500000002 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1035.2432500000004 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1047.4832499999998 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1036.6432499999996 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.1634999999997 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1093.4034999999994 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1082.3635 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1091.8035 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.6835 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.3235 + }, + { + "M": 119808, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1124.0837499999998 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.0037499999999 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1153.1637500000002 + }, + { + "M": 
122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.76375 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.8037499999996 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1171.84375 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1181.6037499999998 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1213.324 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1198.2440000000001 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1194.8040000000005 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1224.924 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1229.4439999999995 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1253.96425 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1234.7240000000006 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1262.6442500000007 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1267.52425 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.48425 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1290.92425 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1281.48425 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1317.1244999999994 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1298.44425 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.2444999999993 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.9645 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.6444999999994 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1343.9645 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1353.2845000000002 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.924750000001 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.1247500000004 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1382.9247499999992 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1409.6447500000008 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1410.3647499999997 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1430.3250000000007 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1429.4049999999997 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1429.445000000001 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1437.9650000000006 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1467.725 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1466.6850000000009 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.8850000000002 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.8849999999998 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.2452499999995 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1515.3252499999999 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1514.60525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.2452500000009 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1502.8452500000003 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1542.8852499999998 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1552.2455 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1551.2055 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1581.1255 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1570.0455000000002 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1599.7255 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1598.8854999999999 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1607.9655000000007 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1627.565750000002 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1626.76575 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.88575 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.4457499999994 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.2457499999996 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.1257499999997 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1673.2860000000005 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1683.2059999999992 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1705.6460000000006 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1691.7259999999997 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1721.8060000000005 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1699.4459999999995 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1720.446 + }, + { + "M": 186368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1753.5662499999999 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1749.7662499999997 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.48625 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1756.6462499999998 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.7262499999997 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1786.48625 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.2465000000002 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1805.6064999999999 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.9662499999995 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1824.3265000000001 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1833.5665 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1842.7664999999997 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1831.3265000000001 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.0867499999995 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1870.8867499999997 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1881.1267500000004 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1900.9267500000005 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1910.2467500000002 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1876.60675 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1929.1670000000004 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1933.487000000001 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.6870000000008 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1936.687 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1944.0069999999996 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1965.687 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1985.3672500000002 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1974.1670000000004 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.7672500000008 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.4872500000001 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.3672499999993 + }, + { + "M": 217088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2029.527250000001 + }, + { + "M": 218112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2049.8075 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2041.0072500000012 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2037.68725 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.3675000000003 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2079.9674999999997 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2068.4075000000003 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.7275 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2065.6475 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2088.4875 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2098.3674999999994 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.88775 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.2477500000005 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2130.64775 + }, + { + "M": 
231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2143.68775 + }, + { + "M": 232448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2171.888 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2183.2479999999996 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.2079999999996 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2157.2077500000005 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2211.687999999999 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2220.048 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2230.5282499999994 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2228.8482500000005 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.4880000000003 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2260.2482500000006 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2279.96825 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.88825 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.728249999999 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2272.76825 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2287.5282500000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2306.9685 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2306.0885000000007 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.4885000000004 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.7685 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2341.9685 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 2362.04875 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2371.4487499999996 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2380.648750000001 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.96875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2379.3687500000005 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2387.8087499999992 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2413.5290000000005 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.169000000001 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2437.4489999999996 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.3689999999997 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2451.3289999999997 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.129000000001 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2456.6490000000003 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2450.208999999999 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2473.00925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.3692499999997 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2517.7692499999994 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2517.049249999999 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2509.9292499999992 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.4094999999998 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2555.169500000001 + }, + { + "M": 274432, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2575.129500000001 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2563.6094999999978 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.7694999999985 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2580.5695000000005 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.6095000000005 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2599.5697500000006 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.129749999999 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2580.6894999999986 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2637.7297500000004 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.4497499999998 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2658.6500000000005 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2668.130000000001 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2639.6897499999977 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2686.8500000000004 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2696.209999999999 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2705.9699999999984 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2725.7702499999996 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2707.0499999999993 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2729.530249999997 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2738.21025 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2758.410249999999 + }, + { + "M": 295936, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2767.8902499999995 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.8102500000005 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2797.0904999999975 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2785.570499999999 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2787.010500000002 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.770499999998 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2808.3305 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.170500000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2814.2104999999992 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2844.090750000002 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.8107499999987 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.490499999998 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.250750000001 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2862.610749999998 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2893.0507500000012 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2902.530749999999 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.2107499999993 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.370749999999 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2922.0110000000004 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2931.6110000000017 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.770999999998 + }, + { 
+ "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2941.8509999999987 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2959.8910000000005 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2958.611 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2985.0112499999996 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2988.8912500000024 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2978.7712500000034 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3007.331250000002 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3016.491249999998 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3015.9712499999987 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.8515000000007 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3004.49125 + } + ], + "1728": [ + { + "M": 1, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999991 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3189999999999955 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.0789999999999935 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.079000000000001 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.598999999999997 + }, + { + "M": 1024, + "rows_per_block": 
5, + "vec_size": 2, + "time_us": 14.879249999999992 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.238999999999997 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 35.678999999999995 + }, + { + "M": 4096, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 46.159250000000014 + }, + { + "M": 5120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 55.03925 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.47925 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 73.79924999999999 + }, + { + "M": 8192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 85.6795 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 97.07925 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 106.27950000000003 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.87950000000001 + }, + { + "M": 12288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 122.27950000000001 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 132.15949999999998 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 150.43975 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 154.79949999999994 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.1995 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 181.67974999999996 + }, + { + "M": 18432, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 189.31975 + }, + { + "M": 19456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 199.07999999999998 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 207.27975000000004 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 209.3197500000001 + }, + { + "M": 22528, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 233.60000000000002 + }, + { + "M": 23552, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 235.23999999999995 + }, + { + "M": 24576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 245.44 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.47999999999996 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 264.2402500000001 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.9600000000001 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.6400000000001 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 295.2802499999999 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 300.16025 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 315.20025 + }, + { + "M": 32768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 321.28025 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 330.92049999999995 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 350.60050000000024 + }, + { + "M": 35840, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 358.6005 + }, + { + "M": 36864, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 371.9607500000001 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 382.16050000000007 + }, + { + "M": 38912, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 385.20074999999997 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 398.0007499999998 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 394.56075 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 400.80074999999977 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 416.2807499999998 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 435.8007499999999 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
434.7607499999999 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.48074999999994 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 453.24074999999993 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 479.8409999999999 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.361 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 499.1210000000001 + }, + { + "M": 51200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 497.8009999999997 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 507.00100000000026 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.1612500000001 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.7612500000002 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 539.6412499999999 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 544.8812499999997 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.1612500000003 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 557.52125 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 566.9614999999999 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 573.1215000000002 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 571.4414999999999 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 614.1215 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 612.9214999999999 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 634.9215000000002 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.16175 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.20175 + }, + { + "M": 67584, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 653.0417500000003 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 672.8017500000001 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 671.842 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 685.0417499999996 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.44175 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 706.0819999999999 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 715.8019999999997 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 725.5619999999999 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 722.762 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.1220000000003 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 764.4422500000001 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 762.2022500000007 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 761.4022499999999 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 793.0022500000002 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 776.0422499999997 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 801.28225 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 810.76225 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 821.0424999999998 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 830.5224999999996 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 833.8025 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 839.3624999999997 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
859.0824999999995 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 858.1225000000004 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 889.5627500000005 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 870.6824999999999 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 898.0427500000001 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 906.7227499999995 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.4027499999997 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 915.8027499999998 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 939.0029999999997 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 935.4830000000006 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.2429999999997 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.0430000000001 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 951.2029999999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.4030000000002 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 977.3629999999998 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.8432499999999 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1006.9632500000004 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1018.9632500000004 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.1232500000001 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1027.4832499999998 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.68325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1054.2432499999995 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.2435000000005 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1075.4035 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1086.5235000000002 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1084.9235000000003 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1101.6035000000002 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1115.2035 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1112.4434999999999 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1134.4037499999995 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1129.2037500000001 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1142.8437500000005 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.6437500000002 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.1237499999997 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.88375 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.0439999999994 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1197.4839999999995 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1212.444 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1186.7239999999997 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1231.7239999999997 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1230.2839999999997 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1250.7642499999997 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1264.7242500000007 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1244.2040000000006 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.2842500000002 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.7642499999993 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1302.8442500000006 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1301.7642500000006 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1307.40425 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1328.004500000001 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1337.7644999999998 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1324.6844999999994 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1352.4445 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1344.3244999999997 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1371.9647499999996 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1352.1645000000003 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1380.0047499999996 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.2047499999994 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1391.16475 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1408.8047499999998 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1429.3250000000012 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.96475 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1439.3249999999998 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1428.1650000000004 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1456.9650000000001 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1466.4449999999997 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1476.2849999999994 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.125 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1496.2052500000004 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.4052500000003 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.8852500000003 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.64525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1534.4452499999998 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1522.7652500000008 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.8055 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.0054999999998 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1562.1654999999996 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.5254999999997 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1559.4455000000012 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1590.6454999999996 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1610.1654999999996 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1618.4857499999998 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.2857499999996 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.4857499999998 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1648.1657500000001 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1645.8057500000004 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1667.5257499999998 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1676.9260000000004 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1672.4857499999998 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1694.246 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1713.7260000000006 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1723.1260000000002 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1712.0860000000002 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.0060000000003 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1745.52625 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.0462499999994 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1771.9262500000004 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.36625 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1757.1262499999993 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1779.48625 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1789.0462499999994 + }, + { + "M": 192512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1812.4465 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1818.0865000000003 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1793.7662499999997 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1826.8465000000006 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1846.8465000000015 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1856.4865 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1875.8867499999997 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1861.7267499999998 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.9267500000005 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1904.2867499999993 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1893.326750000001 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1902.6867499999998 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1888.2067500000003 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.0070000000005 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1951.8070000000007 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.647 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1939.3670000000002 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.9269999999997 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1989.2872500000003 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1978.0469999999996 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2004.76725 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.52725 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.5672499999991 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.96725 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.647250000001 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2045.1274999999996 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2043.9675000000004 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2062.8875 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2077.0874999999996 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2097.0074999999997 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2086.0875000000015 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2116.0877499999997 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.3675000000003 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2114.3677499999994 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2144.447750000001 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2143.76775 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.64775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2133.1277499999997 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.2077499999996 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.3679999999995 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2187.2479999999996 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.6479999999992 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2169.728 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2226.1282500000007 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2214.848 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2244.968250000001 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2264.2482499999996 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2241.2082499999997 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.728249999999 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2287.008500000001 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2286.1285 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2295.768500000001 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.72825 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2314.6485000000002 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2334.4084999999995 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2323.6084999999994 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.7285 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2320.7685 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.72875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.5687500000004 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.92875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2386.04875 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2357.16875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.76875 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2424.169000000001 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2418.2890000000016 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.6090000000013 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2439.929 + }, + { + "M": 
262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2467.4090000000015 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.769000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2476.16925 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2506.209249999998 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.5292499999996 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2510.0892500000014 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2498.7292500000003 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2549.6895000000013 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.249500000001 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2529.169250000001 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2557.7294999999986 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.6894999999986 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.889500000001 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2586.129499999998 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2561.809500000001 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2610.6097500000005 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2609.6897500000014 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2608.969750000001 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2628.8497499999994 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2629.4097500000007 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2637.5697499999987 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2656.7700000000004 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2690.289999999999 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2689.0899999999992 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2647.849750000001 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2708.330000000001 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2728.0102500000003 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2737.8102499999986 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2726.450249999999 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2740.2902499999996 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.6902500000015 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2767.3702500000018 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2756.450249999998 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2786.2105 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2765.0502499999975 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2795.3705 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2814.930500000002 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2808.570499999998 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2817.8905000000004 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2806.8104999999996 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2857.530749999999 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2845.9707499999995 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2855.8907500000005 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2875.8107500000024 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2837.130500000002 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2939.9309999999996 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2908.4910000000027 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2928.571 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2917.490999999998 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2926.0509999999995 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2913.8509999999987 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2945.1309999999994 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2954.9309999999996 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2974.9312499999987 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2962.530999999999 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2993.6912500000017 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2972.0512499999995 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3014.61125 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3021.5715000000027 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3008.531250000001 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.3314999999984 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3060.1315000000004 + }, + { + "M": 325632, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3059.611499999999 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3048.411500000001 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3028.1312500000013 + } + ], + "1760": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2790000000000035 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.038999999999994 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.958999999999996 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 9.55899999999999 + }, + { + "M": 1024, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 14.838999999999992 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 25.279000000000025 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 35.358999999999995 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 45.31924999999998 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 55.63924999999999 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 64.35925 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 76.71925000000002 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 83.87950000000002 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 97.35950000000001 + 
}, + { + "M": 10240, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 108.9595 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 116.1195 + }, + { + "M": 12288, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 129.83975 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 136.2395 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 148.67950000000002 + }, + { + "M": 15360, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 161.15975000000003 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 161.39975000000004 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 179.67975 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 185.15975000000003 + }, + { + "M": 19456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.47974999999997 + }, + { + "M": 20480, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 206.99975 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 214.51975 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 229.60000000000002 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 235.67999999999995 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 237.4402500000001 + }, + { + "M": 25600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 258.0799999999998 + }, + { + "M": 26624, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 270.55999999999983 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 273.60024999999996 + }, + { + "M": 28672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 284.00025000000005 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 295.88024999999993 + }, + { + "M": 30720, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 314.9605 + }, + { + "M": 31744, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 
315.72050000000013 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.6002500000001 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 333.3202500000001 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 344.6005 + }, + { + "M": 35840, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 355.9604999999999 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 363.3204999999998 + }, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 369.2004999999998 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 389.4805 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 388.80050000000006 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 391.5607500000001 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 423.08074999999997 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 421.60074999999995 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 431.28075000000024 + }, + { + "M": 45056, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 445.0409999999998 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.92075 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 448.52099999999984 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 473.56100000000015 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 467.8009999999998 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 499.08124999999995 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 477.961 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 496.5609999999999 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 521.1212500000001 + }, + { + "M": 54272, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 530.48125 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.8812499999999 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 525.1612499999999 + }, + { + "M": 57344, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 565.80125 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 568.7612499999998 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 575.0015000000001 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 587.5614999999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 582.6015 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 592.9614999999999 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 615.8014999999998 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 633.88175 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 621.5215000000001 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 619.3215 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 640.8417500000003 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 663.4017500000004 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 671.202 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.20175 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 695.9620000000002 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 705.6020000000003 + }, + { + "M": 73728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 718.0819999999999 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 717.5619999999997 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 737.3220000000001 + }, 
+ { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 744.5219999999997 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 743.2819999999997 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 774.7222499999998 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 764.9622500000003 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 785.0822499999997 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.1222499999997 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 802.6422500000001 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 812.3224999999998 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 822.0025 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 820.6824999999999 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 841.3625 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.6025000000002 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 860.8025000000007 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 859.4825000000001 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.3627499999998 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 878.8027499999998 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 888.6827499999999 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 920.16275 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.8027499999998 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 928.6427500000004 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 921.2827499999999 + }, + { + "M": 98304, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 937.0829999999996 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 946.9630000000002 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.3229999999999 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 991.3630000000003 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 962.723 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 980.4029999999998 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.2832500000004 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 998.5230000000006 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1018.2432500000004 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 999.5632500000002 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1047.8832499999999 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1056.9234999999999 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1048.0032499999998 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1055.2432499999995 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1057.12325 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1074.7635000000005 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1086.2435 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1117.8435 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.6034999999997 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1124.8837500000004 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1124.84375 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1142.0037500000003 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1130.763750000001 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1169.8837499999995 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1141.7237499999997 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1184.6040000000003 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1200.8439999999996 + }, + { + "M": 125952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1193.1240000000003 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1209.6040000000003 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1212.5239999999994 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1208.0040000000008 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1227.6440000000002 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1241.5239999999994 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1267.404250000001 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1257.0442500000004 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.36425 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1291.2042499999998 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1305.6842500000002 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1299.4042499999996 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.96425 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.9645 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1343.6844999999994 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 1358.1645000000003 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.5644999999995 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1325.8445000000002 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1372.524750000001 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.0845 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.9247500000001 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1384.2047499999994 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.8447499999993 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1421.16475 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1419.44475 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1429.1649999999995 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1462.3650000000002 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.3249999999998 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1440.0049999999992 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.6452499999996 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1488.76525 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.4850000000001 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.72525 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1508.68525 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.6052499999996 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.5252499999997 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1537.5252499999992 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1514.6052499999996 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1555.0454999999997 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1565.6455000000005 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.2855000000009 + }, + { + "M": 167936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1584.1654999999996 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1603.1254999999996 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1624.4057499999994 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.8854999999994 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1600.125500000001 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1640.6057500000002 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1638.0457500000007 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1649.24575 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1648.245750000001 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1658.0057499999994 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1677.6059999999998 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1674.286 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1697.286 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1716.246000000001 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1715.366 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1704.1259999999993 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1701.0060000000003 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.9262499999995 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1734.326 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1775.2062499999993 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.1662500000002 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1780.6062500000007 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1792.8862500000005 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1812.6064999999999 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1812.3265000000001 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1821.4465 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1806.8464999999997 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.1664999999994 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1852.6064999999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.0067499999996 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1858.8464999999997 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1885.0867500000004 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1877.52675 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1897.44675 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1896.60675 + }, + { + "M": 203776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1920.1667500000003 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1890.8867499999997 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1914.52675 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1924.0470000000005 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1943.8469999999998 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1952.687 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1957.8470000000016 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1981.8072500000007 + }, + { + "M": 211968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1997.0072499999997 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.9269999999997 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2010.2472500000003 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.4872499999992 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.48725 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.9672499999997 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.7675 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2057.3274999999994 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2066.1275000000005 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2098.1275000000005 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2100.6075 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.6875 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2109.3677499999994 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2092.8075 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2128.60775 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2137.96775 + }, + { + "M": 228352, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.4477500000003 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2164.2479999999996 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2146.1677499999996 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2161.688 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2181.3680000000004 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2190.808 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2210.3279999999995 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2193.1280000000006 + }, + { + "M": 236544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2238.4882499999994 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2218.4480000000003 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2227.88825 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.7682499999996 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2255.2482499999996 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2271.6482499999993 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2260.2482500000006 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2280.00825 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2310.3684999999996 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2271.3282499999996 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.5285000000003 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2318.1684999999998 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2338.2884999999987 + }, + { + "M": 249856, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 2321.4085000000005 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2344.3685000000005 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2371.4087499999996 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2370.4087499999996 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2400.6887500000003 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2389.5687499999995 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2381.4487499999996 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2398.3287499999997 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.929 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2422.7690000000002 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.6090000000004 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2423.249 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2461.889 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2461.089000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2480.889250000001 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2500.4892500000005 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2459.8489999999993 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.2492500000003 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.16925 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.849500000001 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2554.6495000000004 + }, + { + "M": 271360, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2555.2895 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.929500000001 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.329499999999 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2593.20975 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2592.2895000000017 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2576.3695000000007 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.5297500000006 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2614.7697500000004 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.969750000001 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.6497500000014 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.8897499999994 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.25 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2662.21 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.289999999998 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2697.7700000000013 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.7700000000023 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.8499999999985 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.8100000000004 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2723.13025 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.9702499999994 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2748.37025 + }, + { + "M": 292864, + "rows_per_block": 10, 
+ "vec_size": 2, + "time_us": 2764.250250000001 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.8902499999976 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2773.0902499999984 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.8904999999977 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2760.610249999997 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2801.6105000000007 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2811.3305 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2813.8104999999996 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2812.930500000002 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2811.450499999998 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.4505 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2841.690749999998 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.730749999999 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.730749999999 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.37075 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2916.3309999999983 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2915.2109999999993 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.0510000000013 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2934.371 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.611 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.610999999999 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, 
+ "time_us": 2961.770999999999 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2982.3312499999993 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2960.411 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2957.371000000003 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2979.451250000001 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.77125 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.411250000001 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3029.851499999999 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2995.2512500000003 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3049.091500000003 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3069.131500000003 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 3105.4917499999983 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3077.2114999999985 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3034.77125 + } + ], + "1792": [ + { + "M": 1, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.079000000000022 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119249999999994 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0790000000000006 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 
4.079000000000001 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879250000000006 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.639000000000003 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.639000000000003 + }, + { + "M": 2048, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 25.198999999999998 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.79899999999999 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 45.35925000000001 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 56.119249999999994 + }, + { + "M": 6144, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 65.67925 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 76.07925 + }, + { + "M": 8192, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 86.03925000000001 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 94.19924999999999 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.27924999999999 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 112.1995 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 123.87949999999995 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 134.87950000000004 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.27950000000004 + }, + { + "M": 15360, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 150.4395 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.11949999999993 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 169.83974999999998 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 183.15974999999992 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 194.55975000000007 + }, + { + "M": 20480, + "rows_per_block": 5, 
+ "vec_size": 2, + "time_us": 206.31974999999994 + }, + { + "M": 21504, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 221.44 + }, + { + "M": 22528, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 223.63999999999987 + }, + { + "M": 23552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 234.72000000000008 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.16000000000008 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 241.72000000000008 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 266.2 + }, + { + "M": 27648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 275.72024999999996 + }, + { + "M": 28672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 271.8399999999999 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 291.2402500000002 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 298.84024999999986 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 314.24025000000006 + }, + { + "M": 32768, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 322.96024999999986 + }, + { + "M": 33792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 339.0005000000001 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 338.56049999999993 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.04050000000007 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 359.8404999999998 + }, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 366.7204999999999 + }, + { + "M": 38912, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 382.56074999999987 + }, + { + "M": 39936, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 389.44050000000004 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 382.4407500000001 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 409.6407499999999 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 418.7607499999998 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 433.44075 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 448.92100000000005 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 436.48074999999994 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 451.081 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 455.8007499999999 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 476.28099999999984 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 485.7209999999998 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 496.001 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 498.48099999999977 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 514.2412500000003 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 513.1610000000001 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 544.6012499999999 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 532.32125 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 552.8015000000003 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 572.9612500000001 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 560.9615000000001 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 570.5215000000001 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 569.2814999999998 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 600.6815000000001 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 601.9615000000001 + }, + { + "M": 
64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.8014999999998 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 618.2415000000001 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 647.0017500000001 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 654.3617500000003 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 669.7617499999997 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 678.4419999999999 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 669.9617500000006 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 666.8417499999996 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 680.9217499999997 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 698.6019999999999 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.2419999999997 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 750.7222500000005 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 745.2019999999998 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.962 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.72225 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 768.2422499999998 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 757.0022500000002 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 760.76225 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 807.0422499999997 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 795.8022499999997 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.5224999999998 + }, + { + "M": 86016, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 814.6425000000004 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 829.1624999999997 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 844.2425000000003 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 854.2424999999998 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 863.6425000000004 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 862.1225 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 876.0827499999996 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 892.5227499999996 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 911.4827499999997 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.3227500000003 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 919.9227500000002 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 923.2827499999999 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.4827499999997 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 958.8029999999999 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.1230000000005 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.7629999999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 966.643 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 971.8029999999999 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 981.4830000000002 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2032499999997 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1000.4032500000002 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 1023.6832499999999 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8832500000002 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1042.4032499999998 + }, + { + "M": 110592, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1062.9634999999998 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1061.8035000000004 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1039.5232499999997 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1076.8035 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1079.4434999999999 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1106.1234999999997 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1094.5634999999997 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1086.4434999999999 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1103.3235 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1123.2037500000001 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.5637500000007 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.8437499999998 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.0437499999998 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1171.6437499999997 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1181.1237499999997 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1201.0039999999995 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1189.5639999999994 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.1639999999993 + }, + { + "M": 129024, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 1214.524 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1228.6440000000002 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1227.364 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1236.8439999999996 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1248.92425 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1284.04425 + }, + { + "M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1282.7642499999997 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1285.2842500000002 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.2442500000002 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1300.56425 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1303.56425 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1312.6445000000008 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1325.9644999999996 + }, + { + "M": 142336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1346.5245 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1318.2044999999994 + }, + { + "M": 144384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1365.7645000000002 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1366.2844999999998 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1365.7245000000003 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1383.3647499999997 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1385.8447500000002 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1404.60475 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.96475 
+ }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1423.3247499999998 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1442.4850000000001 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.6849999999995 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1461.0850000000014 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1471.245 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1479.9650000000001 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1490.6052500000005 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1480.085 + }, + { + "M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1498.4452500000002 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1518.8852500000007 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1528.20525 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1527.3652500000007 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.3252500000003 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.20525 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1555.5654999999997 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1575.5255000000002 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.5254999999997 + }, + { + "M": 168960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1585.0455000000006 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1593.4454999999994 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1592.3255 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1612.2057499999992 + }, + { + 
"M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1620.5257500000007 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1609.2055 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1639.0857499999993 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1669.32575 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1668.5257499999998 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1667.5257499999998 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1686.8860000000013 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1688.9260000000004 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1695.8860000000004 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1694.9660000000003 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1725.0860000000002 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.4459999999995 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1748.6862499999997 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.0062499999995 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1753.44625 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1762.8062499999996 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1750.2062499999993 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1785.406250000001 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1780.40625 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1810.9264999999996 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1809.8464999999997 + }, + { + 
"M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.9262500000004 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1843.9665000000005 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1838.1265000000003 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1852.7264999999998 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1857.2064999999993 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1864.7267499999998 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1865.8867499999997 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1895.8067500000006 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1905.2867499999993 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1894.1267500000013 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1880.3267500000002 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.567 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1932.3269999999993 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1962.8869999999997 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.9269999999997 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1947.9269999999997 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1959.8869999999997 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1969.0072500000006 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1996.1272500000005 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1998.4872500000001 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1994.8872499999998 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2006.6472500000007 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2026.4872499999992 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2046.3674999999992 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2034.60725 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.2472499999994 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2056.487500000001 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.4474999999993 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2105.0877499999997 + }, + { + "M": 224256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2103.4477500000003 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2070.3275000000003 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2104.0477499999997 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2123.4877499999993 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2153.9277500000007 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.5277499999993 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2135.3277499999995 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2158.76775 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2178.3680000000004 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2198.048 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.2079999999996 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2171.688 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2205.728 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2225.5682500000003 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2203.848000000001 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2223.168 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2231.4882500000003 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2256.08825 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2285.8485 + }, + { + "M": 243712, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2285.4885000000004 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2284.048500000001 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2257.5682500000003 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.0485 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2301.968499999999 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2321.8885 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2328.1685000000007 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2322.7284999999993 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2347.20875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.4887499999995 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2366.088749999999 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2364.928750000001 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2389.76875 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2404.5687500000004 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2402.7287499999998 + }, + { + "M": 
259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2416.4889999999996 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2425.7690000000002 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2418.968999999999 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.4890000000005 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2454.129000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2463.5689999999995 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2462.4089999999987 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2445.169000000001 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2488.40925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2515.7292500000003 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.9295 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.6892500000004 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2496.249249999999 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2533.009250000001 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2542.7695000000012 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2562.169499999999 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2561.089500000002 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.5295000000015 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.209499999999 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2606.8097499999985 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.7297499999986 + 
}, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.9297499999993 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2607.4497500000007 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.4897500000006 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2659.5699999999997 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.569999999998 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.170000000002 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.6897499999986 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.8100000000004 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2682.5699999999997 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.8499999999985 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2722.290250000002 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.33025 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2722.0502499999993 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2752.010250000001 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2762.0502499999984 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2750.2102499999983 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2772.5302500000016 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.6904999999997 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2799.5705000000025 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2815.1705 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2803.7704999999996 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2794.6505000000006 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2822.7305000000006 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.650749999999 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2851.8507499999987 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.25075 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2824.2905 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2872.8107500000006 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.410749999998 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2892.0507500000003 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2890.7707500000015 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2881.210750000001 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2928.291 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2929.730999999999 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2939.1710000000003 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2948.770999999998 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2927.7710000000015 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2967.490999999998 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2976.6912500000008 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2972.411249999999 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2992.4512499999983 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2981.9312500000005 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.3314999999993 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3030.5715 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3040.5315 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3039.611500000002 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.9712499999996 + } + ], + "1920": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999856 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0389999999999944 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.439 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.119000000000007 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 5.879000000000005 + }, + { + "M": 512, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 9.678999999999995 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 14.759 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 25.31899999999999 + }, + { + "M": 3072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 34.99900000000001 + }, + { + "M": 4096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 45.27925000000002 + }, + { + "M": 5120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 55.039249999999996 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 64.79925000000003 + }, + { + "M": 7168, + 
"rows_per_block": 4, + "vec_size": 2, + "time_us": 76.95925000000001 + }, + { + "M": 8192, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 86.31949999999999 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 92.99950000000001 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.63925 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 115.43950000000004 + }, + { + "M": 12288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 121.5995 + }, + { + "M": 13312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 131.67950000000002 + }, + { + "M": 14336, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 147.39950000000002 + }, + { + "M": 15360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 149.15975000000003 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 160.35950000000003 + }, + { + "M": 17408, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 180.59975000000003 + }, + { + "M": 18432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 189.15975000000003 + }, + { + "M": 19456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 197.92000000000002 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 195.87975 + }, + { + "M": 21504, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 218.08000000000004 + }, + { + "M": 22528, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 228.27999999999997 + }, + { + "M": 23552, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 238.36000000000007 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 236.56 + }, + { + "M": 25600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 252.80024999999983 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 263.88 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 273.28025 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 
285.9202499999999 + }, + { + "M": 29696, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 294.1205000000001 + }, + { + "M": 30720, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 306.88025000000005 + }, + { + "M": 31744, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 319.6005 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 313.36024999999995 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 331.20025 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 339.64049999999986 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 356.96050000000014 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 358.4802500000002 + }, + { + "M": 37888, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 380.56049999999993 + }, + { + "M": 38912, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 383.84075000000007 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 397.00075000000027 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 383.08050000000003 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 410.12074999999993 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 430.5210000000001 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 423.84074999999996 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 427.80100000000016 + }, + { + "M": 46080, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 449.2407499999997 + }, + { + "M": 47104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 462.7610000000002 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 461.6809999999998 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 466.241 + }, + { + "M": 50176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 486.8409999999999 + }, + { + "M": 
51200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 485.56100000000015 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 506.04100000000017 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 504.28125 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 529.0412499999998 + }, + { + "M": 55296, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 538.0812500000002 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 554.8812499999999 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 542.6812500000003 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 567.6812500000001 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 565.6012499999999 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 593.6014999999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 570.4412499999999 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 601.6814999999997 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 611.2014999999999 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 620.9617500000002 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 641.64175 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 627.8414999999998 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 656.0817499999998 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 659.2417500000001 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 679.4817500000001 + }, + { + "M": 70656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.0817499999998 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 690.2817500000001 + }, + { + "M": 72704, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 701.5220000000004 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 700.4420000000002 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 723.7220000000002 + }, + { + "M": 75776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 743.3219999999997 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 748.0820000000001 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 741.9219999999996 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 759.8022499999997 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 780.2822499999997 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 759.8822500000001 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.5222499999995 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 787.2022500000005 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 818.6824999999999 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 817.2424999999998 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 828.4425000000001 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 822.0425000000005 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 836.9225000000001 + }, + { + "M": 89088, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 844.9625000000001 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 866.4825000000001 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 875.0827499999996 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 858.8025000000016 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.7627499999999 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 904.56275 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 903.6027500000005 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 919.3227499999998 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 934.7629999999995 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 929.9227499999997 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 935.683 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 958.203 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 966.2829999999994 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 958.7629999999999 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 987.1630000000005 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 984.2029999999995 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.4430000000002 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1013.3232500000005 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1016.0432500000012 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1032.72325 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1031.80325 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1051.5632499999997 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.8432500000004 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1053.2032500000005 + }, + { + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1094.1634999999997 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1081.6835 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1099.2035 + }, + { + 
"M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.0434999999998 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1121.0037499999999 + }, + { + "M": 118784, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1131.1637500000006 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.0037500000003 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1136.1637499999997 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1164.4837500000003 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1137.32375 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1165.0437499999998 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1178.2837499999996 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1184.0040000000004 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1203.804 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1194.8040000000005 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.4439999999995 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1242.3242499999997 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1235.7640000000001 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1245.524 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1252.2442500000006 + }, + { + "M": 134144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1286.6842499999998 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1279.84425 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1310.1245 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1298.8442499999996 + }, + { + "M": 138240, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 1288.7242499999998 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.6445000000003 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1336.9244999999996 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1339.6044999999995 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1342.0044999999996 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.9245 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1360.7644999999998 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1378.1647500000004 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.9247499999997 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.04475 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1399.7247500000012 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1408.0047500000005 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1436.7649999999999 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1427.2050000000004 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1436.445000000001 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1446.8049999999994 + }, + { + "M": 154624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1464.0449999999996 + }, + { + "M": 155648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1462.8450000000012 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.4450000000002 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1473.9250000000002 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1483.9649999999997 + }, + { + 
"M": 159744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1501.8852500000003 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1523.12525 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1520.8852499999998 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1532.2052499999995 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1520.9252499999993 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1561.0855000000001 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.6054999999997 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1580.2054999999991 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1589.7654999999995 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1588.6854999999996 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1587.7255000000005 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1597.4054999999998 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1625.9657499999994 + }, + { + "M": 173056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1625.20575 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1613.28575 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1654.0857499999993 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1663.5657499999998 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1662.8057499999995 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1683.8860000000004 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1660.5257499999998 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1692.166000000001 + }, 
+ { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1700.4460000000008 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1699.9259999999995 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1729.9260000000004 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1696.7259999999997 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.2062500000002 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1748.48625 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.36625 + }, + { + "M": 188416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1769.7662500000006 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1765.44625 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1786.6462499999998 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1796.2862499999992 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1794.8062499999996 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1817.6464999999998 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.4465 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1823.4465 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1843.1664999999994 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1867.0867500000004 + }, + { + "M": 198656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1876.9667499999996 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1859.3665 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.3667499999992 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1890.56675 + }, + { + "M": 202752, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1900.7267499999998 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1920.2067499999994 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1895.8467499999997 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1928.8469999999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1927.8469999999998 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1968.2070000000003 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1956.6070000000009 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1963.4070000000002 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.8469999999998 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1995.4072499999993 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1994.60725 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2014.3672500000011 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.4072499999993 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2023.0472499999994 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2032.36725 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2062.807500000001 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2060.8075000000003 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.32725 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.8475 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.1275000000014 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2102.72775 + }, + { + "M": 224256, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2111.88775 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2076.3274999999994 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2120.68775 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2119.847749999999 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2139.60775 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2138.56775 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2140.92775 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2164.687999999999 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2184.4880000000003 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2204.1279999999997 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2182.4080000000004 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2177.1679999999997 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2222.2082500000006 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2231.5682499999994 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2241.2082500000006 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.488250000001 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2248.048249999999 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2281.60825 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2280.4882500000003 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2270.92825 + }, + { + "M": 244736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2310.6484999999993 + }, + { + "M": 245760, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2274.6082499999993 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2309.888500000001 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.2485000000015 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2328.848500000001 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2324.5685000000003 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2339.1684999999998 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2353.84875 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2363.3287499999997 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2382.76875 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2372.04875 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2354.928749999999 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.6090000000013 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2430.089 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2434.2889999999998 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2433.2890000000007 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2426.2889999999998 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2452.168999999999 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.8889999999983 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2471.0890000000018 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2490.969250000001 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2462.849000000001 + }, + { + "M": 267264, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2505.28925 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2524.649250000002 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.0494999999983 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2532.8094999999985 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2525.049250000001 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.7295000000004 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2572.249499999999 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2581.169499999999 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2590.969500000001 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2589.8095000000003 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2596.169750000002 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.929750000002 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2625.4497500000016 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2634.5697499999987 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2605.6097500000014 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2643.5697500000024 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2651.28975 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.129999999999 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.0099999999993 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.0497499999974 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2692.5700000000006 + }, + { + "M": 288768, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.7300000000005 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2711.290000000001 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.8902499999995 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2714.570000000001 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2721.53025 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2741.6502500000006 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2761.490499999998 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2770.6102500000006 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2772.0102499999994 + }, + { + "M": 297984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2803.410500000002 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2799.2505 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.610499999999 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.2504999999974 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.5705000000007 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2842.5707500000026 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2862.210750000002 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2830.210500000002 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2838.370750000001 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2843.0107499999995 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2882.0507500000003 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.7707500000006 + }, + { + 
"M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2911.650999999998 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2900.570749999996 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2910.8910000000014 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2947.9310000000005 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2940.2909999999974 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2949.810999999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2938.4909999999973 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2938.1709999999966 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2967.691 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.41125 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.61125 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3014.9312500000015 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.811249999999 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.691249999996 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3033.051500000003 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.2515000000003 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3062.451500000001 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3033.571250000001 + } + ], + "2048": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999998 + }, + { 
+ "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.119000000000007 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1589999999999847 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.23899999999999 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 2.678999999999995 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 3.439 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 4, + "time_us": 5.038999999999994 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 7.5989999999999895 + }, + { + "M": 1024, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 12.359000000000002 + }, + { + "M": 2048, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 20.239000000000004 + }, + { + "M": 3072, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 28.59899999999999 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 36.318999999999996 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 43.75900000000001 + }, + { + "M": 6144, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 52.11900000000001 + }, + { + "M": 7168, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 59.559250000000006 + }, + { + "M": 8192, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 69.19925000000002 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 75.31925000000003 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 82.27949999999993 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 90.59925000000001 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 97.79925 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 107.1995 + }, + { + "M": 14336, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 114.75950000000003 + }, + { + "M": 15360, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 
125.43950000000001 + }, + { + "M": 16384, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 130.39974999999998 + }, + { + "M": 17408, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 137.63974999999994 + }, + { + "M": 18432, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 149.07949999999988 + }, + { + "M": 19456, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 166.55975 + }, + { + "M": 20480, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 164.07975000000005 + }, + { + "M": 21504, + "rows_per_block": 2, + "vec_size": 4, + "time_us": 183.67974999999996 + }, + { + "M": 22528, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 175.6797499999999 + }, + { + "M": 23552, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 188.11975000000012 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 187.51974999999987 + }, + { + "M": 25600, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 198.79975000000002 + }, + { + "M": 26624, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 203.83999999999997 + }, + { + "M": 27648, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 220.55999999999995 + }, + { + "M": 28672, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 228.1199999999999 + }, + { + "M": 29696, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 236.0799999999998 + }, + { + "M": 30720, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 245.72000000000003 + }, + { + "M": 31744, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 242.2800000000002 + }, + { + "M": 32768, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 253.27999999999994 + }, + { + "M": 33792, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 255.8000000000001 + }, + { + "M": 34816, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 265.2002500000001 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 280.4000000000001 + }, + { + "M": 36864, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 278.2000000000003 + 
}, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 302.08024999999986 + }, + { + "M": 38912, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 315.32050000000015 + }, + { + "M": 39936, + "rows_per_block": 3, + "vec_size": 4, + "time_us": 316.68025 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 303.3605000000001 + }, + { + "M": 41984, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 334.52025000000003 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 333.8802499999998 + }, + { + "M": 44032, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 350.08050000000014 + }, + { + "M": 45056, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 346.1205 + }, + { + "M": 46080, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 359.92074999999977 + }, + { + "M": 47104, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 356.2004999999998 + }, + { + "M": 48128, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 369.72075000000007 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 376.0007499999999 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 389.80050000000006 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 395.1205 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 395.6005 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 415.8007499999999 + }, + { + "M": 54272, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 416.0007499999999 + }, + { + "M": 55296, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 423.84075000000007 + }, + { + "M": 56320, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 436.16075 + }, + { + "M": 57344, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 439.48075000000017 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 457.9209999999998 + }, + { + "M": 59392, + "rows_per_block": 4, + 
"vec_size": 4, + "time_us": 466.68100000000027 + }, + { + "M": 60416, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 462.6010000000001 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 469.04099999999994 + }, + { + "M": 62464, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 478.76099999999997 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 498.48099999999977 + }, + { + "M": 64512, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 493.3609999999999 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 483.96100000000047 + }, + { + "M": 66560, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 512.48125 + }, + { + "M": 67584, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 514.8810000000001 + }, + { + "M": 68608, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 512.7610000000002 + }, + { + "M": 69632, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 521.00125 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 551.8012499999998 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 548.8812499999999 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 571.1215000000002 + }, + { + "M": 73728, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 569.4014999999997 + }, + { + "M": 74752, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 588.4815000000001 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 568.9614999999999 + }, + { + "M": 76800, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 591.1615000000002 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 614.0014999999999 + }, + { + "M": 78848, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 599.2015000000001 + }, + { + "M": 79872, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 603.6814999999997 + }, + { + "M": 80896, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 
639.5217499999997 + }, + { + "M": 81920, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 614.1617500000002 + }, + { + "M": 82944, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 632.7217499999999 + }, + { + "M": 83968, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 647.9617499999999 + }, + { + "M": 84992, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 647.2017499999997 + }, + { + "M": 86016, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 660.36175 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 662.1617499999998 + }, + { + "M": 88064, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 678.92175 + }, + { + "M": 89088, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 687.3620000000001 + }, + { + "M": 90112, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 687.682 + }, + { + "M": 91136, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 702.0819999999999 + }, + { + "M": 92160, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 697.3219999999997 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 715.1219999999998 + }, + { + "M": 94208, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 715.7619999999997 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 712.2420000000002 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 737.002 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 741.0820000000003 + }, + { + "M": 98304, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 732.6820000000012 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 763.2822499999997 + }, + { + "M": 100352, + "rows_per_block": 4, + "vec_size": 4, + "time_us": 766.56225 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 758.56225 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 763.3622500000006 + }, + { + "M": 103424, + "rows_per_block": 10, + 
"vec_size": 4, + "time_us": 792.2022499999998 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 790.56225 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 797.9625000000001 + }, + { + "M": 106496, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 809.5225000000005 + }, + { + "M": 107520, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 809.0824999999995 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 820.6424999999999 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 846.3224999999998 + }, + { + "M": 110592, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 824.6025 + }, + { + "M": 111616, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 852.0425 + }, + { + "M": 112640, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 847.6024999999995 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 867.2424999999998 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 855.4024999999997 + }, + { + "M": 115712, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 883.4027499999997 + }, + { + "M": 116736, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 880.6827500000004 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 893.64275 + }, + { + "M": 118784, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 905.5227500000005 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 913.0027499999997 + }, + { + "M": 120832, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 909.64275 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 923.9229999999998 + }, + { + "M": 122880, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 917.2027500000008 + }, + { + "M": 123904, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 941.2429999999999 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 
952.2429999999999 + }, + { + "M": 125952, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 950.203 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 966.2829999999994 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 964.4030000000007 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 981.723 + }, + { + "M": 130048, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 997.8029999999999 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 967.5630000000001 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 985.2829999999999 + }, + { + "M": 133120, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1002.0832500000001 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1028.5232499999997 + }, + { + "M": 135168, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1008.0432499999996 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1033.6034999999997 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1041.0434999999998 + }, + { + "M": 138240, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1041.5232500000002 + }, + { + "M": 139264, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1049.1235000000001 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1070.6035000000002 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1053.8834999999995 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1075.6434999999997 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1050.2435 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1090.5234999999998 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1088.5634999999997 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 
1096.0037499999994 + }, + { + "M": 147456, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1089.1635 + }, + { + "M": 148480, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1111.9235000000003 + }, + { + "M": 149504, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1119.2034999999992 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1135.2037500000001 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1132.9634999999998 + }, + { + "M": 152576, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1138.843499999999 + }, + { + "M": 153600, + "rows_per_block": 5, + "vec_size": 4, + "time_us": 1151.2034999999992 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1158.1634999999987 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1174.9637500000003 + }, + { + "M": 156672, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1170.6037500000004 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1181.6439999999998 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1191.204000000001 + }, + { + "M": 159744, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1196.2439999999997 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1194.8039999999996 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1211.7240000000002 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1227.5639999999994 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1201.4440000000004 + }, + { + "M": 164864, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1231.1639999999998 + }, + { + "M": 165888, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1231.2839999999997 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1231.284 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 4, + 
"time_us": 1265.0042500000004 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1256.3642500000005 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1271.3242500000001 + }, + { + "M": 171008, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1273.04425 + }, + { + "M": 172032, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1267.56425 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1288.3242499999997 + }, + { + "M": 174080, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1292.0842500000008 + }, + { + "M": 175104, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1299.5642499999994 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1319.3244999999997 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1314.6045000000004 + }, + { + "M": 178176, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1322.4444999999996 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1334.9644999999991 + }, + { + "M": 180224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1317.0445 + }, + { + "M": 181248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1353.2045000000007 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1360.6444999999994 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1371.0845 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1335.6844999999998 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1373.5244999999995 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1385.5647499999995 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1384.12475 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1409.5247499999996 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 
1409.9647499999992 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1421.00475 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1422.6847500000003 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1439.2849999999999 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1437.3650000000007 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1446.8849999999993 + }, + { + "M": 195584, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1447.8049999999994 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1444.9250000000006 + }, + { + "M": 197632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1462.085 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1474.4050000000007 + }, + { + "M": 199680, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1467.9250000000002 + }, + { + "M": 200704, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1484.4049999999997 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1487.3249999999998 + }, + { + "M": 202752, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1499.24525 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1521.08525 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1485.165 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1526.4052499999998 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1542.5252499999997 + }, + { + "M": 207872, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1536.08525 + }, + { + "M": 208896, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1548.4852499999997 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1557.7254999999996 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1572.2855 + }, + { + 
"M": 211968, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1569.6054999999997 + }, + { + "M": 212992, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1576.8455000000004 + }, + { + "M": 214016, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1585.0055000000002 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1584.8454999999994 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1598.8455000000004 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1607.0055000000002 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1615.4054999999998 + }, + { + "M": 219136, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1630.5257500000002 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1639.7657500000005 + }, + { + "M": 221184, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1639.4457499999999 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1663.1257499999992 + }, + { + "M": 223232, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1654.485749999999 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1669.2457499999991 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1649.6057499999997 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1683.286 + }, + { + "M": 227328, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1693.326 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1699.2859999999991 + }, + { + "M": 229376, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1690.406 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1703.7259999999997 + }, + { + "M": 231424, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1721.0060000000003 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1718.7259999999997 + }, + { + 
"M": 233472, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 1729.0460000000003 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1743.0462499999994 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1741.1662500000002 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1748.32625 + }, + { + "M": 237568, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1757.1262500000003 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1772.52625 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1769.40625 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1787.2462499999992 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1799.7664999999997 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1811.0065000000004 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1800.5265 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1816.5265 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1806.7664999999997 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1822.2864999999993 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1839.5265 + }, + { + "M": 248832, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1852.7664999999997 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1854.2064999999993 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1853.0465000000004 + }, + { + "M": 251904, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1867.366750000001 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1867.52675 + }, + { + "M": 253952, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1883.1267499999994 + }, + { + "M": 254976, + 
"rows_per_block": 10, + "vec_size": 4, + "time_us": 1873.6467499999999 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1889.6867499999998 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1906.2067499999994 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1904.6867500000003 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1922.40675 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1938.1269999999995 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1937.0470000000005 + }, + { + "M": 262144, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1928.4070000000002 + }, + { + "M": 263168, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1955.7670000000007 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1959.9270000000006 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1958.4070000000002 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1937.7669999999998 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 1979.2869999999998 + }, + { + "M": 268288, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1999.72725 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2005.0072499999999 + }, + { + "M": 270336, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 1994.60725 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2011.9272499999997 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2009.7672500000008 + }, + { + "M": 273408, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2045.2075000000004 + }, + { + "M": 274432, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2043.8872499999995 + }, + { + "M": 275456, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2053.2075000000013 + }, + { + "M": 
276480, + "rows_per_block": 6, + "vec_size": 4, + "time_us": 2056.0875000000015 + }, + { + "M": 277504, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2065.607500000001 + }, + { + "M": 278528, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2042.6872499999988 + }, + { + "M": 279552, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2071.0475000000006 + }, + { + "M": 280576, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2097.2075000000004 + }, + { + "M": 281600, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2085.6074999999983 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2102.807499999999 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2108.64775 + }, + { + "M": 284672, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2107.0874999999987 + }, + { + "M": 285696, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2116.087749999999 + }, + { + "M": 286720, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2121.807749999998 + }, + { + "M": 287744, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2140.0877500000015 + }, + { + "M": 288768, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2127.687750000002 + }, + { + "M": 289792, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2145.3677500000003 + }, + { + "M": 290816, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2153.687749999999 + }, + { + "M": 291840, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2181.008 + }, + { + "M": 292864, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2179.767999999999 + }, + { + "M": 293888, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2177.0080000000016 + }, + { + "M": 294912, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2176.2079999999987 + }, + { + "M": 295936, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2213.1680000000015 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2201.688000000001 + }, + { + "M": 297984, 
+ "rows_per_block": 10, + "vec_size": 4, + "time_us": 2227.728 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2234.728250000001 + }, + { + "M": 300032, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2234.3282500000005 + }, + { + "M": 301056, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2241.2082500000015 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2233.84825 + }, + { + "M": 303104, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2235.9682500000017 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2258.2482500000024 + }, + { + "M": 305152, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2250.2882499999996 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2272.808249999999 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2258.6882499999992 + }, + { + "M": 308224, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2279.1282500000016 + }, + { + "M": 309248, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2286.60825 + }, + { + "M": 310272, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2314.4085000000014 + }, + { + "M": 311296, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2279.6882499999974 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2322.808500000002 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2326.0885 + }, + { + "M": 314368, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2336.3684999999996 + }, + { + "M": 315392, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2344.4085000000014 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2351.888499999998 + }, + { + "M": 317440, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2358.448499999996 + }, + { + "M": 318464, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2366.6487499999994 + }, + { + "M": 319488, + 
"rows_per_block": 8, + "vec_size": 4, + "time_us": 2353.0485 + }, + { + "M": 320512, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2391.9287500000005 + }, + { + "M": 321536, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2399.248749999999 + }, + { + "M": 322560, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2415.2087500000034 + }, + { + "M": 323584, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2392.5687499999995 + }, + { + "M": 324608, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2421.96875 + }, + { + "M": 325632, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2428.3690000000015 + }, + { + "M": 326656, + "rows_per_block": 8, + "vec_size": 4, + "time_us": 2414.608750000002 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 4, + "time_us": 2420.2887499999997 + } + ], + "2080": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2789999999999964 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.3589999999999947 + }, + { + "M": 32, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.9989999999999952 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5189999999999984 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.5589999999999975 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.999000000000002 + }, + { + "M": 512, + "rows_per_block": 3, + "vec_size": 2, + "time_us": 10.798999999999992 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 16.998999999999995 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 28.519250000000007 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 
2, + "time_us": 39.79925 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 51.11924999999997 + }, + { + "M": 5120, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 63.71925 + }, + { + "M": 6144, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 72.39925000000001 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 82.79925000000001 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 93.39949999999999 + }, + { + "M": 9216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 104.23925000000001 + }, + { + "M": 10240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 114.87949999999998 + }, + { + "M": 11264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 125.47949999999999 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 138.59975 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 150.87974999999994 + }, + { + "M": 14336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 157.47949999999997 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 171.71975000000003 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 184.19975 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.52000000000004 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 204.07975 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 209.28000000000003 + }, + { + "M": 20480, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 219.71974999999992 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 236.28000000000003 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 240.95999999999992 + }, + { + "M": 23552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.67999999999995 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 268.96 + }, + { + "M": 25600, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 282.12025000000006 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 283.4002499999999 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 301.7602499999999 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 315.2802499999999 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 315.0002499999998 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 325.6002500000001 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 336.24025000000006 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 346.6005000000001 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 364.0807500000001 + }, + { + "M": 34816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 374.92075000000034 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 378.2805000000002 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 396.0007499999999 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 409.6407499999999 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 425.40075 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 419.7610000000002 + }, + { + "M": 40960, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 430.2407499999998 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 446.80099999999993 + }, + { + "M": 43008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 456.8009999999997 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 472.40099999999984 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 477.6809999999998 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 482.9609999999998 + }, + { + "M": 47104, + "rows_per_block": 8, 
+ "vec_size": 2, + "time_us": 494.0409999999997 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.1209999999998 + }, + { + "M": 49152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 509.721 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 520.00125 + }, + { + "M": 51200, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 536.1212499999999 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 546.3212500000002 + }, + { + "M": 53248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 561.4415000000004 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 567.56125 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 588.7614999999998 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 599.2814999999998 + }, + { + "M": 57344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 602.7217500000004 + }, + { + "M": 58368, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 628.1614999999999 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 623.8417499999998 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.8417499999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 639.7217499999999 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 650.9617500000004 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 655.2017500000006 + }, + { + "M": 64512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 675.5617500000001 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 682.6817499999997 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 681.5617500000003 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 696.9219999999998 + }, + { + "M": 68608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
707.0819999999999 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 725.002 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.20225 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2822499999997 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 763.56225 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 785.1622499999996 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.72225 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.6022500000004 + }, + { + "M": 76800, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 806.52225 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 823.3224999999998 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 838.2824999999998 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 834.6825000000003 + }, + { + "M": 80896, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 848.5625 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 849.4425000000001 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 870.2427500000003 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.6427499999995 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 897.7227499999999 + }, + { + "M": 86016, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 912.56275 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 912.1627499999995 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 923.2827499999999 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 928.0827500000005 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 942.0029999999997 + }, + { + "M": 91136, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 949.4029999999998 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 953.163 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 970.4830000000002 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 980.163 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 991.9632500000002 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1001.2032500000006 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 994.3232499999999 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1022.2432500000004 + }, + { + "M": 99328, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1034.20325 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1045.0032500000002 + }, + { + "M": 101376, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1066.0035000000003 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1058.6035000000006 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1076.8435 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.0835000000002 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2835 + }, + { + "M": 106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2034999999996 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1100.6835 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1119.6037500000007 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1140.0037499999999 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1150.3237499999996 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1170.72375 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.32375 + }, + 
{ + "M": 113664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1182.3639999999996 + }, + { + "M": 114688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1182.0839999999998 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1212.3639999999996 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1214.324 + }, + { + "M": 117760, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1224.9640000000009 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1243.8042499999997 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1254.56425 + }, + { + "M": 120832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1255.0842499999994 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1273.0042500000002 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1258.56425 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1300.4042499999996 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.6444999999999 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1308.8844999999997 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1321.4445 + }, + { + "M": 128000, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1332.4444999999996 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1330.4844999999996 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1350.9244999999996 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1361.4044999999996 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1371.6047500000004 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1372.2047499999994 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1402.3247499999998 + }, + { + 
"M": 135168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1416.8047500000007 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1403.084750000001 + }, + { + "M": 137216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.2047500000003 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1443.4449999999997 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.7650000000003 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1453.9650000000001 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.5649999999996 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1487.8050000000007 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1457.8849999999998 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1478.6049999999996 + }, + { + "M": 145408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1505.9252499999998 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1526.8052500000013 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1530.4052499999998 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1529.28525 + }, + { + "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1550.8854999999999 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1559.6855 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1570.2855 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.1655 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1582.6054999999997 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1584.3654999999999 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1605.045500000001 + }, + { + "M": 
156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1620.4857499999994 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1615.6857499999996 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1614.4457499999999 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1666.8057499999995 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1647.1657500000001 + }, + { + "M": 161792, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1677.406 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1688.2859999999991 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1676.246 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1688.8459999999995 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1709.366 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1719.8059999999996 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1740.1662500000002 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1718.2459999999992 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1761.0062500000013 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1770.4062500000005 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1758.8462500000005 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1769.36625 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1787.40625 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1790.9662500000004 + }, + { + "M": 176128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1833.0064999999995 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1811.6064999999999 + }, + { + "M": 178176, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 1843.5665 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1829.0865000000013 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.0467499999995 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1882.9267499999996 + }, + { + "M": 182272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1885.9267499999996 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1903.8867499999997 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1879.6067500000004 + }, + { + "M": 185344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1927.567 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1926.4069999999992 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1937.2470000000003 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1927.607 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.3269999999993 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1958.2870000000003 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.1272499999995 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1999.2072500000004 + }, + { + "M": 193536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2001.3272499999991 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.487 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2011.0872499999996 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2032.487249999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2052.9675000000007 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2063.1674999999996 + }, + { + "M": 199680, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2028.9672500000004 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2054.2875000000004 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2074.5275 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.4877500000002 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.9674999999997 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2117.56775 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.64775 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2125.88775 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2147.56775 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2142.4477500000003 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2167.727999999999 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.3680000000004 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.1280000000006 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2199.2879999999996 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2203.688 + }, + { + "M": 216064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2236.6482499999993 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.2082500000006 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2251.008249999999 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2247.4482500000004 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.728250000001 + }, + { + "M": 221184, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 2288.2484999999997 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2298.3684999999996 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2299.008499999999 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.4085000000005 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2303.5684999999994 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2330.768499999999 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2340.968499999999 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.968750000001 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2356.96875 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2367.2887499999997 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2365.4487499999996 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2395.6887500000003 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2406.7689999999993 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2406.6489999999994 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2409.6090000000013 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2417.208999999998 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2428.5290000000014 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2448.2889999999998 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2444.8489999999997 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2469.4092500000015 + }, + { + "M": 241664, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2516.969249999999 + }, + { + "M": 
242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.4492500000006 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2518.8092499999993 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2518.6492499999995 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2521.4092500000006 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2559.8094999999985 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.449499999999 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2550.8895 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2583.889500000002 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2575.3695 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.8097500000003 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2595.089750000001 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2615.089749999999 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2616.129750000001 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2617.449749999997 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2646.6497500000005 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2633.00975 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2677.130000000001 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2688.289999999999 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2679.010000000002 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2699.5699999999997 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2709.4900000000016 + }, + { + 
"M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2730.5702499999998 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2730.8102500000005 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2731.6102500000006 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.0102500000003 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.8505000000023 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2782.5305000000008 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2782.9704999999976 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2783.410500000001 + }, + { + "M": 272384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2830.7704999999987 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2834.7707499999997 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2835.490499999997 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2835.690499999998 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2839.330750000001 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2870.330750000002 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2861.570749999997 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.571 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.570749999999 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.8907500000005 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2903.1709999999994 + }, + { + "M": 283648, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2943.1310000000003 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2935.610999999997 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2955.6510000000026 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2925.411000000003 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2956.571249999999 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.771249999999 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2977.8912500000015 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2998.4912500000037 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3001.81125 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3026.0115000000023 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3046.451499999999 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3057.1714999999967 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3058.0115000000023 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3037.011499999997 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3069.0915000000005 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3089.9717500000006 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3108.2517499999994 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3098.53175 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3097.3717500000002 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3119.571750000001 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3139.7717500000017 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3160.5719999999983 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 
2, + "time_us": 3156.811999999998 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.691750000002 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3187.8120000000017 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3188.771999999999 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3198.771999999999 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3209.2520000000004 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3197.8120000000017 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3242.2522499999995 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3252.6122499999983 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3252.53225 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3263.6922499999982 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3251.3322499999995 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.692500000001 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3305.1724999999988 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3304.3725000000013 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3352.93275 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3320.1725000000006 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3364.2927500000023 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3375.4127499999995 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3395.3730000000014 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3396.1727499999997 + }, + { + "M": 327680, + "rows_per_block": 
10, + "vec_size": 2, + "time_us": 3343.652750000001 + } + ], + "2240": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999983 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.199000000000005 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.399000000000001 + }, + { + "M": 32, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 2.958999999999996 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.438999999999993 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.518999999999991 + }, + { + "M": 256, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 6.878999999999998 + }, + { + "M": 512, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 10.878999999999998 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 17.479 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 29.87899999999999 + }, + { + "M": 3072, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 41.43925 + }, + { + "M": 4096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 52.99925 + }, + { + "M": 5120, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 65.07924999999999 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 76.11925 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 87.03924999999998 + }, + { + "M": 8192, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 99.83950000000002 + }, + { + "M": 9216, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 113.47950000000003 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 124.47949999999997 + }, + { + "M": 11264, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 137.3195 + }, + { + "M": 12288, + 
"rows_per_block": 5, + "vec_size": 2, + "time_us": 149.15974999999997 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 158.43975000000003 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 166.07975000000005 + }, + { + "M": 15360, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 180.39999999999998 + }, + { + "M": 16384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 187.35975000000002 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 208.35975000000008 + }, + { + "M": 18432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 217.8400000000001 + }, + { + "M": 19456, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 220.03999999999996 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 228.28000000000014 + }, + { + "M": 21504, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 248.4000000000002 + }, + { + "M": 22528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 251.36025000000006 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 273.31999999999994 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 282.6402499999999 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 286.9202500000001 + }, + { + "M": 26624, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 308.0005000000002 + }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 316.5205000000001 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 331.00024999999994 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 342.2405 + }, + { + "M": 30720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 347.52049999999986 + }, + { + "M": 31744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 360.68050000000017 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 365.2405000000001 + }, + { + "M": 33792, + 
"rows_per_block": 6, + "vec_size": 2, + "time_us": 385.88049999999987 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.0005000000002 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 397.48075000000017 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 404.5207499999999 + }, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 425.48074999999983 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.44075 + }, + { + "M": 39936, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 442.0007499999997 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 455.84074999999984 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 475.24099999999976 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 488.5610000000004 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 481.44100000000003 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.6012499999999 + }, + { + "M": 46080, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 508.68100000000004 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 519.8012500000002 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 531.0012499999998 + }, + { + "M": 49152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 536.4812500000005 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 547.6412499999999 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 584.2815000000005 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.1614999999999 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 586.4414999999999 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 611.6415000000002 + }, + { + "M": 55296, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 619.4414999999997 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 630.4414999999997 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 631.0815000000002 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 642.0417499999999 + }, + { + "M": 59392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 645.84175 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 687.0817500000003 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 693.4019999999998 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 686.8817500000002 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.3220000000006 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 708.5620000000006 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 719.8820000000001 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 727.922 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 752.8022500000002 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 753.2022499999996 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 776.1222500000002 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 795.80225 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 796.6822499999998 + }, + { + "M": 72704, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 807.4825000000001 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 819.0025000000005 + }, + { + "M": 74752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 819.7624999999998 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 852.3625000000002 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
856.6824999999999 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 874.8427499999998 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 894.4427500000006 + }, + { + "M": 79872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 875.2827499999999 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 886.4827499999997 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 891.2827499999999 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 910.8027500000007 + }, + { + "M": 83968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 916.12275 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 942.8829999999998 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 944.1229999999996 + }, + { + "M": 87040, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 956.163 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 975.7629999999999 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 996.5232499999997 + }, + { + "M": 90112, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 989.2832500000009 + }, + { + "M": 91136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1008.8032500000004 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1012.8832499999997 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1020.68325 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1052.0434999999998 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1042.76325 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.4434999999999 + }, + { + "M": 97280, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1067.3635 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1075.4434999999999 + }, + { + "M": 99328, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1097.0034999999998 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1113.6437500000006 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1110.9234999999999 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1113.8035000000004 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1122.8837500000004 + }, + { + "M": 104448, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1147.2437499999996 + }, + { + "M": 105472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1154.6037499999998 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1180.1240000000003 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1178.4439999999995 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1187.964 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1203.524 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.9240000000004 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1220.8839999999996 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1223.3639999999996 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1241.0042500000004 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1268.1642500000003 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1266.7242500000002 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1276.4842500000004 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1284.44425 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1295.0042499999995 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1307.5244999999995 + }, + { + "M": 
120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1323.7644999999993 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1326.9244999999999 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1323.2044999999994 + }, + { + "M": 123904, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1371.5647499999995 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1364.9247500000001 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1375.8847499999997 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1405.08475 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1398.3647499999997 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1418.8047499999998 + }, + { + "M": 130048, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1427.7649999999994 + }, + { + "M": 131072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1431.0050000000006 + }, + { + "M": 132096, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1460.7650000000003 + }, + { + "M": 133120, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1472.4449999999997 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1463.965000000001 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1475.125000000001 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1486.4450000000002 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1507.3252499999999 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1517.2852500000008 + }, + { + "M": 139264, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1528.4452500000002 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.4052499999998 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1546.64525 + 
}, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1557.4454999999998 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1557.4854999999998 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1569.9654999999993 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1580.2855000000009 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1602.0055000000002 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1612.6457499999997 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1601.9655000000002 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1634.20575 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1635.8857499999995 + }, + { + "M": 151552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.4457499999999 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1657.8457500000004 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1656.32575 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1669.246000000001 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1690.4860000000017 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1708.6460000000002 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1723.6459999999997 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1730.6060000000007 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1744.446249999999 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1765.446249999999 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1746.44625 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1767.44625 + 
}, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1755.40625 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1798.9665000000005 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.2864999999993 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1811.6064999999999 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1822.2065000000011 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.2865000000002 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1834.3264999999992 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.8867499999997 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1874.52675 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1885.2867500000002 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1883.0867499999995 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1897.56675 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1908.1267500000004 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1932.9669999999996 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1930.0869999999995 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.7669999999998 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1974.0070000000005 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.0470000000005 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1975.2872500000003 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1977.2069999999994 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1990.0472499999996 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2017.7272500000001 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2019.0872499999996 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2039.8475 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2040.4875000000002 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2047.4875000000013 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2079.2075000000004 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2073.8075 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2084.6475 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2115.76775 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2111.96775 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2127.88775 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2144.72775 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2152.72775 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.968 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2150.0877499999997 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2196.2080000000005 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.0880000000006 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2188.3280000000004 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2219.0880000000006 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2205.008 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2240.928249999999 + }, + { + "M": 206848, + 
"rows_per_block": 8, + "vec_size": 2, + "time_us": 2271.808250000002 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2251.00825 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.5282499999994 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.2882499999996 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2282.4084999999986 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2293.5684999999994 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2304.2084999999997 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2315.2085000000006 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2299.0085 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2337.3285000000005 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2368.088749999999 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2359.6887500000003 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2378.5287499999995 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2382.008749999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2420.849 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2411.6489999999985 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2433.169 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2453.849000000001 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2427.048999999999 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2466.089250000001 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2476.6892500000013 + }, + { + "M": 228352, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2487.969250000001 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2492.8492499999993 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2474.28925 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2493.32925 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2514.2492500000017 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2535.4495000000015 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2547.009500000001 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2539.4894999999997 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2578.8094999999994 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.1694999999972 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2570.3695000000007 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2587.5695 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2610.6497499999978 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2630.7297499999995 + }, + { + "M": 242688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2655.769999999997 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2653.0499999999993 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2673.3700000000017 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2635.4497499999998 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2675.9699999999993 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2706.7299999999977 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2703.250000000001 + }, + { + "M": 
249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2702.5700000000006 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2684.409999999998 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2735.250249999999 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.0502500000002 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2757.530249999996 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2747.9302499999985 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2738.9702499999994 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2790.4505000000026 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2796.7305000000006 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2810.8505000000014 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2821.490500000001 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2803.170500000001 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2844.170750000002 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2865.1307499999966 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2866.410749999998 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2887.4507500000027 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2877.690749999999 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2904.771 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2937.8110000000015 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2939.8110000000024 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2950.571000000001 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2930.530999999998 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2962.8912499999988 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2984.0112499999996 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3005.8512500000006 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3009.771249999999 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2990.291250000001 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3022.2115000000013 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3012.9712500000005 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3023.9315000000006 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3055.9715000000015 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3035.371500000001 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3078.2517499999976 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3107.2917500000012 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3090.731749999999 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3110.9317499999997 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3080.1717499999977 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.9317499999997 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3135.1317500000023 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3176.2520000000004 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3177.652 + }, + { + "M": 291840, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3161.7719999999995 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3196.2519999999986 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3208.1322499999987 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3209.212249999997 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3249.8922500000026 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3218.0122499999998 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3241.772249999998 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3255.0522500000025 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3253.3722499999967 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3274.0125000000025 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3252.332250000003 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3296.772499999999 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3287.3325000000023 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3308.4524999999994 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.4124999999976 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3309.3324999999986 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3344.3727500000005 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3346.01275 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3377.01275 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3408.132999999996 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3364.2927500000023 + }, + { + "M": 313344, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 3413.6929999999984 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3395.3330000000005 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3425.893000000002 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3427.252999999997 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3423.813000000002 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3469.1732500000016 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3480.813250000001 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3464.053249999999 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3494.3732500000006 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3470.5732500000013 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3516.933250000002 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3536.9734999999964 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3538.653500000002 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3559.4134999999987 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3505.2532499999998 + } + ], + "2400": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.198999999999998 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.159000000000006 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.238999999999997 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.2390000000000043 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5989999999999966 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0389999999999944 + }, + { + "M": 
64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5190000000000055 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.838999999999999 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 7.2792499999999976 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 11.158999999999992 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.038999999999994 + }, + { + "M": 2048, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 30.91924999999999 + }, + { + "M": 3072, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 43.95924999999999 + }, + { + "M": 4096, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 56.27925000000002 + }, + { + "M": 5120, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 68.23949999999998 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 79.71950000000001 + }, + { + "M": 7168, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 91.39949999999997 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 102.95949999999999 + }, + { + "M": 9216, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 116.63950000000003 + }, + { + "M": 10240, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 129.35949999999997 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 141.99975000000003 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 153.07975 + }, + { + "M": 13312, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 166.43975 + }, + { + "M": 14336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 174.39975000000004 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 192.99975000000006 + }, + { + "M": 16384, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 202.71974999999998 + }, + { + "M": 17408, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 207.72000000000008 + }, + { + "M": 18432, + "rows_per_block": 8, + "vec_size": 2, 
+ "time_us": 219.68 + }, + { + "M": 19456, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 238.51999999999987 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 240.3999999999998 + }, + { + "M": 21504, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 274.36024999999984 + }, + { + "M": 22528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 266.8800000000001 + }, + { + "M": 23552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 288.96025 + }, + { + "M": 24576, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 300.92025 + }, + { + "M": 25600, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 312.68025 + }, + { + "M": 26624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 321.68049999999994 + }, + { + "M": 27648, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 333.40025 + }, + { + "M": 28672, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 348.52049999999997 + }, + { + "M": 29696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 359.96074999999996 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 371.4805000000001 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 372.20050000000003 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 383.88049999999987 + }, + { + "M": 33792, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 417.3207500000001 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 417.56074999999976 + }, + { + "M": 35840, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 428.8007500000002 + }, + { + "M": 36864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 436.92100000000016 + }, + { + "M": 37888, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 441.76075000000037 + }, + { + "M": 38912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 459.8409999999999 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 471.4012499999999 + }, + { + "M": 40960, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 469.2810000000002 + }, + { + "M": 41984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 510.0812499999997 + }, + { + "M": 43008, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 513.9612500000001 + }, + { + "M": 44032, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 522.9612500000001 + }, + { + "M": 45056, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 545.32125 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 546.5212499999998 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 558.5215000000001 + }, + { + "M": 48128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 583.7614999999998 + }, + { + "M": 49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 570.9212500000001 + }, + { + "M": 50176, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 603.4417500000004 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 594.3614999999998 + }, + { + "M": 52224, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 616.9214999999997 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 628.2417500000001 + }, + { + "M": 54272, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 639.8417499999998 + }, + { + "M": 55296, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 651.9217500000002 + }, + { + "M": 56320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 651.7217500000002 + }, + { + "M": 57344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 668.8417499999996 + }, + { + "M": 58368, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 676.2417499999999 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 709.2019999999998 + }, + { + "M": 60416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 712.3619999999996 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.1220000000001 + }, + { + "M": 62464, + "rows_per_block": 8, + 
"vec_size": 2, + "time_us": 723.3619999999996 + }, + { + "M": 63488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 747.402 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 757.4422499999996 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 758.28225 + }, + { + "M": 66560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 786.2022499999998 + }, + { + "M": 67584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 774.8022499999997 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 804.1222500000003 + }, + { + "M": 69632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 817.2025000000001 + }, + { + "M": 70656, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 839.8824999999997 + }, + { + "M": 71680, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 851.4425000000001 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 878.2827499999999 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 864.6025 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 887.08275 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 899.2427499999999 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 884.3227499999998 + }, + { + "M": 77824, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 907.3227500000007 + }, + { + "M": 78848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 913.2827499999999 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 924.7627499999999 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 937.3629999999998 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 932.0029999999997 + }, + { + "M": 82944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 960.7629999999999 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 971.8829999999998 + }, 
+ { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 994.0832500000006 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1005.6832499999997 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1009.2032500000006 + }, + { + "M": 88064, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1029.6832499999996 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1050.5234999999998 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1041.8432500000008 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1054.2835000000005 + }, + { + "M": 92160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1066.7635 + }, + { + "M": 93184, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1088.6435000000001 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1088.6034999999997 + }, + { + "M": 95232, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1112.1637499999997 + }, + { + "M": 96256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1121.6837499999997 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1114.0437499999998 + }, + { + "M": 98304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1144.84375 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1146.3237499999998 + }, + { + "M": 100352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1162.5237500000003 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1163.8837499999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1167.0437499999998 + }, + { + "M": 103424, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1198.484 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1219.4039999999995 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1221.724000000001 + }, + { + "M": 
106496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1239.4842499999995 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1247.0442500000004 + }, + { + "M": 108544, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1257.36425 + }, + { + "M": 109568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1268.6442499999998 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1293.1642500000003 + }, + { + "M": 111616, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1289.6842500000002 + }, + { + "M": 112640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1287.0442499999995 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1309.4445 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1324.8845000000001 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1331.4844999999996 + }, + { + "M": 116736, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1348.5245000000004 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1355.2444999999998 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1357.1644999999999 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1379.00475 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1404.9247499999997 + }, + { + "M": 121856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1409.8047499999998 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1409.52475 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1432.6450000000004 + }, + { + "M": 124928, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1446.4849999999997 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1454.7249999999995 + }, + { + "M": 126976, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1470.3649999999998 + }, + { + "M": 
128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1477.6850000000004 + }, + { + "M": 129024, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1494.3652499999998 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1505.9252499999998 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1523.80525 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1535.165249999999 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1544.0855000000001 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1540.4052499999998 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1561.6854999999996 + }, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1563.4855000000016 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.8054999999995 + }, + { + "M": 138240, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1594.4054999999998 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1608.7254999999996 + }, + { + "M": 140288, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1615.9657499999998 + }, + { + "M": 141312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1621.40575 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1622.6857500000006 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1641.6457499999997 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1676.1659999999993 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1668.1657499999992 + }, + { + "M": 146432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.0860000000002 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1681.0460000000003 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1679.286 + }, + { 
+ "M": 149504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1714.406 + }, + { + "M": 150528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1747.1662500000002 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1756.52625 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1752.6862499999997 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1746.0462500000003 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1768.5662499999999 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1800.3665 + }, + { + "M": 156672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1811.8465 + }, + { + "M": 157696, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1817.1265000000003 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1804.2464999999993 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1840.5265 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.44675 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1863.7667500000007 + }, + { + "M": 162816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1884.8867499999997 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.7667499999998 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1898.6467499999999 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1899.7267499999998 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1921.487000000001 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1923.0069999999996 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1933.0070000000005 + }, + { + "M": 169984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1965.687 + }, + { + "M": 171008, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.2070000000003 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1979.3272499999994 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1980.72725 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1966.2470000000003 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2004.0472500000005 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2025.8872499999998 + }, + { + "M": 177152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2050.0075000000006 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2038.8875 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2054.2874999999995 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2071.8475 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2083.7675 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2085.3275000000003 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2107.80775 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2087.7675 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2130.2077500000005 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2132.2877499999995 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2163.1279999999997 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2165.847999999999 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2160.048 + }, + { + "M": 190464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2202.4880000000003 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2189.2879999999996 + }, + { + "M": 192512, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2212.0879999999997 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2214.688000000001 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2217.808 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2226.4082500000004 + }, + { + "M": 196608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2260.76825 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2273.1282500000007 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2294.8485 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2259.088249999999 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2308.3684999999987 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2329.928499999999 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2341.80875 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2342.8085 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2316.4084999999995 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2376.6887499999984 + }, + { + "M": 206848, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2384.1687500000007 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2387.80875 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2405.6490000000003 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2388.6487500000003 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2419.169000000001 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2429.968999999999 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2442.048999999999 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2454.769000000001 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2436.6889999999994 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.729000000001 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2498.6092499999986 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2490.8092499999993 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2499.52925 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2503.6092499999986 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2534.4494999999997 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.8094999999994 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2569.2895 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2579.409499999997 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2591.8897499999975 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2592.969750000001 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.4497500000016 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2636.7297499999977 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2611.6897500000005 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2600.289749999997 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2663.9700000000003 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2683.6900000000005 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2665.5300000000016 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2698.290000000001 + }, + { + "M": 235520, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2689.850000000002 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2712.210250000001 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2724.130250000003 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2745.570249999998 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2751.25025 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2746.0502499999993 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2779.410499999998 + }, + { + "M": 242688, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2781.490500000001 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2792.650499999999 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2795.130500000003 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2794.4905 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2848.6907500000016 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2840.1707499999984 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2866.0507500000012 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2873.410749999998 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2872.8107500000033 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2876.8107500000015 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2888.4507499999963 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2920.411000000001 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2922.0109999999977 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2901.4510000000037 + }, + { + "M": 257024, + 
"rows_per_block": 10, + "vec_size": 2, + "time_us": 2934.8909999999996 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2954.611 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.1312500000004 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3003.65125 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2971.451250000001 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2996.9712499999987 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3017.8914999999997 + }, + { + "M": 264192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3019.211499999996 + }, + { + "M": 265216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3041.531500000001 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3010.891249999997 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3043.6515000000004 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3113.1317499999986 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3115.2917500000003 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3115.691750000002 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3124.851749999998 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3149.492000000004 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3162.0519999999997 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3184.051999999998 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3166.3719999999967 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3162.2120000000014 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3197.0522500000025 + }, + { + 
"M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3210.2122499999987 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3221.85225 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3243.0922500000015 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3208.6922499999982 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3266.612500000001 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3281.1724999999997 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3272.1325000000015 + }, + { + "M": 285696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3268.852500000001 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3254.1322500000024 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3312.0525000000034 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3303.3725000000013 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3325.3327499999996 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3336.2527500000015 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3314.6125 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3373.4527500000004 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3375.692750000002 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3387.4130000000005 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3419.4529999999995 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3373.5327500000003 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3411.8129999999983 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
3452.253249999999 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3448.893249999999 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3461.013249999998 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3445.693000000001 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3463.5732499999976 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3475.4532500000005 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3496.973250000001 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3515.8134999999993 + }, + { + "M": 307200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3510.093500000001 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3538.933499999999 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3560.773500000001 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3582.6537500000013 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3564.933499999999 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3567.853500000001 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3572.173500000001 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3617.93375 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3610.053749999999 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3642.134 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3605.0537499999973 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3665.5340000000015 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3677.4540000000015 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 3656.934000000002 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3707.5742500000015 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3691.173999999999 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3750.8142500000013 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3732.6142500000024 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3753.5742500000015 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3756.17425 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3698.8939999999993 + } + ], + "2560": [ + { + "M": 1, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0390000000000086 + }, + { + "M": 2, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 4, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.0790000000000006 + }, + { + "M": 8, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.1189999999999927 + }, + { + "M": 16, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 2.5589999999999975 + }, + { + "M": 32, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.0789999999999935 + }, + { + "M": 64, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 3.5589999999999975 + }, + { + "M": 128, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 4.919000000000004 + }, + { + "M": 256, + "rows_per_block": 1, + "vec_size": 2, + "time_us": 7.478999999999999 + }, + { + "M": 512, + "rows_per_block": 2, + "vec_size": 2, + "time_us": 11.319000000000003 + }, + { + "M": 1024, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 18.359249999999996 + }, + { + "M": 2048, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 31.83925 + }, + { + "M": 3072, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 44.879 + }, + { + "M": 4096, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 57.23925 + }, + { + "M": 5120, + 
"rows_per_block": 5, + "vec_size": 2, + "time_us": 70.31925 + }, + { + "M": 6144, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 82.3195 + }, + { + "M": 7168, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 95.75924999999998 + }, + { + "M": 8192, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 106.23950000000002 + }, + { + "M": 9216, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 125.27950000000001 + }, + { + "M": 10240, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 135.23949999999996 + }, + { + "M": 11264, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 146.9595 + }, + { + "M": 12288, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 158.5195 + }, + { + "M": 13312, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 178.95999999999998 + }, + { + "M": 14336, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 180.55975000000007 + }, + { + "M": 15360, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 199.75974999999994 + }, + { + "M": 16384, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 203.79975000000002 + }, + { + "M": 17408, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 226.95999999999992 + }, + { + "M": 18432, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 233.71999999999997 + }, + { + "M": 19456, + "rows_per_block": 4, + "vec_size": 2, + "time_us": 258.1599999999999 + }, + { + "M": 20480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 249.80000000000007 + }, + { + "M": 21504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 265.52025000000003 + }, + { + "M": 22528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 276.20000000000016 + }, + { + "M": 23552, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 299.3202499999999 + }, + { + "M": 24576, + "rows_per_block": 6, + "vec_size": 2, + "time_us": 309.44025 + }, + { + "M": 25600, + "rows_per_block": 5, + "vec_size": 2, + "time_us": 328.1205 + }, + { + "M": 26624, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 337.40049999999985 
+ }, + { + "M": 27648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 346.8405 + }, + { + "M": 28672, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 351.16049999999996 + }, + { + "M": 29696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 370.84074999999996 + }, + { + "M": 30720, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 386.40049999999997 + }, + { + "M": 31744, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 387.52049999999997 + }, + { + "M": 32768, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 399.8007500000002 + }, + { + "M": 33792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 419.2007500000004 + }, + { + "M": 34816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 434.9207500000001 + }, + { + "M": 35840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 430.0407500000001 + }, + { + "M": 36864, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 449.08100000000013 + }, + { + "M": 37888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 468.1210000000001 + }, + { + "M": 38912, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 483.8009999999997 + }, + { + "M": 39936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 491.5609999999999 + }, + { + "M": 40960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 490.12099999999987 + }, + { + "M": 41984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 515.9212500000001 + }, + { + "M": 43008, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 522.3612500000002 + }, + { + "M": 44032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 540.6412499999997 + }, + { + "M": 45056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 552.2412500000003 + }, + { + "M": 46080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 560.2412499999998 + }, + { + "M": 47104, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 571.0014999999999 + }, + { + "M": 48128, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 583.8014999999998 + }, + { + "M": 
49152, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 596.1215000000004 + }, + { + "M": 50176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 613.4415000000001 + }, + { + "M": 51200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 620.6815000000001 + }, + { + "M": 52224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 637.2017500000002 + }, + { + "M": 53248, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 644.5617500000001 + }, + { + "M": 54272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 661.6817499999997 + }, + { + "M": 55296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 683.4417499999997 + }, + { + "M": 56320, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 681.2417500000004 + }, + { + "M": 57344, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 704.0020000000004 + }, + { + "M": 58368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 699.6020000000001 + }, + { + "M": 59392, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 739.3619999999996 + }, + { + "M": 60416, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 741.2019999999998 + }, + { + "M": 61440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 750.3622499999997 + }, + { + "M": 62464, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 765.9222500000001 + }, + { + "M": 63488, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 788.96225 + }, + { + "M": 64512, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 789.7222499999996 + }, + { + "M": 65536, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 792.0822499999997 + }, + { + "M": 66560, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 814.8024999999998 + }, + { + "M": 67584, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 826.8425000000002 + }, + { + "M": 68608, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 828.2825000000004 + }, + { + "M": 69632, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 858.8024999999998 + }, + { + "M": 70656, + "rows_per_block": 
8, + "vec_size": 2, + "time_us": 860.2024999999999 + }, + { + "M": 71680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 889.6827500000004 + }, + { + "M": 72704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 881.4027500000007 + }, + { + "M": 73728, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 905.4027500000002 + }, + { + "M": 74752, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 914.7227499999999 + }, + { + "M": 75776, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 937.2429999999995 + }, + { + "M": 76800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 933.5230000000001 + }, + { + "M": 77824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 952.2829999999999 + }, + { + "M": 78848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 954.0429999999997 + }, + { + "M": 79872, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 976.123 + }, + { + "M": 80896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 968.6430000000005 + }, + { + "M": 81920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 983.4029999999998 + }, + { + "M": 82944, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1012.5632499999997 + }, + { + "M": 83968, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1014.5632500000002 + }, + { + "M": 84992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1027.28325 + }, + { + "M": 86016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1039.4032499999998 + }, + { + "M": 87040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1044.5632500000002 + }, + { + "M": 88064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1063.5234999999998 + }, + { + "M": 89088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1105.7234999999996 + }, + { + "M": 90112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1098.2435 + }, + { + "M": 91136, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1100.4434999999999 + }, + { + "M": 92160, + "rows_per_block": 8, + "vec_size": 2, + 
"time_us": 1112.28375 + }, + { + "M": 93184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1134.6837500000001 + }, + { + "M": 94208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1156.2837500000005 + }, + { + "M": 95232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1138.4837499999999 + }, + { + "M": 96256, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1161.92375 + }, + { + "M": 97280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1174.8839999999996 + }, + { + "M": 98304, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1186.324 + }, + { + "M": 99328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1198.6840000000002 + }, + { + "M": 100352, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1223.2439999999997 + }, + { + "M": 101376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1224.4039999999995 + }, + { + "M": 102400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1215.8839999999996 + }, + { + "M": 103424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1272.7242500000007 + }, + { + "M": 104448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1259.7242499999998 + }, + { + "M": 105472, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1272.8042500000001 + }, + { + "M": 106496, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1284.8842500000005 + }, + { + "M": 107520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1295.3245000000002 + }, + { + "M": 108544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1306.8444999999997 + }, + { + "M": 109568, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1331.4045 + }, + { + "M": 110592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1321.6844999999998 + }, + { + "M": 111616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1353.0045 + }, + { + "M": 112640, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1357.7247500000008 + }, + { + "M": 113664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1367.444750000001 + }, + { + "M": 114688, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1392.5647500000005 + }, + { + "M": 115712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1413.1247499999995 + }, + { + "M": 116736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1414.56475 + }, + { + "M": 117760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1425.605 + }, + { + "M": 118784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1437.6849999999995 + }, + { + "M": 119808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1470.3650000000007 + }, + { + "M": 120832, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1465.6049999999996 + }, + { + "M": 121856, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1492.5652500000006 + }, + { + "M": 122880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1455.165 + }, + { + "M": 123904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1498.3652500000012 + }, + { + "M": 124928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1516.8852499999994 + }, + { + "M": 125952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1513.80525 + }, + { + "M": 126976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1533.8452500000003 + }, + { + "M": 128000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1525.5652500000006 + }, + { + "M": 129024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1547.9255000000003 + }, + { + "M": 130048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1560.6854999999996 + }, + { + "M": 131072, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1578.325499999999 + }, + { + "M": 132096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1574.2055 + }, + { + "M": 133120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1595.0455000000002 + }, + { + "M": 134144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1619.20575 + }, + { + "M": 135168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1621.0457500000002 + 
}, + { + "M": 136192, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1643.5257499999998 + }, + { + "M": 137216, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1665.4057500000008 + }, + { + "M": 138240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1655.4857499999998 + }, + { + "M": 139264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1677.7659999999996 + }, + { + "M": 140288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1692.0859999999993 + }, + { + "M": 141312, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1699.4859999999999 + }, + { + "M": 142336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1705.0860000000002 + }, + { + "M": 143360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1706.406 + }, + { + "M": 144384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1739.0062500000004 + }, + { + "M": 145408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1751.1662500000002 + }, + { + "M": 146432, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1770.52625 + }, + { + "M": 147456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1754.0862499999994 + }, + { + "M": 148480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1774.0462499999994 + }, + { + "M": 149504, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1809.6864999999998 + }, + { + "M": 150528, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1820.6064999999999 + }, + { + "M": 151552, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1821.0865000000003 + }, + { + "M": 152576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1847.0064999999995 + }, + { + "M": 153600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1834.9264999999996 + }, + { + "M": 154624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1860.2867500000002 + }, + { + "M": 155648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1851.2867499999998 + }, + { + "M": 156672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
1886.8867500000006 + }, + { + "M": 157696, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1891.3667500000001 + }, + { + "M": 158720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1889.6067500000008 + }, + { + "M": 159744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1917.4470000000001 + }, + { + "M": 160768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1934.8069999999998 + }, + { + "M": 161792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1940.6070000000009 + }, + { + "M": 162816, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 1969.8072499999998 + }, + { + "M": 163840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1938.0870000000004 + }, + { + "M": 164864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1993.60725 + }, + { + "M": 165888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 1984.4472500000002 + }, + { + "M": 166912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2008.2072500000013 + }, + { + "M": 167936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2019.2872499999992 + }, + { + "M": 168960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2029.3672500000005 + }, + { + "M": 169984, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 2060.4875 + }, + { + "M": 171008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2067.4875 + }, + { + "M": 172032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2058.1674999999996 + }, + { + "M": 173056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2089.687500000001 + }, + { + "M": 174080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2090.567499999999 + }, + { + "M": 175104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2118.2877500000004 + }, + { + "M": 176128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2105.1677500000005 + }, + { + "M": 177152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2118.4477499999994 + }, + { + "M": 178176, + "rows_per_block": 10, + "vec_size": 2, + 
"time_us": 2143.68775 + }, + { + "M": 179200, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2150.407750000001 + }, + { + "M": 180224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2155.4480000000012 + }, + { + "M": 181248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2168.608 + }, + { + "M": 182272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2170.2079999999996 + }, + { + "M": 183296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2197.848 + }, + { + "M": 184320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2195.6479999999992 + }, + { + "M": 185344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2239.6482499999993 + }, + { + "M": 186368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2237.5282500000003 + }, + { + "M": 187392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2252.6882499999992 + }, + { + "M": 188416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2268.608250000002 + }, + { + "M": 189440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2267.1682500000006 + }, + { + "M": 190464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2277.6882499999992 + }, + { + "M": 191488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2290.6484999999993 + }, + { + "M": 192512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2319.2885000000006 + }, + { + "M": 193536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.888499999999 + }, + { + "M": 194560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2317.9685 + }, + { + "M": 195584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2360.96875 + }, + { + "M": 196608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2362.288749999999 + }, + { + "M": 197632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2365.2487500000007 + }, + { + "M": 198656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2385.3287500000015 + }, + { + "M": 199680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2393.968749999999 + }, + { + "M": 200704, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2417.5290000000005 + }, + { + "M": 201728, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2424.208999999998 + }, + { + "M": 202752, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2435.4490000000005 + }, + { + "M": 203776, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2443.4489999999996 + }, + { + "M": 204800, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2421.5690000000013 + }, + { + "M": 205824, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2457.3689999999997 + }, + { + "M": 206848, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2474.129250000001 + }, + { + "M": 207872, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2492.6492500000004 + }, + { + "M": 208896, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2508.3692500000006 + }, + { + "M": 209920, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2499.4892499999987 + }, + { + "M": 210944, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2511.24925 + }, + { + "M": 211968, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2547.049500000001 + }, + { + "M": 212992, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2553.8895 + }, + { + "M": 214016, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2566.4495000000006 + }, + { + "M": 215040, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2560.2495 + }, + { + "M": 216064, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2573.4894999999997 + }, + { + "M": 217088, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2604.889750000002 + }, + { + "M": 218112, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2620.76975 + }, + { + "M": 219136, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2644.8900000000003 + }, + { + "M": 220160, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2632.249749999999 + }, + { + "M": 221184, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 
2663.7700000000013 + }, + { + "M": 222208, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2664.6899999999996 + }, + { + "M": 223232, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2676.289999999999 + }, + { + "M": 224256, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2691.8500000000004 + }, + { + "M": 225280, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2661.890000000002 + }, + { + "M": 226304, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2714.2902499999973 + }, + { + "M": 227328, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2715.9302499999976 + }, + { + "M": 228352, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.8502499999986 + }, + { + "M": 229376, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2749.970250000003 + }, + { + "M": 230400, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2736.450249999999 + }, + { + "M": 231424, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2753.370249999998 + }, + { + "M": 232448, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2797.330499999999 + }, + { + "M": 233472, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2790.8504999999986 + }, + { + "M": 234496, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2819.050500000001 + }, + { + "M": 235520, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2804.5705000000007 + }, + { + "M": 236544, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2847.9707499999995 + }, + { + "M": 237568, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2824.7304999999988 + }, + { + "M": 238592, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2853.61075 + }, + { + "M": 239616, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2857.130750000003 + }, + { + "M": 240640, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2873.0509999999977 + }, + { + "M": 241664, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2891.131000000002 + }, + { + "M": 242688, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 2900.371000000001 + }, + { + "M": 243712, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2912.8110000000006 + }, + { + "M": 244736, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2946.4509999999973 + }, + { + "M": 245760, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2906.1709999999985 + }, + { + "M": 246784, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2950.0510000000013 + }, + { + "M": 247808, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2975.53125 + }, + { + "M": 248832, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2994.1712500000012 + }, + { + "M": 249856, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3008.531250000002 + }, + { + "M": 250880, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 2987.7312500000007 + }, + { + "M": 251904, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3039.611499999999 + }, + { + "M": 252928, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3031.3314999999984 + }, + { + "M": 253952, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3051.1714999999986 + }, + { + "M": 254976, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.4915 + }, + { + "M": 256000, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3042.3315000000002 + }, + { + "M": 257024, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3081.4917499999992 + }, + { + "M": 258048, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3091.2517500000013 + }, + { + "M": 259072, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3110.45175 + }, + { + "M": 260096, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3134.2517499999994 + }, + { + "M": 261120, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3117.45175 + }, + { + "M": 262144, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3155.6519999999964 + }, + { + "M": 263168, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3154.692000000001 + }, + { + "M": 264192, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3168.691999999999 + }, + { + "M": 265216, + "rows_per_block": 8, + "vec_size": 2, + "time_us": 3188.612000000001 + }, + { + "M": 266240, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3147.132 + }, + { + "M": 267264, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3202.012249999998 + }, + { + "M": 268288, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3215.772250000002 + }, + { + "M": 269312, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3221.932249999998 + }, + { + "M": 270336, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3244.772249999998 + }, + { + "M": 271360, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3232.2922500000022 + }, + { + "M": 272384, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3279.2125000000015 + }, + { + "M": 273408, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3291.3324999999986 + }, + { + "M": 274432, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3277.5725 + }, + { + "M": 275456, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3294.1324999999997 + }, + { + "M": 276480, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3301.532500000001 + }, + { + "M": 277504, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3334.132749999999 + }, + { + "M": 278528, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3346.212749999999 + }, + { + "M": 279552, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3349.2927500000005 + }, + { + "M": 280576, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3352.1727499999997 + }, + { + "M": 281600, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3347.93275 + }, + { + "M": 282624, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3368.85275 + }, + { + "M": 283648, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3402.4530000000013 + }, + { + "M": 284672, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3417.4929999999986 + }, + { + "M": 285696, + "rows_per_block": 10, + 
"vec_size": 2, + "time_us": 3446.6932499999984 + }, + { + "M": 286720, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3393.292999999998 + }, + { + "M": 287744, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3449.613250000004 + }, + { + "M": 288768, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3452.4932499999977 + }, + { + "M": 289792, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3476.0932499999963 + }, + { + "M": 290816, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3499.053250000001 + }, + { + "M": 291840, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3482.333249999998 + }, + { + "M": 292864, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3516.5734999999986 + }, + { + "M": 293888, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3519.573500000004 + }, + { + "M": 294912, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3549.933499999999 + }, + { + "M": 295936, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3541.5334999999977 + }, + { + "M": 296960, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3545.6534999999985 + }, + { + "M": 297984, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3572.053750000001 + }, + { + "M": 299008, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3575.413749999998 + }, + { + "M": 300032, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3592.973750000001 + }, + { + "M": 301056, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3609.093750000002 + }, + { + "M": 302080, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3600.7737499999985 + }, + { + "M": 303104, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3619.1737500000017 + }, + { + "M": 304128, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3629.2937500000007 + }, + { + "M": 305152, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3632.734000000002 + }, + { + "M": 306176, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3676.854000000001 + }, + { + "M": 307200, 
+ "rows_per_block": 10, + "vec_size": 2, + "time_us": 3651.374 + }, + { + "M": 308224, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3691.6139999999978 + }, + { + "M": 309248, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3685.5740000000005 + }, + { + "M": 310272, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3709.214250000001 + }, + { + "M": 311296, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3726.054250000001 + }, + { + "M": 312320, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3703.5342500000015 + }, + { + "M": 313344, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3766.6945000000014 + }, + { + "M": 314368, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3764.7745000000014 + }, + { + "M": 315392, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3779.294500000002 + }, + { + "M": 316416, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3782.7745000000014 + }, + { + "M": 317440, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3752.534249999999 + }, + { + "M": 318464, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3818.054750000001 + }, + { + "M": 319488, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3805.2545 + }, + { + "M": 320512, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3853.0147499999985 + }, + { + "M": 321536, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3848.0147500000003 + }, + { + "M": 322560, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3836.374750000001 + }, + { + "M": 323584, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3878.374750000001 + }, + { + "M": 324608, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3881.254750000002 + }, + { + "M": 325632, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3889.135000000002 + }, + { + "M": 326656, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3904.2149999999992 + }, + { + "M": 327680, + "rows_per_block": 10, + "vec_size": 2, + "time_us": 3894.334999999999 + } + ] + } +} 
\ No newline at end of file diff --git a/aiter/ops/custom_all_reduce.py b/aiter/ops/custom_all_reduce.py index 9b01a82c8c3939f045d03e5e92238a27d8992aa2..bad475f761ff31d860b32627af0f7595f05e7c11 100644 --- a/aiter/ops/custom_all_reduce.py +++ b/aiter/ops/custom_all_reduce.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: MIT - -from typing import List, Optional, Tuple +# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. + +from typing import List import torch @@ -9,38 +10,61 @@ from ..jit.core import compile_ops MD_NAME = "module_custom_all_reduce" -@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def init_custom_ar( - meta: torch.Tensor, - rank_data: torch.Tensor, - handles: List[torch.Tensor], + meta_ptr: int, + rank_data_ptr: int, + rank_data_sz: int, + ipc_handle_ptrs: List[int], offsets: List[int], rank: int, fully_connected: bool, ) -> int: ... -@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def all_reduce( _fa: int, inp: torch.Tensor, out: torch.Tensor, + use_new: bool, open_fp8_quant: bool, - reg_buffer: Optional[torch.Tensor] = None, + reg_inp_ptr: int, + reg_inp_bytes: int, +) -> None: ... + + +@compile_ops("module_custom_all_reduce", develop=True) +def reduce_scatter( + _fa: int, + inp: torch.Tensor, + out: torch.Tensor, + reg_ptr: int, + reg_bytes: int, ) -> None: ... -@compile_ops("module_custom_all_reduce") -def all_gather_reg(_fa: int, inp: torch.Tensor, out: torch.Tensor) -> None: ... +@compile_ops("module_custom_all_reduce", develop=True) +def all_gather_reg( + _fa: int, + inp: torch.Tensor, + out: torch.Tensor, + dim: int, +) -> None: ... 
-@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def all_gather_unreg( - _fa: int, inp: torch.Tensor, reg_buffer: torch.Tensor, out: torch.Tensor + _fa: int, + inp: torch.Tensor, + reg_buffer: int, + out: torch.Tensor, + reg_bytes: int, + dim: int, ) -> None: ... -@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def fused_allreduce_rmsnorm( _fa: int, inp: torch.Tensor, @@ -49,162 +73,102 @@ def fused_allreduce_rmsnorm( out: torch.Tensor, w: torch.Tensor, eps: float, - reg_buffer: Optional[torch.Tensor] = None, + reg_ptr: int, + reg_bytes: int, + use_1stage: bool, ) -> None: ... -def all_reduce_asm_fake_tensor( +@compile_ops("module_custom_all_reduce", develop=True) +def fused_allreduce_rmsnorm_quant( + _fa: int, inp: torch.Tensor, - ca: int, - reg_sig: torch.Tensor, - reg_buffer: torch.Tensor, - isGraph: bool, -) -> torch.Tensor: - - return torch.empty_like( - inp, - dtype=inp.dtype, - device=inp.device, - ) + res_inp: torch.Tensor, + res_out: torch.Tensor, + out: torch.Tensor, + scale_out: torch.Tensor, + w: torch.Tensor, + eps: float, + reg_ptr: int, + reg_bytes: int, + use_1stage: bool, +) -> None: ... -@compile_ops("module_custom_all_reduce", gen_fake=all_reduce_asm_fake_tensor) -def all_reduce_asm_( +@compile_ops("module_custom_all_reduce", develop=True) +def fused_allreduce_rmsnorm_quant_per_group( + _fa: int, inp: torch.Tensor, - ca: int, - reg_sig: torch.Tensor, - reg_buffer: torch.Tensor, - isGraph: bool, -) -> torch.Tensor: ... 
- - -def all_reduce_rmsnorm_fake_tensors( - input: torch.Tensor, - residual_in: torch.Tensor, - weight: torch.Tensor, - bias: torch.Tensor, - epsilon: float, - ca: int, - reg_sig: torch.Tensor, - reg_buffer: torch.Tensor, - isGraph: bool, -) -> List[torch.Tensor]: - - output = torch.empty_like( - input, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad - ) - - residual_out = torch.empty_like( - input, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad - ) - - return [output, residual_out] - - -@compile_ops("module_custom_all_reduce", gen_fake=all_reduce_rmsnorm_fake_tensors) -def all_reduce_rmsnorm_( - input: torch.Tensor, - residual_in: torch.Tensor, - weight: torch.Tensor, - bias: torch.Tensor, - epsilon: float, - ca: int, - reg_sig: torch.Tensor, - reg_buffer: torch.Tensor, - isGraph: bool, -) -> List[torch.Tensor]: ... - - -# def all_reduce_rmsnorm_quant_fake_tensors( -# input: torch.Tensor, -# residual_in: torch.Tensor, -# weight: torch.Tensor, -# xscale: torch.Tensor, -# bias: torch.Tensor, -# epsilon: float, -# ca: int, -# reg_sig: torch.Tensor, -# reg_buffer: torch.Tensor, -# isGraph: bool, -# ) -> List[torch.Tensor]: - -# N = input.size(-1) -# M = input.numel() // N - -# output = torch.empty_like( -# input, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad -# ) - -# residual_out = torch.empty_like( -# input, dtype=input.dtype, device=input.device, requires_grad=input.requires_grad -# ) - -# y_scale = torch.empty((M, 1), dtype=torch.float32, device=input.device) - -# return [output, residual_out, y_scale] - - -# @compile_ops("module_custom_all_reduce", gen_fake=all_reduce_rmsnorm_quant_fake_tensors) -# def all_reduce_rmsnorm_quant_( -# input: torch.Tensor, -# residual_in: torch.Tensor, -# weight: torch.Tensor, -# xscale: torch.Tensor, -# bias: torch.Tensor, -# epsilon: float, -# ca: int, -# reg_sig: torch.Tensor, -# reg_buffer: torch.Tensor, -# isGraph: bool, -# ) -> List[torch.Tensor]: 
... - - -@compile_ops("module_custom_all_reduce") + res_inp: torch.Tensor, + res_out: torch.Tensor, + out: torch.Tensor, + scale_out: torch.Tensor, + w: torch.Tensor, + eps: float, + group_size: int, + reg_ptr: int, + reg_bytes: int, + use_1stage: bool, + bf16_out_ptr: int = 0, +) -> None: ... + + +@compile_ops("module_custom_all_reduce", develop=True) +def fused_qknorm_allreduce( + _fa: int, + qkv_in: torch.Tensor, + q_w: torch.Tensor, + k_w: torch.Tensor, + q_out: torch.Tensor, + k_out: torch.Tensor, + v_out: torch.Tensor, + eps: float, + reg_ptr: int, + reg_bytes: int, +) -> None: ... + + +@compile_ops("module_custom_all_reduce", develop=True) def dispose(_fa: int) -> None: ... -@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def meta_size() -> int: ... -@compile_ops("module_custom_all_reduce") -def register_buffer( - _fa: int, t: torch.Tensor, handles: List[torch.Tensor], offsets: List[int] +@compile_ops("module_custom_all_reduce", develop=True) +def register_input_buffer( + _fa: int, self_ptr: int, ipc_handle_ptrs: List[int], offsets: List[int] ) -> None: ... -# def gen_get_graph_buffer_ipc_meta_fake_tensors(_fa: int) -> List[torch.Tensor]: - -# handle_sz = 64 # sizeof(hipIpcMemHandle_t) is 64 byte -# num_buffers = 4 # ??? -# handles = torch.empty((handle_sz * num_buffers,), dtype=torch.uint8, device="cuda") +@compile_ops("module_custom_all_reduce", develop=True) +def register_output_buffer( + _fa: int, self_ptr: int, ipc_handle_ptrs: List[int], offsets: List[int] +) -> None: ... -# offset_tensor = torch.empty((num_buffers,), dtype=torch.int64, device="cuda") -# return [handles, offset_tensor] +@compile_ops("module_custom_all_reduce", develop=True) +def get_graph_buffer_count(_fa: int) -> int: ... -@compile_ops("module_custom_all_reduce") -def get_graph_buffer_ipc_meta(_fa: int) -> Tuple[torch.Tensor, torch.Tensor]: ... 
+@compile_ops("module_custom_all_reduce", develop=True) +def get_graph_buffer_ipc_meta(_fa: int, handle_out: int, offset_out: int) -> None: ... -@compile_ops("module_custom_all_reduce") +@compile_ops("module_custom_all_reduce", develop=True) def register_graph_buffers( - _fa: int, handles: List[torch.Tensor], offsets: List[torch.Tensor] + _fa: int, handle_ptrs: List[int], offset_ptrs: List[int] ) -> None: ... -@compile_ops("module_custom_all_reduce") -def allocate_meta_buffer(size: int) -> torch.Tensor: ... - +@compile_ops("module_custom_all_reduce", develop=True) +def allocate_meta_buffer(size: int) -> int: ... -# def get_meta_buffer_ipc_handle_fake(inp: torch.Tensor) -> torch.Tensor: -# handle_size = 64 -# if not inp.is_cuda: -# raise RuntimeError("Input tensor must be on CUDA device") -# return torch.empty(handle_size, dtype=torch.uint8, device=inp.device) +@compile_ops("module_custom_all_reduce", develop=True) +def free_meta_buffer(ptr: int) -> None: ... -@compile_ops("module_custom_all_reduce") -def get_meta_buffer_ipc_handle(inp: torch.Tensor) -> torch.Tensor: ... \ No newline at end of file +@compile_ops("module_custom_all_reduce", develop=True) +def get_meta_buffer_ipc_handle(inp_ptr: int, out_handle_ptr: int) -> None: ... diff --git a/aiter/ops/grouped_gemm.py b/aiter/ops/grouped_gemm.py new file mode 100644 index 0000000000000000000000000000000000000000..41c2e2f307a676b62a83013d2fdd753410adfe0c --- /dev/null +++ b/aiter/ops/grouped_gemm.py @@ -0,0 +1,264 @@ +# SPDX-License-Identifier: MIT + +from typing import List, Optional, Sequence, Tuple + +import torch +from torch import Tensor + +from ..jit.core import compile_ops + + +@compile_ops("module_grouped_gemm") +def ck_grouped_gemm( + a_tensors: List[Tensor], + b_tensors: List[Tensor], +) -> List[Tensor]: ... + + +@compile_ops("module_grouped_gemm") +def ck_grouped_gemm_out( + a_tensors: List[Tensor], + b_tensors: List[Tensor], + c_tensors: List[Tensor], +) -> List[Tensor]: ... 
+ + +# CK tile alignment for the low-level kernel (see grouped_gemm_kernels.cu). +_MOE_M_ALIGN = { + torch.float16: 64, + torch.bfloat16: 64, + torch.float8_e4m3fn: 128, + torch.int8: 32, +} +_MOE_NK_ALIGN = { + torch.float16: dict(n=128, k=128), + torch.bfloat16: dict(n=128, k=128), + torch.float8_e4m3fn: dict(n=128, k=128), + torch.int8: dict(n=32, k=128), +} + + +def _moe_output_dtype(dtype: torch.dtype) -> torch.dtype: + if dtype is torch.int8: + return torch.int32 + if dtype is torch.float8_e4m3fn: + return torch.float32 + return dtype + + +def _align_up(x: int, align: int) -> int: + return ((x + align - 1) // align) * align + + +def _validate_moe_fixed_nk(b_tensors: Sequence[Tensor], dtype: torch.dtype) -> Tuple[int, int]: + if not b_tensors: + raise ValueError("ck_grouped_gemm_moe: b_tensors must not be empty") + n0, k0 = b_tensors[0].shape + nk = _MOE_NK_ALIGN[dtype] + if n0 % nk["n"] != 0 or k0 % nk["k"] != 0 or k0 < nk["k"]: + raise ValueError( + f"ck_grouped_gemm_moe: fixed N/K must satisfy N % {nk['n']} == 0, " + f"K % {nk['k']} == 0, K >= {nk['k']} for {dtype}, got N={n0}, K={k0}" + ) + for i, b in enumerate(b_tensors): + if b.shape != (n0, k0): + raise ValueError( + f"ck_grouped_gemm_moe: all B tensors must share the same [N, K], " + f"group {i} has {tuple(b.shape)} vs expected ({n0}, {k0})" + ) + return n0, k0 + + +def _pad_a_rows(a: Tensor, m_align: int) -> Tuple[Tensor, int, int]: + m_orig = a.size(0) + m_pad = _align_up(m_orig, m_align) + if m_pad == m_orig: + return a, m_orig, m_pad + a_pad = a.new_zeros(m_pad, a.size(1)) + a_pad[:m_orig].copy_(a) + return a_pad, m_orig, m_pad + + +def ck_grouped_gemm_moe( + a_tensors: List[Tensor], + b_tensors: List[Tensor], +) -> List[Tensor]: + """ + MOE-friendly grouped GEMM with per-group dynamic M and fixed N/K. + + Each group computes C_i = A_i @ B_i^T. A_i may have arbitrary M_i >= 1; + rows are zero-padded to the CK M-tile boundary before launch, then outputs + are sliced back to the logical M_i. 
+ """ + if len(a_tensors) != len(b_tensors): + raise ValueError("ck_grouped_gemm_moe: a and b tensor lists must have the same length") + + dtype = a_tensors[0].dtype + m_align = _MOE_M_ALIGN[dtype] + _validate_moe_fixed_nk(b_tensors, dtype) + + a_padded: List[Tensor] = [] + m_orig_list: List[int] = [] + for a, b in zip(a_tensors, b_tensors): + if a.dtype != dtype or b.dtype != dtype: + raise ValueError("ck_grouped_gemm_moe: all tensors must share the same dtype") + if a.size(1) != b.size(1): + raise ValueError("ck_grouped_gemm_moe: K mismatch between A and B") + if a.size(0) <= 0: + raise ValueError("ck_grouped_gemm_moe: M must be positive") + a_pad, m_orig, _ = _pad_a_rows(a, m_align) + a_padded.append(a_pad) + m_orig_list.append(m_orig) + + c_padded = ck_grouped_gemm(a_padded, b_tensors) + n = b_tensors[0].size(0) + out_dtype = _moe_output_dtype(dtype) + return [ + c[:m_orig, :n].to(out_dtype) if c.size(0) != m_orig else c + for c, m_orig in zip(c_padded, m_orig_list) + ] + + +def ck_grouped_gemm_moe_out( + a_tensors: List[Tensor], + b_tensors: List[Tensor], + c_tensors: List[Tensor], +) -> List[Tensor]: + """ + MOE grouped GEMM writing into caller-provided logical C tensors [M_i, N]. + + Padded A/C buffers are allocated internally; only the valid M_i rows are + copied into c_tensors. 
+ """ + if not (len(a_tensors) == len(b_tensors) == len(c_tensors)): + raise ValueError("ck_grouped_gemm_moe_out: a, b, c lists must have the same length") + + dtype = a_tensors[0].dtype + m_align = _MOE_M_ALIGN[dtype] + n, _ = _validate_moe_fixed_nk(b_tensors, dtype) + out_dtype = _moe_output_dtype(dtype) + + a_padded: List[Tensor] = [] + c_padded: List[Tensor] = [] + m_orig_list: List[int] = [] + + for a, b, c in zip(a_tensors, b_tensors, c_tensors): + if a.dtype != dtype or b.dtype != dtype: + raise ValueError("ck_grouped_gemm_moe_out: a/b dtype mismatch") + if c.dtype != out_dtype: + raise ValueError(f"ck_grouped_gemm_moe_out: c dtype must be {out_dtype}") + if a.size(1) != b.size(1): + raise ValueError("ck_grouped_gemm_moe_out: K mismatch between A and B") + m_orig = a.size(0) + if c.shape != (m_orig, n): + raise ValueError( + f"ck_grouped_gemm_moe_out: c shape {tuple(c.shape)} != ({m_orig}, {n})" + ) + a_pad, m_orig, m_pad = _pad_a_rows(a, m_align) + a_padded.append(a_pad) + m_orig_list.append(m_orig) + if m_pad == m_orig: + c_padded.append(c) + else: + c_padded.append(c.new_empty(m_pad, n)) + + ck_grouped_gemm_out(a_padded, b_tensors, c_padded) + + for c, c_pad, m_orig in zip(c_tensors, c_padded, m_orig_list): + if c_pad.data_ptr() != c.data_ptr(): + c.copy_(c_pad[:m_orig]) + return c_tensors + + +class GroupedGemmMoeBuffers: + """ + Reusable padded A/C buffers for MOE inference with fixed N/K per expert. + + Avoids per-forward allocation when max tokens per expert is bounded. 
+ """ + + def __init__( + self, + num_groups: int, + n: int, + k: int, + dtype: torch.dtype, + max_m: int, + device: Optional[torch.device] = None, + ): + if num_groups <= 0: + raise ValueError("GroupedGemmMoeBuffers: num_groups must be positive") + nk = _MOE_NK_ALIGN[dtype] + if n % nk["n"] != 0 or k % nk["k"] != 0 or k < nk["k"]: + raise ValueError(f"GroupedGemmMoeBuffers: invalid fixed N={n}, K={k} for {dtype}") + + self.num_groups = num_groups + self.n = n + self.k = k + self.dtype = dtype + self.m_align = _MOE_M_ALIGN[dtype] + self.max_m_pad = _align_up(max_m, self.m_align) + self.out_dtype = _moe_output_dtype(dtype) + dev = device or torch.device("cuda") + + self.a_bufs = [ + torch.zeros(self.max_m_pad, k, device=dev, dtype=dtype) + for _ in range(num_groups) + ] + self.c_bufs = [ + torch.zeros(self.max_m_pad, n, device=dev, dtype=self.out_dtype) + for _ in range(num_groups) + ] + + def _ensure_capacity(self, m_orig: int) -> int: + m_pad = _align_up(m_orig, self.m_align) + if m_pad > self.max_m_pad: + raise ValueError( + f"GroupedGemmMoeBuffers: M={m_orig} exceeds configured max_m " + f"(padded max {self.max_m_pad})" + ) + return m_pad + + def run( + self, + a_tensors: Sequence[Tensor], + b_tensors: Sequence[Tensor], + c_tensors: Optional[Sequence[Tensor]] = None, + ) -> List[Tensor]: + if len(a_tensors) != self.num_groups or len(b_tensors) != self.num_groups: + raise ValueError("GroupedGemmMoeBuffers: group count mismatch") + + a_padded: List[Tensor] = [] + c_padded: List[Tensor] = [] + m_orig_list: List[int] = [] + logical_c: List[Tensor] = [] + + for i, (a, b) in enumerate(zip(a_tensors, b_tensors)): + if b.shape != (self.n, self.k): + raise ValueError(f"GroupedGemmMoeBuffers: B[{i}] shape {tuple(b.shape)} != ({self.n}, {self.k})") + m_orig = a.size(0) + m_pad = self._ensure_capacity(m_orig) + m_orig_list.append(m_orig) + + a_buf = self.a_bufs[i] + a_buf.zero_() + a_buf[:m_orig].copy_(a) + a_padded.append(a_buf[:m_pad]) + + if c_tensors is not None: + c = 
c_tensors[i] + if c.shape != (m_orig, self.n): + raise ValueError(f"GroupedGemmMoeBuffers: c[{i}] shape mismatch") + logical_c.append(c) + c_padded.append(self.c_bufs[i][:m_pad]) + else: + c_padded.append(self.c_bufs[i][:m_pad]) + + if c_tensors is not None: + ck_grouped_gemm_out(a_padded, list(b_tensors), c_padded) + for c, c_pad, m_orig in zip(logical_c, c_padded, m_orig_list): + c.copy_(c_pad[:m_orig]) + return list(logical_c) + + c_full = ck_grouped_gemm_out(a_padded, list(b_tensors), c_padded) + return [c[:m_orig].clone() for c, m_orig in zip(c_full, m_orig_list)] diff --git a/aiter/ops/mhc.py b/aiter/ops/mhc.py new file mode 100644 index 0000000000000000000000000000000000000000..6d10373a60dd77c65f3157f98adb3e9598ba935e --- /dev/null +++ b/aiter/ops/mhc.py @@ -0,0 +1,350 @@ +# SPDX-License-Identifier: MIT + + +import math +import os + +import torch +from aiter import dtypes +from torch import Tensor + +from ..jit.core import compile_ops +from ..jit.utils.chip_info import get_cu_num, get_gfx +from ..jit.utils.torch_guard import torch_compile_guard + + +def _truthy_env(name: str) -> bool: + v = os.environ.get(name, "").strip().lower() + return v in ("1", "true", "yes", "on") + + +def _round_to_tf32_like_tilekernels(x: torch.Tensor) -> torch.Tensor: + return (x.view(torch.int32) + 0x1000).view(torch.float32) + + +@compile_ops("module_mhc") +def mhc_pre_gemm_sqrsum( + out: Tensor, + sqrsum: Tensor, + x: Tensor, + fn: Tensor, + tile_k: int = 128, # 64 or 128 + use_tf32: bool = False, +) -> None: ... + + +@compile_ops("module_mhc") +def mhc_pre_gemm_sqrsum_stage1_m128( + out: Tensor, + sqrsum: Tensor, + x: Tensor, + fn: Tensor, + use_tf32: bool = False, +) -> None: ... + + +@compile_ops("module_mhc") +def mhc_pre_reduce_splitk( + out_red: Tensor, + sqrsum_red: Tensor, + out: Tensor, + sqrsum: Tensor, +) -> None: ... 
+ + +@compile_ops("module_mhc") +def mhc_pre_big_fuse( + post_mix: Tensor, + comb_mix: Tensor, + layer_input: Tensor, + gemm_out_mul: Tensor, + gemm_out_sqrsum: Tensor, + hc_scale: Tensor, + hc_base: Tensor, + residual: Tensor, + rms_eps: float = 1e-6, + hc_pre_eps: float = 1e-6, + hc_sinkhorn_eps: float = 1e-6, + hc_post_mult_value: float = 1.0, + sinkhorn_repeat: int = 20, +) -> None: ... + + +@compile_ops("module_mhc") +def mhc_pre_big_fuse_tlstyle( + post_mix: Tensor, + comb_mix: Tensor, + layer_input: Tensor, + gemm_out_mul: Tensor, + gemm_out_sqrsum: Tensor, + hc_scale: Tensor, + hc_base: Tensor, + residual: Tensor, + rms_eps: float = 1e-6, + hc_pre_eps: float = 1e-6, + hc_sinkhorn_eps: float = 1e-6, + hc_post_mult_value: float = 1.0, + sinkhorn_repeat: int = 20, +) -> None: ... + + +def mhc_pre_fake( + residual: torch.Tensor, + fn: torch.Tensor, + hc_scale: torch.Tensor, + hc_base: torch.Tensor, + rms_eps: float = 1e-6, + hc_pre_eps: float = 1e-6, + hc_sinkhorn_eps: float = 1e-6, + hc_post_mult_value: float = 1.0, + sinkhorn_repeat: int = 20, # if 0, only do pre for hc_head + use_tf32: bool = False, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + m = residual.size(0) + hc_mult = residual.size(1) + hidden_size = residual.size(2) + device = residual.device + post_mix = torch.empty(m, hc_mult, 1, dtype=dtypes.fp32, device=device) + comb_mix = torch.empty(m, hc_mult, hc_mult, dtype=dtypes.fp32, device=device) + layer_input = torch.empty(m, hidden_size, dtype=dtypes.bf16, device=device) + return post_mix, comb_mix, layer_input + + +@torch_compile_guard(gen_fake=mhc_pre_fake) +def mhc_pre( + residual: torch.Tensor, + fn: torch.Tensor, + hc_scale: torch.Tensor, + hc_base: torch.Tensor, + rms_eps: float = 1e-6, + hc_pre_eps: float = 1e-6, + hc_sinkhorn_eps: float = 1e-6, + hc_post_mult_value: float = 1.0, + sinkhorn_repeat: int = 20, # if 0, only do pre for hc_head + use_tf32: bool = False, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + m = 
residual.size(0) + hc_mult = residual.size(1) + hidden_size = residual.size(2) + hc_mult3 = fn.size(0) + assert hc_mult3 == hc_mult * 2 + hc_mult * hc_mult or ( + hc_mult3 == hc_mult and sinkhorn_repeat == 0 + ) + hc_hidden_size = hc_mult * hidden_size + gfx = get_gfx() + stage1_variant = os.environ.get("AITER_MHC_PRE_STAGE1", "auto").strip().lower() + use_stage1_m128_auto = ( + sinkhorn_repeat > 0 + and hc_mult3 == hc_mult * (2 + hc_mult) + and gfx != "gfx936" + and not (hidden_size in (1280, 2560) and m <= 512) + ) + if stage1_variant in ("", "auto"): + use_stage1_m128 = use_stage1_m128_auto + elif stage1_variant in ("aiter", "legacy"): + use_stage1_m128 = False + elif stage1_variant in ("m128", "tlstyle"): + use_stage1_m128 = True + else: + raise ValueError("AITER_MHC_PRE_STAGE1 must be 'auto' or 'm128' ('tlstyle' is accepted as an alias)") + + env_kernel = os.environ.get("AITER_MHC_PRE_KERNEL", "auto").strip().lower() + use_tlstyle_auto = ( + sinkhorn_repeat > 0 + and hc_mult3 == hc_mult * (2 + hc_mult) + and m > 128 + and not (hidden_size in (1280, 2560) and m <= 512) + ) + if env_kernel in ("aiter", "legacy"): + use_tlstyle = False + elif env_kernel == "tlstyle": + use_tlstyle = True + elif env_kernel in ("", "auto"): + use_tlstyle = use_tlstyle_auto + else: + use_tlstyle = use_tlstyle_auto + + prefetch_stages = 2 + tile_m = 128 if use_stage1_m128 else 16 * 4 + # tile_k → 估算 tg_per_cu (target groups per CU, 受 LDS/VGPR 占用约束): + # tile_k=64: tile_n*64*4*2 = 16KB/block → 4 blocks/CU + # tile_k=128: tile_n*128*4*2 = 32KB/block → 2 blocks/CU + tile_k_tg_dict = {128: 2} if use_stage1_m128 else {128: 2, 64: 4} + num_cu = get_cu_num() + selected_splitk = 1 + selected_tile_k = 128 if use_stage1_m128 else 64 + num_tg_m = (m + tile_m - 1) // tile_m + # Data-driven split-k window: + # - For small/medium M (num_tg_m < num_cu), keep broad search [1, 32]. + # - Once M-side TGs already cover all CUs (num_tg_m >= num_cu), prefer split-k=2. 
+ # This avoids the large regression observed with split-k=1 on large batches. + if num_tg_m >= num_cu: + min_splitk = 2 + max_splitk = 2 + else: + min_splitk = 1 + max_splitk = 32 + selected_score = num_tg_m / (num_cu * tile_k_tg_dict[selected_tile_k]) + selected_score = selected_score / math.ceil(selected_score) + for tile_k, tg_per_cu in tile_k_tg_dict.items(): + if (hc_hidden_size % tile_k) != 0: + continue + meanwhile_tg = num_cu * tg_per_cu + for splitk in range(min_splitk, max_splitk + 1): + if hc_hidden_size % (splitk * tile_k) != 0 or (hc_hidden_size // splitk) < ( + tile_k * prefetch_stages + ): + continue + num_tg = num_tg_m * splitk + score = num_tg / meanwhile_tg + score = score / math.ceil(score) + if selected_score < score: + selected_splitk = splitk + selected_tile_k = tile_k + selected_score = score + # print(f"{selected_score=} {selected_splitk=} {selected_tile_k=} {score=} {splitk=} {tile_k=}") + if num_tg > meanwhile_tg * 4: + break + + # TileLang-style M128 stage1 still needs split-k parallelism when M-side + # CTAs under-fill DCU. Once M-side CTAs already cover CUs, keep split_k low + # to avoid excessive partial writes and stage2 reduction work. + if use_stage1_m128 and hc_hidden_size in (4 * 4096, 4 * 7168): + if num_tg_m >= num_cu: + candidate_splitk = 2 + elif m >= 2048: + candidate_splitk = 8 + else: + candidate_splitk = 32 + if ( + hc_hidden_size % (candidate_splitk * selected_tile_k) == 0 + and (hc_hidden_size // candidate_splitk) >= selected_tile_k * prefetch_stages + ): + selected_splitk = candidate_splitk + + # Work-bound regime override: + # When num_tg_m >= num_cu the splitk window is already forced to {2}, and both + # (tile_k=64, splitk=2) and (tile_k=128, splitk=2) can land on score==1.0. The + # strict `<` update in the loop above lets whichever is iterated first win. 
+ # Empirically on DCU gfx936/938 tile_k=64 is meaningfully faster in this regime + # because it halves per-block LDS occupancy (tile_n*64*4*2 vs tile_n*128*4*2), + # unlocking ~2x concurrent blocks per CU. Measured stage1 wins (auto vs forced + # tile_k=64) up to ~40% at m=8192,hidden=7168 and consistent ~10% at m=8192 + # across hidden_size; large-m/large-hidden cases where auto already picks + # tile_k=64 are unchanged. + if not use_stage1_m128 and num_tg_m >= num_cu and selected_tile_k == 128: + candidate_tile_k = 64 + candidate_splitk = 2 + if ( + hc_hidden_size % (candidate_splitk * candidate_tile_k) == 0 + and (hc_hidden_size // candidate_splitk) + >= candidate_tile_k * prefetch_stages + ): + selected_tile_k = candidate_tile_k + selected_splitk = candidate_splitk + + # Small/medium DeepSeek MHC stage1 override: + # sweep data shows tile_k=64, splitk=32 wins for m<=1024 on hidden=4096/7168. + # For m=2048 it only wins on hidden=7168; hidden=4096 regresses from extra split-k work. + candidate_tile_k = 64 + candidate_splitk = 32 + if ( + not use_stage1_m128 + and hc_hidden_size in (4 * 4096, 4 * 7168) + and (m <= 1024 or (m == 2048 and hc_hidden_size == 4 * 7168)) + and hc_hidden_size % (candidate_splitk * candidate_tile_k) == 0 + and (hc_hidden_size // candidate_splitk) >= candidate_tile_k * prefetch_stages + ): + selected_tile_k = candidate_tile_k + selected_splitk = candidate_splitk + + # Optional manual overrides for stage1 launch search: + # AITER_MHC_PRE_TILE_K=64|128 + # AITER_MHC_PRE_SPLITK= + env_tile_k = os.environ.get("AITER_MHC_PRE_TILE_K", "").strip() + if env_tile_k: + forced_tile_k = int(env_tile_k) + if forced_tile_k not in tile_k_tg_dict: + msg = "AITER_MHC_PRE_TILE_K must be 128 when AITER_MHC_PRE_STAGE1=m128" + if not use_stage1_m128: + msg = "AITER_MHC_PRE_TILE_K must be 64 or 128" + raise ValueError(msg) + if (hc_hidden_size % forced_tile_k) != 0: + raise ValueError( + f"AITER_MHC_PRE_TILE_K={forced_tile_k} is incompatible with 
hc_hidden_size={hc_hidden_size}" + ) + selected_tile_k = forced_tile_k + + env_splitk = os.environ.get("AITER_MHC_PRE_SPLITK", "").strip() + if env_splitk: + forced_splitk = int(env_splitk) + if forced_splitk < 1: + raise ValueError("AITER_MHC_PRE_SPLITK must be >= 1") + if hc_hidden_size % (forced_splitk * selected_tile_k) != 0: + raise ValueError( + "AITER_MHC_PRE_SPLITK is incompatible with selected tile_k/hc_hidden_size" + ) + if (hc_hidden_size // forced_splitk) < (selected_tile_k * prefetch_stages): + raise ValueError( + "AITER_MHC_PRE_SPLITK violates prefetch stage constraint for selected tile_k" + ) + selected_splitk = forced_splitk + + device = residual.device + out_pad = torch.empty( + selected_splitk, m, (hc_mult3 + 31) // 32 * 32, dtype=dtypes.fp32, device=device + ) + out = out_pad[:, :, :hc_mult3] + sqrsum = torch.empty(selected_splitk, m, dtype=dtypes.fp32, device=device) + if use_stage1_m128: + mhc_pre_gemm_sqrsum_stage1_m128(out, sqrsum, residual, fn, use_tf32) + else: + stage1_fn = _round_to_tf32_like_tilekernels(fn) if use_tf32 else fn + mhc_pre_gemm_sqrsum(out, sqrsum, residual, stage1_fn, selected_tile_k, False) + # Optional path: reduce split-k outputs before big_fuse and run stage2 with n_splits=1. + # Keep stage2 input layout compatible with kernel assumptions (3D + padded stride), + # instead of passing compact 2D tensors from direct sum(). + # Enable explicitly via AITER_MHC_PRE_REDUCE_SPLITK=1|true|yes|on. + # Current data shows the extra kernel cost outweighs the stage2 reduction win. 
+ use_reduce_splitk = selected_splitk > 1 and _truthy_env("AITER_MHC_PRE_REDUCE_SPLITK") + if use_reduce_splitk: + out_red_pad = torch.empty( + 1, m, (hc_mult3 + 31) // 32 * 32, dtype=dtypes.fp32, device=device + ) + out_red = out_red_pad[:, :, :hc_mult3] + sqrsum_red = torch.empty(1, m, dtype=dtypes.fp32, device=device) + mhc_pre_reduce_splitk(out_red, sqrsum_red, out, sqrsum) + out = out_red + sqrsum = sqrsum_red + + post_mix = torch.empty(m, hc_mult, 1, dtype=dtypes.fp32, device=device) + comb_mix = torch.empty(m, hc_mult, hc_mult, dtype=dtypes.fp32, device=device) + layer_input = torch.empty(m, hidden_size, dtype=dtypes.bf16, device=device) + big_fuse = mhc_pre_big_fuse_tlstyle if use_tlstyle else mhc_pre_big_fuse + big_fuse( + post_mix, + comb_mix, + layer_input, + out, + sqrsum, + hc_scale, + hc_base, + residual, + rms_eps, + hc_pre_eps, + hc_sinkhorn_eps, + hc_post_mult_value, + sinkhorn_repeat, + ) + + return post_mix, comb_mix, layer_input + + +@compile_ops("module_mhc") +def mhc_post( + out: Tensor, + x: Tensor, + residual: Tensor, + post_layer_mix: Tensor, + comb_res_mix: Tensor, +) -> None: ... 
diff --git a/aiter/ops/moe_c_op.py b/aiter/ops/moe_c_op.py index 8ce3206c135153944cfdb9fcf9ea47aa22fce4a6..03d94fe7642b71a87ffdb7513d440f4a8b97d429 100644 --- a/aiter/ops/moe_c_op.py +++ b/aiter/ops/moe_c_op.py @@ -8,7 +8,84 @@ from ..jit.core import ( compile_ops, ) from .enum import ActivationType, Enum, QuantType +import os +import json + +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +_SILU_CFG_DIR = os.path.join(SCRIPT_DIR, "../moe_c_configs/silu_configs") +_SILU_SUMMARY = os.path.join(_SILU_CFG_DIR, "silu_config_summary.json") +_SILU_CASES_CACHE = None +_SILU_INDEX_BY_N_CACHE = None + + +def _load_silu_summary(): + global _SILU_CASES_CACHE, _SILU_INDEX_BY_N_CACHE + if _SILU_CASES_CACHE is not None and _SILU_INDEX_BY_N_CACHE is not None: + return _SILU_CASES_CACHE, _SILU_INDEX_BY_N_CACHE + if not os.path.exists(_SILU_SUMMARY): + _SILU_CASES_CACHE = {} + _SILU_INDEX_BY_N_CACHE = {} + return _SILU_CASES_CACHE, _SILU_INDEX_BY_N_CACHE + with open(_SILU_SUMMARY, "r", encoding="utf-8") as f: + data = json.load(f) + _SILU_CASES_CACHE = data.get("cases", {}) + _SILU_INDEX_BY_N_CACHE = data.get("index_by_n", {}) + return _SILU_CASES_CACHE, _SILU_INDEX_BY_N_CACHE + + +def load_silu_tune_config(M: int, N: int): + # 1) 只读 summary:优先精确命中 key + N = int(N) + cases, index_by_n = _load_silu_summary() + key = f"M={M},N={N}" + + if key in cases: + return cases[key]["rows_per_block"], cases[key]["vec_size"] + + # 2) 同 N 下按 |M - M_i| 找最接近配置(优先使用预构建索引) + n_key = str(N) + if n_key in index_by_n and index_by_n[n_key]: + entries = index_by_n[n_key] # sorted by M + m_list = [int(e["M"]) for e in entries] + + # Manual lower_bound (avoid importing bisect). 
+ left = 0 + right = len(m_list) + while left < right: + mid = (left + right) // 2 + if m_list[mid] < M: + left = mid + 1 + else: + right = mid + pos = left + + candidates = [] + if pos < len(entries): + candidates.append(entries[pos]) + if pos > 0: + candidates.append(entries[pos - 1]) + if candidates: + best = min(candidates, key=lambda e: abs(int(e["M"]) - M)) + return best["rows_per_block"], best["vec_size"] + + # Backward-compatible slow path when old summary has no index_by_n. + nearest = None + for _, v in cases.items(): + if int(v.get("N", -1)) != N: + continue + km = int(v.get("M", -1)) + if km < 0: + continue + dist = abs(km - M) + if nearest is None or dist < nearest[0]: + nearest = (dist, v) + if nearest is not None: + return nearest[1]["rows_per_block"], nearest[1]["vec_size"] + + # 3) fallback 默认值 + + return 1, 2 @compile_ops("module_moe_c_kernel") @@ -24,7 +101,8 @@ def moe_c_moe_gemm_marlin_w8a8( num_tokens_post_pad: torch.Tensor, top_k : int, mode :int, - delta: int)-> torch.Tensor: + delta: int, + size_m: int)-> torch.Tensor: """ --------------------------------------------------------------- # MoE 场景下 8bit 量化的 GEMM 计算(Marlin 优化版) @@ -44,6 +122,30 @@ def moe_c_moe_gemm_marlin_w8a8( pass +@compile_ops("module_moe_c_kernel") +def moe_c_moe_gemm_marlin_w8a8_tensorwise( + input: torch.Tensor, + b_qweight : torch.Tensor, + output : torch.Tensor, + a_scale: torch.Tensor, + b_scale : torch.Tensor, + topk_weights : Optional[torch.Tensor], + sorted_token_ids: torch.Tensor, + expert_ids : torch.Tensor, + num_tokens_post_pad: torch.Tensor, + top_k : int, + mode :int, + delta: int, + size_m: int)-> torch.Tensor: + """ + Marlin W8A8 MoE GEMM with tensorwise weight scales. + + b_scale must contain one scale per expert and use shape (E, 1, 1). 
+ """ + + pass + + @compile_ops("module_moe_c_kernel") def moe_c_moe_gemm_marlin_w4a8( input: torch.Tensor, @@ -57,7 +159,8 @@ def moe_c_moe_gemm_marlin_w4a8( num_tokens_post_pad: torch.Tensor, top_k : int, mode :int, - delta: int)-> torch.Tensor: + delta: int, + size_m: int)-> torch.Tensor: """ --------------------------------------------------------------- # MoE 场景下 8bit 量化的 GEMM 计算(Marlin 优化版) @@ -91,7 +194,8 @@ def moe_c_moe_gemm_marlin_w8a8_fp8( num_tokens_post_pad: torch.Tensor, top_k : int, mode :int, - delta: int)-> torch.Tensor: + delta: int, + size_m: int)-> torch.Tensor: """ --------------------------------------------------------------- # MoE 场景下 8bit 量化的 GEMM 计算(Marlin 优化版) @@ -110,6 +214,29 @@ def moe_c_moe_gemm_marlin_w8a8_fp8( pass +@compile_ops("module_moe_c_kernel") +def moe_c_moe_gemm_marlin_w8a8_fp8_tensorwise( + input: torch.Tensor, + b_qweight : torch.Tensor, + output : torch.Tensor, + a_scale: torch.Tensor, + b_scale : torch.Tensor, + topk_weights : Optional[torch.Tensor], + sorted_token_ids: torch.Tensor, + expert_ids : torch.Tensor, + num_tokens_post_pad: torch.Tensor, + top_k : int, + mode :int, + delta: int, + size_m: int)-> torch.Tensor: + """ + Marlin FP8 W8A8 MoE GEMM with tensorwise weight scales. + + b_scale must contain one scale per expert and use shape (E, 1, 1). 
+ """ + + pass + @compile_ops("module_moe_c_kernel") def moe_c_moe_gemm_marlin_w4a16( input: torch.Tensor, @@ -132,6 +259,32 @@ def moe_c_moe_gemm_marlin_w4a16( 必须配合对应的权重 Shuffle 函数使用,否则会导致计算结果完全错误: + --------------------------------------------------------------- + """ + + pass + +@compile_ops("module_moe_c_kernel") +def moe_c_moe_gemm_marlin_w8a16( + input: torch.Tensor, + b_qweight : torch.Tensor, + output : torch.Tensor, + b_scale: torch.Tensor, + topk_weights : Optional[torch.Tensor], + sorted_token_ids: torch.Tensor, + expert_ids : torch.Tensor, + num_tokens_post_pad: torch.Tensor, + top_k : int, + mode :int, + delta: int)-> torch.Tensor: + """ + --------------------------------------------------------------- + # MoE 场景下 4bit 量化的 GEMM 计算(Marlin 优化版) + + ## 关键前置条件 + 必须配合对应的权重 Shuffle 函数使用,否则会导致计算结果完全错误: + + --------------------------------------------------------------- """ @@ -346,9 +499,15 @@ def moe_c_topk_softmax( @compile_ops("module_moe_c_kernel") def moe_c_silu_and_mul( out : torch.Tensor, - input : torch.Tensor) -> None: + input : torch.Tensor, + rows_per_block: int = 1, + vec_size: int = 2) -> None: pass + + + + @compile_ops("module_moe_c_kernel") def moe_c_moe_sum( input: torch.Tensor, # 移除 C++ 引用 & @@ -357,6 +516,13 @@ def moe_c_moe_sum( ) -> None: pass +@compile_ops("module_moe_c_kernel") +def moe_c_moe_sum_opt_v2(input: torch.Tensor,output: torch.Tensor, + routed_scaling_factor: float = 1.0) -> torch.Tensor: + pass + + + @compile_ops("module_moe_c_kernel") def moe_c_moe_align_block_size( topk_ids: torch.Tensor, diff --git a/aiter/ops/moe_sorting.py b/aiter/ops/moe_sorting.py index 13e501c17da90fe28710c95fa35bc53398f626f7..bfd3e2f7e146440bc5a05a3e99b45fe9b8efb717 100644 --- a/aiter/ops/moe_sorting.py +++ b/aiter/ops/moe_sorting.py @@ -15,7 +15,7 @@ def moe_sorting_fwd( sorted_expert_ids: torch.Tensor, tokens_positions_per_expert: torch.Tensor, num_valid_ids: torch.Tensor, - moe_buf: torch.Tensor, + moe_buf: Optional[torch.Tensor], num_experts: int, 
unit_size: int, local_expert_mask: Optional[torch.Tensor] = None, diff --git a/aiter/ops/quant.py b/aiter/ops/quant.py index 605152eed01d1edc7e1af93486f917ba0eab4ea4..ebf3ffb53d53c1f9d2ab8eaedba20cc40d2dfdbf 100644 --- a/aiter/ops/quant.py +++ b/aiter/ops/quant.py @@ -413,3 +413,42 @@ def partial_transpose( input: Tensor, num_rows: Tensor, ) -> None: ... + + +@compile_ops("module_quant") +def moe_swiglu_dynamic_quant( + scatter_tokens: torch.Tensor, + smooth: torch.Tensor, + experts_tokens_count: torch.Tensor, + experts_tokens_start: torch.Tensor, + output: torch.Tensor, + scales: torch.Tensor, + beta: float, +) -> None: + ... + + +def moe_swiglu_dynamic_quant_wrapper( + scatter_tokens: torch.Tensor, + smooth: torch.Tensor, + experts_tokens_count: torch.Tensor, + experts_tokens_start: torch.Tensor, + beta: float = 1.0, +): + + leading, d2 = scatter_tokens.shape + d = d2 // 2 + + output = torch.empty((leading, d), dtype=torch.int8, device=scatter_tokens.device) + scales = torch.empty((leading,), dtype=torch.float32, device=scatter_tokens.device) + + moe_swiglu_dynamic_quant( + scatter_tokens, + smooth, + experts_tokens_count, + experts_tokens_start, + output, + scales, + beta, + ) + return output, scales diff --git a/aiter/ops/rmsnorm.py b/aiter/ops/rmsnorm.py index ef52844b1e34f472bd54b21c4fd7929d6abf3d19..88691e899a7725ddf7247929769ed8c6f349fe30 100644 --- a/aiter/ops/rmsnorm.py +++ b/aiter/ops/rmsnorm.py @@ -120,3 +120,29 @@ def rmsnorm2d_fwd_with_add_dynamicquant( weight: Tensor, epsilon: float, ) -> None: ... + + +@compile_ops("module_rmsnorm", gen_fake=gen_rms_norm_fake_tensor) +def head_rms_norm( + input: Tensor, # [num_tokens, num_heads * head_dim] + weight: Tensor, # [num_heads * head_dim] + epsilon: float, + norm_head_dim: int, # head_dim (size of each head's normalization window) +) -> Tensor: + """ + Apply RMS normalization per head independently. 
+ + Unlike standard rms_norm which normalizes over the entire last dimension, + head_rms_norm normalizes each head's head_dim elements separately with + its own weight parameters. + + Args: + input: shape [num_tokens, num_heads * head_dim] + weight: shape [num_heads * head_dim] + epsilon: small value for numerical stability + norm_head_dim: the dimension of each head (head_dim) + + Returns: + Tensor with same shape as input + """ + ... diff --git a/aiter/ops/shuffle.py b/aiter/ops/shuffle.py index 4a3051a75bedcfed98c2c99e3639350476e172bd..c9bf1e2bc42fa64d7184bf33c77994c1bf7be44d 100644 --- a/aiter/ops/shuffle.py +++ b/aiter/ops/shuffle.py @@ -66,6 +66,39 @@ def _w8a8_marlin_weight_2(weight_input # [size_n, size_k// 2 ] marlin_q_w = _marlin_weights_2(weight, k_tile=64, n_tile=16, pack_factor=8) return marlin_q_w +#w8a16 +def w8a16_marlin_weight_1(weight_input # [size_n, size_k] + ): + w1_qweight = weight_input + e,n,k=w1_qweight.shape + # k = k * 2 + w1_qweight_uint32 = w1_qweight.view(-1).view(torch.uint32) + new_shape = (e, n // 16, 16, k // 32, 8) # uint32张量的形状 + w1_qweight_uint32_reshaped = w1_qweight_uint32.view(new_shape) + w1_qweight_uint32_transposed = w1_qweight_uint32_reshaped.transpose(2, 3).contiguous() + new_shape = (e, n // 16, k // 128, 4, 16, 8) + w1_new_trans = w1_qweight_uint32_transposed.view(new_shape) + w1_qweight_shuffle = w1_new_trans.transpose(1, 2).contiguous() + w1_new = w1_qweight_shuffle.view(-1).view(torch.uint8).view(*w1_qweight.shape) + + + return w1_new + +def w8a16_marlin_weight_2(weight_input # [size_n, size_k] + ): + w2_qweight = weight_input + e,k,n=w2_qweight.shape + # n = n * 2 + w2_qweight_uint32 = w2_qweight.view(-1).view(torch.uint32) + new_shape = (e, k // 16, 16, n // 32, 8) # uint32张量的形状 + w2_qweight_uint32_reshaped = w2_qweight_uint32.view(new_shape) + w2_qweight_uint32_transposed = w2_qweight_uint32_reshaped.transpose(2, 3).contiguous() + new_shape = (e, k // 16, n // 128, 4, 16, 8) + w2_new_trans = 
w2_qweight_uint32_transposed.view(new_shape) + w2_qweight_shuffle = w2_new_trans.transpose(1, 2).contiguous() + w2_new = w2_qweight_shuffle.view(-1).view(torch.uint8).view(*w2_qweight.shape) + + return w2_new def _marlin_weights( diff --git a/aiter/ops/tilelang/__init__.py b/aiter/ops/tilelang/__init__.py index 2ccdf5c7c5b14d375c788cc32a0db10ca889ce02..d5ae285d526af3f3428ddc2fb7a34628f0a0cfa3 100644 --- a/aiter/ops/tilelang/__init__.py +++ b/aiter/ops/tilelang/__init__.py @@ -1,6 +1,14 @@ # SPDX-License-Identifier: MIT from .sparse_mla_fwd import tilelang_sparse_fwd, ref_sparse_mla_fwd_interface +from .mhc import hc_split_sinkhorn, mhc_fused_tilelang, mhc_post_fwd, mhc_pre_big_fuse, pre_big_fuse_tilelang -__all__ = ["tilelang_sparse_fwd", "ref_sparse_mla_fwd_interface"] - +__all__ = [ + "tilelang_sparse_fwd", + "ref_sparse_mla_fwd_interface", + "mhc_pre_big_fuse", + "pre_big_fuse_tilelang", + "mhc_post_fwd", + "hc_split_sinkhorn", + "mhc_fused_tilelang", +] diff --git a/aiter/ops/tilelang/mhc/__init__.py b/aiter/ops/tilelang/mhc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fa9722f031224f4a44baad2ddd720f8eb5072528 --- /dev/null +++ b/aiter/ops/tilelang/mhc/__init__.py @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: MIT + +from .hc_split_sinkhorn_kernel import hc_split_sinkhorn +from .mhc_fused_post_pre_kernel import mhc_fused_tilelang +from .post_kernel import mhc_post_fwd +from .pre_big_fuse import mhc_pre_big_fuse +from .pre_big_fuse_kernel import pre_big_fuse_tilelang + +__all__ = ["mhc_pre_big_fuse", "pre_big_fuse_tilelang", "mhc_post_fwd", "hc_split_sinkhorn", "mhc_fused_tilelang"] diff --git a/aiter/ops/tilelang/mhc/hc_split_sinkhorn_kernel.py b/aiter/ops/tilelang/mhc/hc_split_sinkhorn_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..2c247592e1abb8d74809cd0fb384a0df873ebebb --- /dev/null +++ b/aiter/ops/tilelang/mhc/hc_split_sinkhorn_kernel.py @@ -0,0 +1,176 @@ +import tilelang +import torch +from 
tilelang import language as T + +_PASS_CONFIGS = { + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True, +} + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_split_sinkhorn_fwd_orig( + hc: int, + sinkhorn_iters: int, + eps: float, + threads: int, +) -> tilelang.JITKernel: + n = T.dynamic('n') + mix_hc = (2 + hc) * hc + + @T.prim_func + def _mhc_split_sinkhorn_fwd_orig_kernel( + mixes: T.Tensor[(n, mix_hc), T.float32], + hc_scale: T.Tensor[(3,), T.float32], + hc_base: T.Tensor[(mix_hc,), T.float32], + pre: T.Tensor[(n, hc), T.float32], + post: T.Tensor[(n, hc), T.float32], + comb: T.Tensor[(n, hc, hc), T.float32], + ) -> None: + with T.Kernel(n, threads=threads) as i: + mixes_shared = T.alloc_shared(mix_hc, T.float32) + comb_frag = T.alloc_fragment((hc, hc), T.float32) + row_sum = T.alloc_fragment(hc, T.float32) + col_sum = T.alloc_fragment(hc, T.float32) + row_max = T.alloc_fragment(hc, T.float32) + + T.copy(mixes[i, :], mixes_shared) + + for j in T.Parallel(hc): + pre[i, j] = T.sigmoid(mixes_shared[j] * hc_scale[0] + hc_base[j]) + eps + for j in T.Parallel(hc): + post[i, j] = 2 * T.sigmoid(mixes_shared[j + hc] * hc_scale[1] + hc_base[j + hc]) + for j, k in T.Parallel(hc, hc): + comb_frag[j, k] = ( + mixes_shared[j * hc + k + hc * 2] * hc_scale[2] + + hc_base[j * hc + k + hc * 2] + ) + + T.reduce_max(comb_frag, row_max, dim=1) + for j, k in T.Parallel(hc, hc): + comb_frag[j, k] = T.exp(comb_frag[j, k] - row_max[j]) + T.reduce_sum(comb_frag, row_sum, dim=1) + for j, k in T.Parallel(hc, hc): + comb_frag[j, k] = comb_frag[j, k] / row_sum[j] + eps + + T.reduce_sum(comb_frag, col_sum, dim=0) + for j, k in T.Parallel(hc, hc): + comb_frag[j, k] = comb_frag[j, k] / (col_sum[k] + eps) + + for _ in T.serial(sinkhorn_iters - 1): + T.reduce_sum(comb_frag, row_sum, dim=1) + for j, k in T.Parallel(hc, hc): + comb_frag[j, k] = comb_frag[j, k] / (row_sum[j] + eps) + T.reduce_sum(comb_frag, col_sum, dim=0) + for j, k 
in T.Parallel(hc, hc): + comb_frag[j, k] = comb_frag[j, k] / (col_sum[k] + eps) + + T.copy(comb_frag, comb[i, :, :]) + + return _mhc_split_sinkhorn_fwd_orig_kernel + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_split_sinkhorn_fwd( + hc: int, + sinkhorn_iters: int, + eps: float, + token_block_size: int, + threads: int, +) -> tilelang.JITKernel: + n = T.dynamic('n') + mix_hc = (2 + hc) * hc + @T.prim_func + def _mhc_split_sinkhorn_fwd_kernel( + mixes: T.Tensor[(n, mix_hc), T.float32], + hc_scale: T.Tensor[(3,), T.float32], + hc_base: T.Tensor[(mix_hc,), T.float32], + pre: T.Tensor[(n, hc), T.float32], + post: T.Tensor[(n, hc), T.float32], + comb: T.Tensor[(n, hc, hc), T.float32], + ) -> None: + with T.Kernel(T.ceildiv(n, token_block_size), threads=threads) as pid_x: + mixes_shared = T.alloc_shared((token_block_size, mix_hc), T.float32) + comb_frag = T.alloc_fragment((token_block_size, hc, hc), T.float32) + row_sum = T.alloc_fragment((token_block_size, hc), T.float32) + col_sum = T.alloc_fragment((token_block_size, hc), T.float32) + row_max = T.alloc_fragment((token_block_size, hc), T.float32) + T.copy(mixes[pid_x * token_block_size, 0], mixes_shared) + for i, j in T.Parallel(token_block_size, hc): + idx = pid_x * token_block_size + i + if idx < n: + pre[idx, j] = T.sigmoid(mixes_shared[i, j] * hc_scale[0] + hc_base[j]) + eps + for i, j in T.Parallel(token_block_size, hc): + idx = pid_x * token_block_size + i + if idx < n: + post[idx, j] = 2 * T.sigmoid( + mixes_shared[i, j + hc] * hc_scale[1] + hc_base[j + hc] + ) + + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, k] = ( + mixes_shared[i, j * hc + k + hc * 2] * hc_scale[2] + + hc_base[j * hc + k + hc * 2] + ) + + T.reduce_max(comb_frag, row_max, dim=2) + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, k] = T.exp(comb_frag[i, j, k] - row_max[i, j]) + T.reduce_sum(comb_frag, row_sum, dim=2) + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, 
k] = comb_frag[i, j, k] / row_sum[i, j] + eps + + T.reduce_sum(comb_frag, col_sum, dim=1) + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, k] = comb_frag[i, j, k] / (col_sum[i, k] + eps) + + for _ in T.serial(sinkhorn_iters - 1): + T.reduce_sum(comb_frag, row_sum, dim=2) + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, k] = comb_frag[i, j, k] / (row_sum[i, j] + eps) + T.reduce_sum(comb_frag, col_sum, dim=1) + for i, j, k in T.Parallel(token_block_size, hc, hc): + comb_frag[i, j, k] = comb_frag[i, j, k] / (col_sum[i, k] + eps) + + for i, j, k in T.Parallel(token_block_size, hc, hc): + idx = pid_x * token_block_size + i + if idx < n: + comb[idx, j, k] = comb_frag[i, j, k] + + return _mhc_split_sinkhorn_fwd_kernel + + +def mhc_split_sinkhorn( + mixes: torch.Tensor, + hc_scale: torch.Tensor, + hc_base: torch.Tensor, + hc_mult: int = 4, + sinkhorn_iters: int = 20, + eps: float = 1e-6, + token_block_size: int = 32, + threads: int = 128, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + b, s, _ = mixes.size() + n = b * s + pre = mixes.new_empty(b, s, hc_mult) + post = mixes.new_empty(b, s, hc_mult) + comb = mixes.new_empty(b, s, hc_mult, hc_mult) + if threads * token_block_size // 4 > n: + kernel = _mhc_split_sinkhorn_fwd_orig(hc_mult, sinkhorn_iters, eps, threads) + else: + kernel = _mhc_split_sinkhorn_fwd(hc_mult, sinkhorn_iters, eps, token_block_size, threads) + kernel( + mixes.contiguous().view(-1, (2 + hc_mult) * hc_mult), + hc_scale.contiguous(), + hc_base.contiguous(), + pre.view(-1, hc_mult), + post.view(-1, hc_mult), + comb.view(-1, hc_mult, hc_mult), + ) + return pre, post, comb + + + +# public alias +def hc_split_sinkhorn(*args, **kwargs): + return mhc_split_sinkhorn(*args, **kwargs) diff --git a/aiter/ops/tilelang/mhc/mhc_fused_post_pre_kernel.py b/aiter/ops/tilelang/mhc/mhc_fused_post_pre_kernel.py new file mode 100644 index 
0000000000000000000000000000000000000000..a04214a96a7f696643c3806b4d8d29cf65cc2188 --- /dev/null +++ b/aiter/ops/tilelang/mhc/mhc_fused_post_pre_kernel.py @@ -0,0 +1,174 @@ +import math + +import tilelang +from tilelang import language as T + + +@tilelang.jit( + pass_configs={ + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + }, +) +def mhc_post_tilelang( + a, + b, + c, + d, + x, + mhc: int, + hidden: int, + n_thr: int = 128, + h_blk: int = 1024, +) -> tilelang.JITKernel: + n = T.dynamic("num_tokens") + h = hidden + h_blk = math.gcd(hidden, h_blk) + + a: T.Tensor((n, mhc, mhc), T.float32) # type: ignore[no-redef, valid-type] + b: T.Tensor((n, mhc, h), T.bfloat16) # type: ignore[no-redef, valid-type] + c: T.Tensor((n, mhc), T.float32) # type: ignore[no-redef, valid-type] + d: T.Tensor((n, h), T.bfloat16) # type: ignore[no-redef, valid-type] + x: T.Tensor((n, mhc, h), T.bfloat16) # type: ignore[no-redef, valid-type] + + with T.Kernel(n, threads=n_thr) as i_n: + x_shared = T.alloc_shared((mhc, h_blk), T.bfloat16) + b_shared = T.alloc_shared((mhc, h_blk), T.bfloat16) + d_shared = T.alloc_shared(h_blk, T.bfloat16) + + x_local = T.alloc_fragment((mhc, h_blk), T.float32) + b_local = T.alloc_fragment((mhc, h_blk), T.float32) + d_local = T.alloc_fragment(h_blk, T.float32) + + a_local = T.alloc_fragment((mhc, mhc), T.float32) + c_local = T.alloc_fragment(mhc, T.float32) + T.copy(a[i_n, 0, 0], a_local) + T.copy(c[i_n, 0], c_local) + + for i0_h in T.Pipelined(T.ceildiv(h, h_blk), num_stages=2): + T.copy(b[i_n, 0, i0_h * h_blk], b_shared) + T.copy(d[i_n, i0_h * h_blk], d_shared) + + T.copy(b_shared, b_local) + T.copy(d_shared, d_local) + for i_hco, i1_h in T.Parallel(mhc, h_blk): + x_local[i_hco, i1_h] = c_local[i_hco] * d_local[i1_h] + for i_hci in T.serial(mhc): + x_local[i_hco, i1_h] += a_local[i_hci, i_hco] * b_local[i_hci, i1_h] + T.copy(x_local, 
x_shared) + T.copy(x_shared, x[i_n, 0, i0_h * h_blk]) + + +@tilelang.jit( + pass_configs={ + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_DISABLE_TMA_LOWER: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + }, +) +def mhc_fused_tilelang( + comb_mix, + residual_in, + post_mix, + x_in, + weight_t, + yp_out, + rp_out, + residual_out, + mhc: int, + hidden: int, + n_out: int, + n_thr: int = 256, + h_blk: int = 256, + tile_n: int = 1, + split_k: int = 1, +) -> tilelang.JITKernel: + m = T.dynamic("num_tokens") + split_k = T.dynamic("split_k") + h = hidden + h_blk = math.gcd(hidden, h_blk) + h_per_split = h // split_k + n_tiles = n_out // tile_n + h_iters = h_per_split // n_thr + warp_size = 64 + num_warps = n_thr // warp_size + + comb_mix: T.Tensor((m, mhc, mhc), T.float32) # type: ignore[no-redef, valid-type] + residual_in: T.Tensor((m, mhc, h), T.bfloat16) # type: ignore[no-redef, valid-type] + post_mix: T.Tensor((m, mhc), T.float32) # type: ignore[no-redef, valid-type] + x_in: T.Tensor((m, h), T.bfloat16) # type: ignore[no-redef, valid-type] + weight_t: T.Tensor((n_out, mhc, h), T.float32) # type: ignore[no-redef, valid-type] + yp_out: T.Tensor((split_k, m, n_out), T.float32) # type: ignore[no-redef, valid-type] + rp_out: T.Tensor((split_k, m), T.float32) # type: ignore[no-redef, valid-type] + residual_out: T.Tensor((m, mhc, h), T.bfloat16) # type: ignore[no-redef, valid-type] + + with T.Kernel(m, n_tiles, split_k, threads=n_thr) as (i_n, i_nt, i_ks): + tid = T.get_thread_binding() + # warp_id = tid // warp_size + # lane = tid % warp_size + warp_id = T.get_warp_idx() + lane = T.get_lane_idx() + h_split_start = i_ks * h_per_split + + s_warp = T.alloc_shared((num_warps, tile_n + 1), T.float32) + s_post = T.alloc_shared((mhc,), T.float32) + s_comb = T.alloc_shared((mhc, mhc), T.float32) + + pm = T.alloc_local((mhc,), T.float32) + cm = T.alloc_local((mhc, mhc), T.float32) + acc = T.alloc_local((tile_n,), T.float32) 
+ sqr = T.alloc_local((1,), T.float32) + new_r = T.alloc_local((mhc,), T.float32) + T.clear(acc) + T.clear(sqr) + + T.copy(post_mix[i_n, 0], s_post) + T.copy(comb_mix[i_n, 0, 0], s_comb) + + for j in T.unroll(mhc): + pm[j] = s_post[j] + for j in T.unroll(mhc): + for k in T.unroll(mhc): + cm[k, j] = s_comb[k, j] + + for it in T.serial(h_iters): + h_idx = h_split_start + it * n_thr + tid + for j in T.unroll(mhc): + new_r[j] = pm[j] * x_in[i_n, h_idx] + for k in T.unroll(mhc): + new_r[j] += cm[k, j] * residual_in[i_n, k, h_idx] + + if i_nt == 0: + for j in T.unroll(mhc): + residual_out[i_n, j, h_idx] = new_r[j] + sqr[0] += new_r[j] * new_r[j] + + for n in T.unroll(tile_n): + for j in T.unroll(mhc): + acc[n] += weight_t[i_nt * tile_n + n, j, h_idx] * new_r[j] + + for n in T.unroll(tile_n): + acc[n] = T.warp_reduce_sum(acc[n]) + if i_nt == 0: + sqr[0] = T.warp_reduce_sum(sqr[0]) + + if lane == 0: + for n in T.unroll(tile_n): + s_warp[warp_id, n] = acc[n] + if i_nt == 0: + s_warp[warp_id, tile_n] = sqr[0] + T.sync_threads() + + if warp_id == 0: + if lane < tile_n: + v = T.alloc_var(T.float32, init=0.0) + for w in T.unroll(num_warps): + v += s_warp[w, lane] + yp_out[i_ks, i_n, i_nt * tile_n + lane] = v + + if i_nt == 0 and lane == 0: + v2 = T.alloc_var(T.float32, init=0.0) + for w in T.unroll(num_warps): + v2 += s_warp[w, tile_n] + rp_out[i_ks, i_n] = v2 diff --git a/aiter/ops/tilelang/mhc/norm_fn_kernel.py b/aiter/ops/tilelang/mhc/norm_fn_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..879ffe65cb7f6432cd83cd47c184d7324a38e245 --- /dev/null +++ b/aiter/ops/tilelang/mhc/norm_fn_kernel.py @@ -0,0 +1,305 @@ +import tilelang +import torch +from tilelang import language as T + + +_PASS_CONFIGS = { + tilelang.PassConfigKey.TL_DISABLE_WGMMA: True, + tilelang.PassConfigKey.TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE: True, + tilelang.PassConfigKey.TL_ENABLE_FAST_MATH: True, +} + + +@tilelang.jit +def _mhc_fn_normw_merge_fwd(m: int, n: int, dtype: 
T.dtype = T.float32) -> tilelang.JITKernel: + n_blk = 256 + + @T.prim_func + def _mhc_fn_normw_merge_fwd_( + fn: T.Tensor[(m, n), dtype], + normw: T.Tensor[n, dtype], + out_fn: T.Tensor[(m, n), dtype], + ) -> None: + _ = dtype + with T.Kernel(m, T.ceildiv(n, n_blk)) as (pid_m, pid_n): + for i1_n in T.Parallel(n_blk): + i_n = pid_n * n_blk + i1_n + if i_n < n: + out_fn[pid_m, i_n] = fn[pid_m, i_n] * normw[i_n] + + return _mhc_fn_normw_merge_fwd_ + + +@tilelang.jit +def _mhc_fn_normw_merge_bwd(m: int, n: int, dtype: T.dtype = T.float32) -> tilelang.JITKernel: + n_blk = 256 + + @T.prim_func + def _mhc_fn_normw_merge_bwd_( + fn: T.Tensor[(m, n), dtype], + normw: T.Tensor[n, dtype], + out_fn_grad: T.Tensor[(m, n), dtype], + fn_grad: T.Tensor[(m, n), dtype], + normw_grad: T.Tensor[n, dtype], + ) -> None: + _ = dtype + with T.Kernel(T.ceildiv(n, n_blk)) as pid_n: + normw_frag = T.alloc_fragment(n_blk, dtype) + T.copy(normw[pid_n * n_blk], normw_frag) + + normw_grad_frag = T.alloc_fragment(n_blk, dtype) + T.clear(normw_grad_frag) + + for i_m in T.serial(m): + for i1_n in T.Parallel(n_blk): + i_n = pid_n * n_blk + i1_n + if i_n < n: + fn_grad[i_m, i_n] += out_fn_grad[i_m, i_n] * normw_frag[i1_n] + normw_grad_frag[i1_n] += out_fn_grad[i_m, i_n] * fn[i_m, i_n] + + for i1_n in T.Parallel(n_blk): + normw_grad[pid_n * n_blk + i1_n] += normw_grad_frag[i1_n] + + return _mhc_fn_normw_merge_bwd_ + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_pre_norm_fn_fwd_mul( + mhc_mult3: int, + n_rms_group: int, + rms_group_size: int, + token_block: int = 64, + hidden_block: int = 256, +) -> tilelang.JITKernel: + assert mhc_mult3 <= 32 + num_tokens = T.dynamic('num_tokens') + assert rms_group_size % hidden_block == 0 + + @T.prim_func + def _mhc_pre_norm_fn_fwd_mul_kernel( + x: T.Tensor[(num_tokens, n_rms_group * rms_group_size), T.bfloat16], + fn: T.Tensor[(mhc_mult3, n_rms_group * rms_group_size), T.float32], + out: T.Tensor[(num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum: 
T.Tensor[(num_tokens, n_rms_group), T.float32], + ) -> None: + _ = mhc_mult3 + with T.Kernel(T.ceildiv(num_tokens, token_block), n_rms_group, threads=256) as (pid_x, pid_y): + out_frag = T.alloc_fragment((token_block, 32), T.float32) + sqrsum_part = T.alloc_fragment((token_block, 16), T.float32) + T.clear(out_frag) + T.clear(sqrsum_part) + for pz in T.Pipelined(rms_group_size // hidden_block, num_stages=0): + x_frag_pre = T.alloc_fragment((token_block, hidden_block), T.bfloat16) + fn_frag_pre = T.alloc_fragment((32, hidden_block), T.float32) + x_frag_16 = T.alloc_fragment((token_block, hidden_block), T.bfloat16) + x_frag = T.alloc_fragment((token_block, hidden_block), T.float32) + fn_frag = T.alloc_fragment((32, hidden_block), T.float32) + + x_smem_16 = T.alloc_shared((token_block, hidden_block), T.bfloat16) + fn_smem = T.alloc_shared((32, hidden_block), T.float32) + T.annotate_layout({x_smem_16: tilelang.layout.make_hcu_swizzled_layout(x_smem_16, major_pack=2)}) + T.annotate_layout({fn_smem: tilelang.layout.make_hcu_swizzled_layout(fn_smem, major_pack=2)}) + + T.copy(x[pid_x * token_block, pid_y * rms_group_size + pz * hidden_block], x_frag_pre) + T.copy(fn[0, pid_y * rms_group_size + pz * hidden_block], fn_frag_pre) + + T.copy(x_frag_pre, x_smem_16) + T.copy(x_smem_16, x_frag_16) + T.copy(x_frag_16, x_frag) + T.copy(fn_frag_pre, fn_smem) + T.copy(fn_smem, fn_frag) + + for jj in T.serial(hidden_block // 16): + for i, j in T.Parallel(token_block, 16): + sqrsum_part[i, j] += x_frag[i, jj * 16 + j] * x_frag[i, jj * 16 + j] + + T.gemm( + x_frag, + fn_frag, + out_frag, + transpose_A=False, + transpose_B=True, + clear_accum=False, + k_pack=2, + policy=T.GemmWarpPolicy.FullRow, + use_tf32=True, + ) + sqrsum_l = T.alloc_fragment(token_block, T.float32) + T.reduce_sum(sqrsum_part, sqrsum_l) + out_shared = T.alloc_shared((token_block, 32), T.float32) + T.annotate_layout({out_shared: tilelang.layout.make_hcu_swizzled_layout(out_shared, major_pack=2)}) + T.copy(out_frag, 
out_shared) + + for i in T.Parallel(token_block): + sqrsum[pid_x * token_block + i, pid_y] = sqrsum_l[i] + for i, j in T.Parallel(token_block, 32): + if j < 24: + out[pid_x * token_block + i, pid_y, j] = out_shared[i, j] + + return _mhc_pre_norm_fn_fwd_mul_kernel + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_pre_norm_fn_fwd_norm( + mhc_mult3: int, + n_rms_group: int, + rms_group_size: int, + rms_eps: float, + n_splits: int, +) -> tilelang.JITKernel: + num_tokens = T.dynamic('num_tokens') + n_thr = 32 + + @T.prim_func + def _mhc_pre_norm_fn_fwd_norm_kernel( + out_mul_splitted: T.Tensor[(n_splits, num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum_splitted: T.Tensor[(n_splits, num_tokens, n_rms_group), T.float32], + out_mul: T.Tensor[(num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum: T.Tensor[(num_tokens, n_rms_group), T.float32], + out: T.Tensor[(num_tokens, mhc_mult3), T.float32], + ) -> None: + with T.Kernel(num_tokens, threads=n_thr) as pid: + rms = T.alloc_fragment(1, T.float32) + out_l = T.alloc_fragment(mhc_mult3, T.float32) + out_l0 = T.alloc_fragment(mhc_mult3, T.float32) + T.clear(out_l) + for k in T.serial(n_rms_group): + rms[0] = 0 + for i_split in T.serial(n_splits): + rms[0] += sqrsum_splitted[i_split, pid, k] + if T.get_thread_binding() == 0: + sqrsum[pid, k] = rms[0] + rms[0] = T.rsqrt(rms[0] / rms_group_size + rms_eps) + for j in T.Parallel(mhc_mult3): + out_l0[j] = 0 + for i_split in T.serial(n_splits): + out_l0[j] += out_mul_splitted[i_split, pid, k, j] + out_l[j] += out_l0[j] * rms[0] + T.copy(out_l0, out_mul[pid, k, :]) + T.copy(out_l[:], out[pid, :]) + + return _mhc_pre_norm_fn_fwd_norm_kernel + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_pre_norm_fn_bwd_norm( + mhc_mult3: int, + n_rms_group: int, + rms_group_size: int, + rms_eps: float, +) -> tilelang.JITKernel: + num_tokens = T.dynamic('num_tokens') + n_thr = 32 + + @T.prim_func + def _mhc_pre_norm_fn_bwd_norm_kernel( + # Gradient of output + out_grad: 
T.Tensor[(num_tokens, mhc_mult3), T.float32], + # Saved inputs + out_mul: T.Tensor[(num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum: T.Tensor[(num_tokens, n_rms_group), T.float32], + # Computed gradient of inputs + out_mul_grad: T.Tensor[(num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum_grad: T.Tensor[(num_tokens, n_rms_group), T.float32], + ) -> None: + with T.Kernel(num_tokens, n_rms_group, threads=n_thr) as (pid_i, pid_k): + sqrsum_frag = T.alloc_fragment(1, T.float32) + sqrsum_frag[0] = sqrsum[pid_i, pid_k] + rms_frag = T.alloc_fragment(1, T.float32) + rms_frag[0] = T.rsqrt(sqrsum_frag[0] / rms_group_size + rms_eps) + + rms_grad_frag = T.alloc_reducer(1, T.float32, replication='all') + T.clear(rms_grad_frag) + for j in T.Parallel(mhc_mult3): + out_mul_grad[pid_i, pid_k, j] = out_grad[pid_i, j] * rms_frag[0] + rms_grad_frag[0] += out_grad[pid_i, j] * out_mul[pid_i, pid_k, j] + T.finalize_reducer(rms_grad_frag) + + for kk in T.Parallel(1): + sqrsum_grad[pid_i, pid_k + kk] = rms_grad_frag[kk] * rms_frag[kk] / (sqrsum_frag[kk] + rms_eps * rms_group_size) / -2 + + return _mhc_pre_norm_fn_bwd_norm_kernel + + +@tilelang.jit(pass_configs=_PASS_CONFIGS) +def _mhc_pre_norm_fn_bwd_mul( + mhc_mult3: int, + n_rms_group: int, + rms_group_size: int, + token_block: int = 128, + hidden_block: int = 128, +) -> tilelang.JITKernel: + assert mhc_mult3 <= 32 + num_tokens = T.dynamic('num_tokens') + assert rms_group_size % hidden_block == 0 + + @T.prim_func + def _mhc_pre_norm_fn_bwd_mul_kernel( + # Gradient of output + out_mul_grad: T.Tensor[(num_tokens, n_rms_group, mhc_mult3), T.float32], + sqrsum_grad: T.Tensor[(num_tokens, n_rms_group), T.float32], + # Saved inputs + x: T.Tensor[(num_tokens, n_rms_group * rms_group_size), T.bfloat16], + fn: T.Tensor[(mhc_mult3, n_rms_group * rms_group_size), T.float32], + # Computed gradient of inputs + x_grad: T.Tensor[(num_tokens, n_rms_group * rms_group_size), T.bfloat16], + fn_grad: T.Tensor[(mhc_mult3, n_rms_group * 
rms_group_size), T.float32], + ) -> None: + with T.Kernel(n_rms_group, T.ceildiv(rms_group_size, hidden_block)) as (pid_y, pid_z): + yz = pid_y * rms_group_size + pid_z * hidden_block + + fn_smem = T.alloc_shared((32, hidden_block), T.float32) + for i, j in T.Parallel(32, hidden_block): + if i < mhc_mult3: + fn_smem[i, j] = fn[i, yz + j] + else: + fn_smem[i, j] = 0 + + fn_grad_frag = T.alloc_fragment((32, hidden_block), T.float32) + T.fill(fn_grad_frag, 0) + + for px in T.serial(T.ceildiv(num_tokens, token_block)): + x_smem = T.alloc_shared((token_block, hidden_block), T.float32) + T.copy(x[px * token_block, yz], x_smem) + + padded_grad = T.alloc_shared((token_block, 32), T.float32) + for i, j in T.Parallel(token_block, 32): + if j < mhc_mult3: + padded_grad[i, j] = out_mul_grad[px * token_block + i, pid_y, j] + else: + padded_grad[i, j] = 0 + + x_grad_frag = T.alloc_fragment((token_block, hidden_block), T.float32) + T.copy(x_grad[px * token_block, yz], x_grad_frag) + + T.gemm( + padded_grad, + x_smem, + fn_grad_frag, + transpose_A=True, + transpose_B=False, + clear_accum=False, + ) + T.gemm( + padded_grad, + fn_smem, + x_grad_frag, + transpose_A=False, + transpose_B=False, + clear_accum=False, + ) + + sqrsum_grad_frag = T.alloc_fragment((token_block, 1), T.float32) + T.copy(sqrsum_grad[px * token_block, pid_y], sqrsum_grad_frag) + for i, j in T.Parallel(token_block, hidden_block): + x_grad_frag[i, j] += 2 * x_smem[i, j] * sqrsum_grad_frag[i, 0] + + T.copy(x_grad_frag, x_grad[px * token_block, yz]) + + T.copy(fn_grad_frag, fn_grad[0, yz]) + + return _mhc_pre_norm_fn_bwd_mul_kernel + + +def round_to_tf32(x: torch.Tensor) -> torch.Tensor: + return (x.view(torch.int32) + 0x1000).view(torch.float32) diff --git a/aiter/ops/tilelang/mhc/post_kernel.py b/aiter/ops/tilelang/mhc/post_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..5896266e4dea8191e7206bc14666ac6e191e919a --- /dev/null +++ b/aiter/ops/tilelang/mhc/post_kernel.py @@ -0,0 +1,210 
@@ +# SPDX-License-Identifier: MIT + +import math + +import tilelang +import torch +from tilelang import language as T + +# Global guards for validating split-k stage0/stage1 kernels. +cu_count = torch.cuda.get_device_properties("cuda").multi_processor_count + + +@tilelang.jit( + pass_configs={ + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + tilelang.PassConfigKey.TL_DISABLE_VECTORIZE_256: True, + tilelang.PassConfigKey.TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE: True, + }, +) +def _mhc_post_fwd(mhc: int, hidden: int, n_thr: int = 128, h_blk: int = 1024) -> tilelang.JITKernel: + n = T.dynamic("num_tokens") + h = hidden + + h_blk = math.gcd(hidden, h_blk) + + @T.prim_func + def _mhc_post_fwd_kernel( + a: T.Tensor[(n, mhc, mhc), T.float32], + b: T.Tensor[(n, mhc, h), T.bfloat16], + c: T.Tensor[(n, mhc), T.float32], + d: T.Tensor[(n, h), T.bfloat16], + x: T.Tensor[(n, mhc, h), T.bfloat16], + ) -> None: + with T.Kernel(n, threads=n_thr) as pid_n: + b_shared = T.alloc_shared((mhc, h_blk), T.bfloat16) + + x_local = T.alloc_fragment((mhc, h_blk), T.float32) + b_local = T.alloc_fragment((mhc, h_blk), T.float32) + d_local = T.alloc_fragment(h_blk, T.float32) + + a_local = T.alloc_fragment((mhc, mhc), T.float32) + c_local = T.alloc_fragment(mhc, T.float32) + T.copy(a[pid_n, 0, 0], a_local) + T.copy(c[pid_n, 0], c_local) + + for i0_h in T.Pipelined(T.ceildiv(h, h_blk), num_stages=1): + T.copy(b[pid_n, 0, i0_h * h_blk], b_shared, disable_tma=True) + T.copy(b_shared, b_local) + T.copy(d[pid_n, i0_h * h_blk], d_local, disable_tma=True) + + for i_mhco, i1_h in T.Parallel(mhc, h_blk): + x_local[i_mhco, i1_h] = c_local[i_mhco] * d_local[i1_h] + for i_mhci in T.serial(mhc): + x_local[i_mhco, i1_h] += a_local[i_mhci, i_mhco] * b_local[i_mhci, i1_h] + T.copy(x_local, x[pid_n, 0, i0_h * h_blk], disable_tma=True, coalesced_width=8) + + return _mhc_post_fwd_kernel + + +@tilelang.jit( + pass_configs={ + 
tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + tilelang.PassConfigKey.TL_DISABLE_VECTORIZE_256: True, + tilelang.PassConfigKey.TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE: True, + }, +) +def _mhc_post_fwd_split_h( + mhc: int, + hidden: int, + n_thr: int = 128, + h_blk: int = 1024, +) -> tilelang.JITKernel: + n = T.dynamic("num_tokens") + h = hidden + + h_blk = math.gcd(hidden, h_blk) + + @T.prim_func + def _mhc_post_fwd_split_h_kernel( + a: T.Tensor[(n, mhc, mhc), T.float32], + b: T.Tensor[(n, mhc, h), T.bfloat16], + c: T.Tensor[(n, mhc), T.float32], + d: T.Tensor[(n, h), T.bfloat16], + x: T.Tensor[(n, mhc, h), T.bfloat16], + ) -> None: + with T.Kernel(n, T.ceildiv(h, h_blk), threads=n_thr) as (pid_n, pid_h): + b_shared = T.alloc_shared((mhc, h_blk), T.bfloat16) + + x_local = T.alloc_fragment((mhc, h_blk), T.float32) + b_local = T.alloc_fragment((mhc, h_blk), T.float32) + d_local = T.alloc_fragment(h_blk, T.float32) + + a_local = T.alloc_fragment((mhc, mhc), T.float32) + c_local = T.alloc_fragment(mhc, T.float32) + T.copy(a[pid_n, 0, 0], a_local) + T.copy(c[pid_n, 0], c_local) + + h_start = pid_h * h_blk + T.copy(b[pid_n, 0, h_start], b_shared, disable_tma=True) + T.copy(b_shared, b_local) + T.copy(d[pid_n, h_start], d_local, disable_tma=True) + + for i_mhco, i1_h in T.Parallel(mhc, h_blk): + x_local[i_mhco, i1_h] = c_local[i_mhco] * d_local[i1_h] + for i_mhci in T.serial(mhc): + x_local[i_mhco, i1_h] += a_local[i_mhci, i_mhco] * b_local[i_mhci, i1_h] + T.copy(x_local, x[pid_n, 0, h_start], disable_tma=True, coalesced_width=8) + + return _mhc_post_fwd_split_h_kernel + + +@tilelang.jit( + pass_configs={ + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + tilelang.PassConfigKey.TL_DISABLE_VECTORIZE_256: True, + tilelang.PassConfigKey.TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE: True, + }, +) +def _mhc_post_fwd_wo_shmem( + mhc: int, 
+ hidden: int, + n_thr: int = 128, + h_blk: int = 1024, +) -> tilelang.JITKernel: + n = T.dynamic("num_tokens") + h = hidden + + h_blk = math.gcd(hidden, h_blk) + + @T.prim_func + def _mhc_post_fwd_wo_shmem_kernel( + a: T.Tensor[(n, mhc, mhc), T.float32], + b: T.Tensor[(n, mhc, h), T.bfloat16], + c: T.Tensor[(n, mhc), T.float32], + d: T.Tensor[(n, h), T.bfloat16], + x: T.Tensor[(n, mhc, h), T.bfloat16], + ) -> None: + with T.Kernel(n, threads=n_thr) as pid_n: + x_local = T.alloc_fragment((mhc, h_blk), T.float32) + b_local = T.alloc_fragment((mhc, h_blk), T.float32) + d_local = T.alloc_fragment(h_blk, T.float32) + + a_local = T.alloc_fragment((mhc, mhc), T.float32) + c_local = T.alloc_fragment(mhc, T.float32) + T.copy(a[pid_n, 0, 0], a_local) + T.copy(c[pid_n, 0], c_local) + + for i0_h in T.Pipelined(T.ceildiv(h, h_blk), num_stages=0): + T.copy(b[pid_n, 0, i0_h * h_blk], b_local, disable_tma=True) + T.copy(d[pid_n, i0_h * h_blk], d_local, disable_tma=True) + + for i_mhco, i1_h in T.Parallel(mhc, h_blk): + x_local[i_mhco, i1_h] = c_local[i_mhco] * d_local[i1_h] + for i_mhci in T.serial(mhc): + x_local[i_mhco, i1_h] += a_local[i_mhci, i_mhco] * b_local[i_mhci, i1_h] + + T.copy(x_local, x[pid_n, 0, i0_h * h_blk], disable_tma=True, coalesced_width=8) + + return _mhc_post_fwd_wo_shmem_kernel + + +def mhc_post_fwd( + x: torch.Tensor, + residual: torch.Tensor, + post_layer_mix: torch.Tensor, + comb_res_mix: torch.Tensor, + out: torch.Tensor | None = None, +) -> torch.Tensor: + num_tokens, mhc, hidden = residual.shape + + assert x.dtype == torch.bfloat16, f"x.dtype={x.dtype}" + assert residual.dtype == torch.bfloat16, f"residual.dtype={residual.dtype}" + assert post_layer_mix.dtype == torch.float32, f"post_layer_mix.dtype={post_layer_mix.dtype}" + assert comb_res_mix.dtype == torch.float32, f"comb_res_mix.dtype={comb_res_mix.dtype}" + assert x.shape == (num_tokens, hidden), f"x.shape={x.shape}" + assert post_layer_mix.shape == (num_tokens, mhc), 
f"post_layer_mix.shape={post_layer_mix.shape}" + assert comb_res_mix.shape == (num_tokens, mhc, mhc), f"comb_res_mix.shape={comb_res_mix.shape}" + + + residual = residual.contiguous() + assert x.is_contiguous() + assert post_layer_mix.is_contiguous() + assert comb_res_mix.is_contiguous() + + if out is None: + out = torch.empty_like(residual) + n = num_tokens + h_tiles = math.gcd(hidden, 1024) + h_tiles = hidden // h_tiles + n_thr = 128 + if n < cu_count * 2 and h_tiles > 1: + # increase cu num usage by adding h_split + kernel = _mhc_post_fwd_split_h(mhc, hidden, n_thr=n_thr) + elif n < cu_count * 2: + # use shared mem and stage pipeline + kernel = _mhc_post_fwd(mhc, hidden, n_thr=n_thr) + else: + # only use registers and no pipeline + kernel = _mhc_post_fwd_wo_shmem(mhc, hidden, n_thr=n_thr) + kernel( + comb_res_mix, + residual, + post_layer_mix, + x, + out, + ) + return out diff --git a/aiter/ops/tilelang/mhc/pre_big_fuse.py b/aiter/ops/tilelang/mhc/pre_big_fuse.py new file mode 100644 index 0000000000000000000000000000000000000000..f0fa90e4dc98e078e92be7ab99f4e1a4a4242069 --- /dev/null +++ b/aiter/ops/tilelang/mhc/pre_big_fuse.py @@ -0,0 +1,256 @@ +import functools +import math +from typing import NamedTuple + +import tilelang +import torch +from tilelang import language as T + +from .norm_fn_kernel import _mhc_pre_norm_fn_fwd_mul +from .pre_norm_fn_splitk_kernel import mhc_pre_gemm_sqrsum_splitk_kernel +from .pre_big_fuse_kernel import _mhc_pre_big_fuse + +# Global guards for validating split-k stage0/stage1 kernels. 
+cu_count = torch.cuda.get_device_properties("cuda").multi_processor_count + + +class PreBigFuseBlockInfo(NamedTuple): + token_block: int + hidden_block: int + hidden_loop: int + n_splits_pre: int + use_small_token_splitk: bool + + +@functools.lru_cache(maxsize=1024) +def get_block_info(num_tokens: int, mhc_hidden_size: int, cu_count: int) -> PreBigFuseBlockInfo: + token_block = 128 # use 128 for better performance + hidden_block = 128 # with hidden_block = 128, the occupancy is 2 + hidden_loop = mhc_hidden_size // hidden_block + token_loop = (num_tokens + token_block - 1) // token_block + + if token_loop <= 2: + if num_tokens > 128: + # for occupied 2 + n_splits_pre = 64 + if hidden_loop % n_splits_pre != 0: + hidden_block = 64 + hidden_loop = mhc_hidden_size // hidden_block + elif num_tokens > 64: + # for occupied 2 + token_block = 64 + n_splits_pre = 64 + if hidden_loop % n_splits_pre != 0: + hidden_block = 64 + hidden_loop = mhc_hidden_size // hidden_block + elif num_tokens > 32: + # for occupied 2 + token_block = 32 + n_splits_pre = 64 + if hidden_loop % n_splits_pre != 0: + hidden_block = 64 + hidden_loop = mhc_hidden_size // hidden_block + else: + # occupied 1 + token_block = 32 + n_splits_pre = 64 + if hidden_loop % n_splits_pre != 0: + hidden_block = 64 + hidden_loop = mhc_hidden_size // hidden_block + elif token_loop <= 4: + n_splits_pre = 32 + elif token_loop <= cu_count // 8: + n_splits_pre = 16 + elif token_loop <= cu_count // 4: + n_splits_pre = 8 + elif token_loop <= cu_count * 0.75: + n_splits_pre = 8 + elif token_loop <= cu_count * 2: + n_splits_pre = 4 + else: + n_splits_pre = 1 + + final_token_loop = (num_tokens + token_block - 1) // token_block + use_small_token_splitk = ( + n_splits_pre > 1 + and final_token_loop <= cu_count * 2 + and hidden_loop > 0 + and hidden_loop % n_splits_pre == 0 + ) + + if not use_small_token_splitk: + token_block = 64 + hidden_block = 128 + # print(f"use_small_token_splitk={use_small_token_splitk}, 
num_tokens={num_tokens}, hidden_loop={hidden_loop}, " + # f"MHC_PRE_BIG_FUSE_N_SPLITS_PRE={MHC_PRE_BIG_FUSE_N_SPLITS_PRE}, token_block={token_block}, hidden_block={hidden_block}") + + return PreBigFuseBlockInfo( + token_block=token_block, + hidden_block=hidden_block, + hidden_loop=hidden_loop, + n_splits_pre=n_splits_pre, + use_small_token_splitk=use_small_token_splitk, + ) + + +@functools.lru_cache(maxsize=128) +def _round_to_tf32_kernel(n_elem: int) -> tilelang.JITKernel: + return _compile_round_to_tf32(n_elem) + + +@tilelang.jit # inp, out both passed in; out_idx would mean only inp is passed and out is allocated inside the adapter +def _compile_round_to_tf32(n_elem: int) -> tilelang.JITKernel: + """Bitcast float32 -> int32, add 0x1000, bitcast back (1D linear scan for coalescing).""" + _TF32_ROUND_BITS = 0x1000 + _ROUND_TO_TF32_BLK_MAX = 2048 + n_blk = math.gcd(_ROUND_TO_TF32_BLK_MAX, n_elem) + + @T.prim_func + def _round_to_tf32_prim( + inp: T.Tensor[(n_elem,), T.float32], + out: T.Tensor[(n_elem,), T.float32], + ) -> None: + with T.Kernel(T.ceildiv(n_elem, n_blk)) as pid: + input_frag = T.alloc_fragment((n_blk,), T.float32) + output_frag = T.alloc_fragment((n_blk,), T.float32) + T.copy(inp[pid * n_blk], input_frag) + input_int = T.view(input_frag, (n_blk,), T.int32) + + for t in T.Parallel(n_blk): + input_int[t] += T.int32(_TF32_ROUND_BITS) + output_frag[t] = T.reinterpret(input_int[t], T.float32) + T.copy(output_frag, out[pid * n_blk]) + + return _round_to_tf32_prim + + +def round_to_tf32(fn: torch.Tensor) -> torch.Tensor: + """TF32 grid rounding via TileLang (flat numel; preserves original shape).""" + ne = int(fn.numel()) + out = torch.empty_like(fn) + _round_to_tf32_kernel(ne)(fn.reshape(ne), out.reshape(ne)) + return out + + +def mhc_pre_big_fuse( + residual: torch.Tensor, + fn: torch.Tensor, + mhc_scale: torch.Tensor, + mhc_base: torch.Tensor, + rms_eps: float, + mhc_pre_eps: float, + mhc_sinkhorn_eps: float, + mhc_post_mult_value: float, + 
sinkhorn_repeat: int, + n_splits: int = 16, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + assert residual.dtype == torch.bfloat16 + assert fn.dtype == torch.float32 + assert mhc_scale.dtype == torch.float32 + assert mhc_base.dtype == torch.float32 + + mhc_mult = residual.shape[-2] + hidden_size = residual.shape[-1] + mhc_mult2 = mhc_mult * mhc_mult + mhc_mult3 = mhc_mult * 2 + mhc_mult2 + + mhc_hidden_size = mhc_mult * hidden_size + assert fn.shape[0] == mhc_mult3 + assert fn.shape[1] == mhc_hidden_size + assert mhc_scale.shape == (3,) + assert mhc_base.shape == (mhc_mult3,) + + outer_shape = residual.shape[:-2] + + residual_flat = residual.view(-1, mhc_mult, hidden_size) + num_tokens = residual_flat.shape[0] + fn_flat = fn + + post_mix = torch.empty(num_tokens, mhc_mult, dtype=torch.float32, device=residual.device) + comb_mix = torch.empty(num_tokens, mhc_mult2, dtype=torch.float32, device=residual.device) + layer_input = torch.empty(num_tokens, hidden_size, dtype=torch.bfloat16, device=residual.device) + + # Bucket by 32 so get_block_info cache keys align with common launch granularity; real buffers still use num_tokens. 
+ num_tokens_align = (int(num_tokens) + 31) // 32 * 32 + block_info = get_block_info(num_tokens_align, mhc_hidden_size, cu_count) + token_block = block_info.token_block + hidden_block = block_info.hidden_block + hidden_loop = block_info.hidden_loop + MHC_PRE_BIG_FUSE_N_SPLITS_PRE = block_info.n_splits_pre + use_small_token_splitk = block_info.use_small_token_splitk + + fn = round_to_tf32(fn) + + if use_small_token_splitk: + kernel_0, kernel_1 = mhc_pre_gemm_sqrsum_splitk_kernel( + mhc_mult3, + mhc_hidden_size, + split_k=MHC_PRE_BIG_FUSE_N_SPLITS_PRE, + token_block=token_block, + hidden_block=hidden_block, + ) + partial_out = torch.empty( + MHC_PRE_BIG_FUSE_N_SPLITS_PRE, num_tokens, mhc_mult3, dtype=torch.float32, device=residual.device + ) + partial_sqrsum = torch.empty( + MHC_PRE_BIG_FUSE_N_SPLITS_PRE, num_tokens, dtype=torch.float32, device=residual.device + ) + # gemm_out_mul = torch.empty( + # 1, num_tokens, mhc_mult3, dtype=torch.float32, device=residual.device + # ) + # gemm_out_sqrsum = torch.empty(1, num_tokens, dtype=torch.float32, device=residual.device) + kernel_0( + residual_flat.view(-1, mhc_hidden_size), + fn, + partial_out, + partial_sqrsum, + ) + gemm_out_mul = partial_out + gemm_out_sqrsum = partial_sqrsum + # kernel_1( + # partial_out, + # partial_sqrsum, + # gemm_out_mul.squeeze(0), + # gemm_out_sqrsum.squeeze(0), + # ) + n_splits = MHC_PRE_BIG_FUSE_N_SPLITS_PRE + else: + gemm_out_mul = torch.empty( + 1, num_tokens, mhc_mult3, dtype=torch.float32, device=residual.device + ) + gemm_out_sqrsum = torch.empty(1, num_tokens, dtype=torch.float32, device=residual.device) + n_splits = 1 + fwd_mul_kernel = _mhc_pre_norm_fn_fwd_mul(mhc_mult3, 1, mhc_hidden_size, token_block=token_block, hidden_block=hidden_block) + fwd_mul_kernel( + residual_flat.view(-1, mhc_hidden_size), + fn, + gemm_out_mul.view(-1, 1, mhc_mult3), + gemm_out_sqrsum.view(-1, 1), + ) + # END of TileLang implementation of pre-norm-fn forward matmul + + _mhc_pre_big_fuse( + hidden_size, + 
rms_eps, + mhc_pre_eps, + mhc_sinkhorn_eps, + mhc_post_mult_value, + sinkhorn_repeat, + n_splits=n_splits, + mhc_mult=mhc_mult, + )( + gemm_out_mul, + gemm_out_sqrsum, + mhc_scale, + mhc_base, + residual_flat, + post_mix, + comb_mix, + layer_input, + ) + + post_mix = post_mix.view(*outer_shape, mhc_mult, 1) + comb_mix = comb_mix.view(*outer_shape, mhc_mult, mhc_mult) + layer_input = layer_input.view(*outer_shape, hidden_size) + + return post_mix, comb_mix, layer_input diff --git a/aiter/ops/tilelang/mhc/pre_big_fuse_kernel.py b/aiter/ops/tilelang/mhc/pre_big_fuse_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..e4c8c4ff51af2778cc002faa7b6ee1b0c19b3fa6 --- /dev/null +++ b/aiter/ops/tilelang/mhc/pre_big_fuse_kernel.py @@ -0,0 +1,204 @@ +import math + +import tilelang +import torch +from tilelang import language as T + + +@tilelang.jit( + pass_configs={ + tilelang.PassConfigKey.TL_DISABLE_WARP_SPECIALIZED: True, + tilelang.PassConfigKey.TL_PTXAS_REGISTER_USAGE_LEVEL: 10, + tilelang.PassConfigKey.TL_DISABLE_VECTORIZE_256: True, + tilelang.PassConfigKey.TL_ENABLE_FAST_MATH: True, + }, +) +def _mhc_pre_big_fuse( + hidden_size: int, + rms_eps: float, + mhc_pre_eps: float, + mhc_sinkhorn_eps: float, + mhc_post_mult_value: float, + sinkhorn_repeat: int, + n_splits: int = 16, + mhc_mult: int = 4, +): + num_tokens = T.dynamic('num_tokens') + mhc_mult3 = mhc_mult * (2 + mhc_mult) + hidden_block = math.gcd(512, hidden_size) + + @T.prim_func + def mhc_pre_big_fuse( + gemm_out_mul: T.Tensor[(n_splits, num_tokens, mhc_mult3), T.float32], + gemm_out_sqrsum: T.Tensor[(n_splits, num_tokens), T.float32], + mhc_scale: T.Tensor[(3,), T.float32], + mhc_base: T.Tensor[(mhc_mult3,), T.float32], + residual: T.Tensor[(num_tokens, mhc_mult, hidden_size), T.bfloat16], + # outputs + post_mix: T.Tensor[(num_tokens, mhc_mult), T.float32], + comb_mix: T.Tensor[(num_tokens, mhc_mult * mhc_mult), T.float32], + layer_input: T.Tensor[(num_tokens, hidden_size), 
T.bfloat16], + ) -> None: + threads = 128 + + n_splits_aligned = tilelang.math.next_power_of_2(n_splits) + + if n_splits >= 4: + split_groups = threads // 32 + # assert n_splits % split_groups == 0 + group_rows = n_splits // split_groups + with T.Kernel(num_tokens, threads=threads) as pid: + ################################################################## + # _mhc_pre_norm_fn_fwd_norm + tx = T.get_thread_binding() + mixes_shared = T.alloc_shared(mhc_mult3, T.float32) + rms = T.alloc_fragment(1, T.float32) + + if n_splits >= 4 and n_splits % split_groups == 0: + sqrsum = T.alloc_fragment(n_splits_aligned, T.float32) + T.copy(gemm_out_sqrsum[:, pid], sqrsum) + T.reduce_sum(sqrsum, rms) + rms[0] = T.rsqrt(rms[0] / (mhc_mult * hidden_size) + rms_eps) + mixes_pre = T.alloc_fragment((split_groups, 32), T.float32) + mixes_aligned = T.alloc_fragment(32, T.float32) + T.clear(mixes_pre) + for r in T.serial(group_rows): + for i, j in T.Parallel(split_groups, 32): + if j < mhc_mult3: + mixes_pre[i, j] += gemm_out_mul[i * group_rows + r, pid, j] + T.reduce_sum(mixes_pre, mixes_aligned, dim=0) + for i in T.Parallel(32): + if i < mhc_mult3: + mixes_shared[i] = mixes_aligned[i] * rms[0] + elif n_splits >= 2: + sqrsum = T.alloc_fragment(n_splits_aligned, T.float32) + T.copy(gemm_out_sqrsum[:, pid], sqrsum) + T.reduce_sum(sqrsum, rms) + rms[0] = T.rsqrt(rms[0] / (mhc_mult * hidden_size) + rms_eps) + mixes = T.alloc_fragment(mhc_mult3, T.float32) + for j in T.Parallel(mhc_mult3): + mixes[j] = 0 + for i in T.serial(n_splits): + mixes[j] += gemm_out_mul[i, pid, j] + mixes[j] *= rms[0] + T.copy(mixes, mixes_shared, disable_tma=True) + else: + rms[0] = gemm_out_sqrsum[0, pid] + rms[0] = T.rsqrt(rms[0] / (mhc_mult * hidden_size) + rms_eps) + mixes = T.alloc_fragment(mhc_mult3, T.float32) + for j in T.Parallel(mhc_mult3): + mixes[j] = gemm_out_mul[0, pid, j] + mixes[j] *= rms[0] + T.copy(mixes, mixes_shared, disable_tma=True) + + if tx < 64: + 
################################################################## + # _mhc_pre_split_mixes_fwd (post & comb) + cm = T.alloc_fragment((mhc_mult, mhc_mult), T.float32) + for j in T.Parallel(mhc_mult): + post_mix[pid, j] = T.sigmoid(mixes_shared[j + mhc_mult] * mhc_scale[1] + mhc_base[j + mhc_mult]) * mhc_post_mult_value + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = mixes_shared[j * mhc_mult + k + mhc_mult * 2] * mhc_scale[2] + mhc_base[j * mhc_mult + k + mhc_mult * 2] + + ################################################################## + # _mhc_sinkhorn_fwd + row_sum = T.alloc_fragment(mhc_mult, T.float32) + col_sum = T.alloc_fragment(mhc_mult, T.float32) + + # comb = comb.softmax(-1) + eps + row_max = T.alloc_fragment(mhc_mult, T.float32) + T.reduce_max(cm, row_max, dim=1) + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = T.exp(cm[j, k] - row_max[j]) + T.reduce_sum(cm, row_sum, dim=1) + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = cm[j, k] / row_sum[j] + mhc_sinkhorn_eps + + # comb = comb / (comb.sum(-2) + eps) + T.reduce_sum(cm, col_sum, dim=0) + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = cm[j, k] / (col_sum[k] + mhc_sinkhorn_eps) + + for _ in T.serial(sinkhorn_repeat - 1): + # comb = comb / (comb.sum(-1) + eps) + T.reduce_sum(cm, row_sum, dim=1) + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = cm[j, k] / (row_sum[j] + mhc_sinkhorn_eps) + + # comb = comb / (comb.sum(-2) + eps) + T.reduce_sum(cm, col_sum, dim=0) + for j, k in T.Parallel(mhc_mult, mhc_mult): + cm[j, k] = cm[j, k] / (col_sum[k] + mhc_sinkhorn_eps) + + # save comb_mix to global memory + for j, k in T.Parallel(mhc_mult, mhc_mult): + comb_mix[pid, j * mhc_mult + k] = cm[j, k] + else: + ################################################################## + # _mhc_pre_split_mixes_fwd (pre) + pre_mix_shared = T.alloc_fragment(mhc_mult, T.float32) + for j in T.serial(mhc_mult): + pre_mix_shared[j] = ( + T.sigmoid( + mixes_shared[j] * mhc_scale[0] + 
mhc_base[j], + ) + + mhc_pre_eps + ) + ################################################################### + # _mhc_pre_apply_mix_fwd + for i0_h in T.Pipelined(hidden_size // hidden_block, num_stages=0): + # xs = T.alloc_shared((mhc_mult, hidden_block), T.bfloat16) + xl = T.alloc_fragment((mhc_mult, hidden_block), T.float32) + T.copy(residual[pid, 0, i0_h * hidden_block], xl, disable_tma=True) + # T.copy(xs, xl, disable_tma=True) + + ol = T.alloc_fragment(hidden_block, T.float32) + T.clear(ol) + + for i_mhc in T.serial(mhc_mult): + pre = pre_mix_shared[i_mhc] + for i1_h in T.Parallel(hidden_block): + ol[i1_h] += pre * xl[i_mhc, i1_h] + + T.copy(ol, layer_input[pid, i0_h * hidden_block], disable_tma=True) + + return mhc_pre_big_fuse + + +def pre_big_fuse_tilelang( + gemm_out_mul: torch.Tensor, + gemm_out_sqrsum: torch.Tensor, + mhc_scale: torch.Tensor, + mhc_base: torch.Tensor, + residual: torch.Tensor, + post_mix: torch.Tensor, + comb_mix: torch.Tensor, + layer_input: torch.Tensor, + hidden_size: int, + rms_eps: float, + mhc_pre_eps: float, + mhc_sinkhorn_eps: float, + mhc_post_mult_value: float, + sinkhorn_repeat: int, + n_splits: int = 16, + mhc_mult: int = 4, +) -> None: + _mhc_pre_big_fuse( + hidden_size, + rms_eps, + mhc_pre_eps, + mhc_sinkhorn_eps, + mhc_post_mult_value, + sinkhorn_repeat, + n_splits=n_splits, + mhc_mult=mhc_mult, + )( + gemm_out_mul, + gemm_out_sqrsum, + mhc_scale, + mhc_base, + residual, + post_mix, + comb_mix, + layer_input, + ) diff --git a/aiter/ops/tilelang/mhc/pre_norm_fn_splitk_kernel.py b/aiter/ops/tilelang/mhc/pre_norm_fn_splitk_kernel.py new file mode 100644 index 0000000000000000000000000000000000000000..b22dfccd399f680ee4d8aeadc264ccd1e9ac400c --- /dev/null +++ b/aiter/ops/tilelang/mhc/pre_norm_fn_splitk_kernel.py @@ -0,0 +1,132 @@ +import functools +from typing import Tuple + +import tilelang +from tilelang import language as T + +_PASS_CONFIGS = { + tilelang.PassConfigKey.TL_ENABLE_AGGRESSIVE_SHARED_MEMORY_MERGE: True, + 
tilelang.PassConfigKey.TL_ENABLE_FAST_MATH: True, +} + +@functools.cache +def mhc_pre_gemm_sqrsum_splitk_kernel( + mhc_mult3: int, + mhc_hidden_size: int, + split_k: int, + token_block: int = 64, + hidden_block: int = 256, + threads: int = 256, +) -> Tuple[tilelang.JITKernel, tilelang.JITKernel]: + assert mhc_mult3 <= 32 + assert mhc_hidden_size % hidden_block == 0 + assert mhc_hidden_size % split_k == 0 + split_size = mhc_hidden_size // split_k + assert split_size % hidden_block == 0 + + num_tokens = T.dynamic("num_tokens") + + @tilelang.jit(pass_configs=_PASS_CONFIGS) + def mhc_pre_gemm_sqrsum_splitk_stage_0( + x: T.Tensor[(num_tokens, mhc_hidden_size), T.bfloat16], + fn: T.Tensor[(mhc_mult3, mhc_hidden_size), T.float32], + out_partial: T.Tensor[(split_k, num_tokens, mhc_mult3), T.float32], + sqrsum_partial: T.Tensor[(split_k, num_tokens), T.float32], + ): + with T.Kernel(split_k, T.ceildiv(num_tokens, token_block), threads=threads) as ( + bz, + px, + ): + out_frag = T.alloc_fragment((token_block, 32), T.float32) + sq_part4 = T.alloc_fragment((token_block, 16), T.float32) + T.clear(out_frag) + T.clear(sq_part4) + + k_base = bz * split_size + + for pz in T.Pipelined(split_size // hidden_block, num_stages=0): + x_frag_pre = T.alloc_fragment((token_block, hidden_block), T.bfloat16) + fn_frag_pre = T.alloc_fragment((32, hidden_block), T.float32) + x_frag_16 = T.alloc_fragment((token_block, hidden_block), T.bfloat16) + x_frag = T.alloc_fragment((token_block, hidden_block), T.float32) + fn_frag = T.alloc_fragment((32, hidden_block), T.float32) + + x_smem_16 = T.alloc_shared((token_block, hidden_block), T.bfloat16) + fn_smem = T.alloc_shared((32, hidden_block), T.float32) + T.annotate_layout({x_smem_16: tilelang.layout.make_hcu_swizzled_layout(x_smem_16, major_pack=2)}) + T.annotate_layout({fn_smem: tilelang.layout.make_hcu_swizzled_layout(fn_smem, major_pack=2)}) + + T.copy(x[px * token_block, k_base + pz * hidden_block], x_frag_pre) + T.copy(fn[0, k_base + pz * 
hidden_block], fn_frag_pre) + + T.copy(x_frag_pre, x_smem_16) + T.copy(x_smem_16, x_frag_16) + T.copy(x_frag_16, x_frag) + T.copy(fn_frag_pre, fn_smem) + T.copy(fn_smem, fn_frag) + for jj in T.serial(hidden_block // 16): + for i, j in T.Parallel(token_block, 16): + v = x_frag[i, jj * 16 + j] + sq_part4[i, j] += v * v + + T.gemm( + x_frag, + fn_frag, + out_frag, + transpose_A=False, + transpose_B=True, + k_pack=2, + policy=T.GemmWarpPolicy.FullRow, + use_tf32=True, + ) + + sq_l = T.alloc_fragment((token_block,), T.float32) + T.reduce_sum(sq_part4, sq_l) + out_shared = T.alloc_shared((token_block, 32), T.float32) + T.annotate_layout({out_shared: tilelang.layout.make_hcu_swizzled_layout(out_shared, major_pack=2)}) + T.copy(out_frag, out_shared) + + for i in T.Parallel(token_block): + t = px * token_block + i + if t < num_tokens: + sqrsum_partial[bz, t] = sq_l[i] + + for i, j in T.Parallel(token_block, 32): + t = px * token_block + i + if t < num_tokens and j < mhc_mult3: + out_partial[bz, t, j] = out_shared[i, j] + + @tilelang.jit + def mhc_pre_gemm_sqrsum_splitk_stage_1( + out_partial: T.Tensor[(split_k, num_tokens, 32), T.float32], + sqrsum_partial: T.Tensor[(split_k, num_tokens), T.float32], + out: T.Tensor[(num_tokens, mhc_mult3), T.float32], + sqrsum: T.Tensor[(num_tokens,), T.float32], + ): + warps_per_cta = threads // 64 + num_reduce = T.ceildiv(split_k, 64) + with T.Kernel(T.ceildiv(num_tokens, warps_per_cta), threads=threads) as (px,): + tx = T.get_thread_binding() + warp = tx // 64 + lane = tx % 64 + t = px * warps_per_cta + warp + s = T.alloc_local((1,), T.float32) + acc = T.alloc_local((1,), T.float32) + s[0] = 0 + acc[0] = 0 + + if t < num_tokens: + for r in T.serial(num_reduce): + bz = r * 64 + lane + s[0] += T.if_then_else(bz < split_k, sqrsum_partial[bz, t], 0.0) + sqrsum[t] = T.warp_reduce_sum(s[0]) + if lane < mhc_mult3: + for bz in T.serial(split_k): + acc[0] += out_partial[bz, t, lane] + out[t, lane] = acc[0] + + return ( + 
mhc_pre_gemm_sqrsum_splitk_stage_0, + mhc_pre_gemm_sqrsum_splitk_stage_1, + ) + diff --git a/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-DECODE-FP16.json b/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-DECODE-FP16.json new file mode 100644 index 0000000000000000000000000000000000000000..6694d4e163262e83aa07ccd5341850780bd45227 --- /dev/null +++ b/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-DECODE-FP16.json @@ -0,0 +1,49 @@ +{ + "config": { + "(8, 192, 128, False, True, True, True)": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2 + }, + "(8, 192, 128, True, True, True, True)": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2 + }, + "(16, 192, 128, False, True, False, False)": { + "BLOCK_M": 64, + "BLOCK_N": 32, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1 + }, + "(16, 192, 128, True, True, False, False)": { + "BLOCK_M": 64, + "BLOCK_N": 64, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1 + } + }, + "path": {} +} diff --git a/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-FP16.json b/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-FP16.json index aa014ae903e14b62f94be1ce276f616f4ec9dbfe..14d6287e7722397a32c2c58937ba4350a68ac089 100644 --- a/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-FP16.json +++ b/aiter/ops/triton/configs/BW200B-EXTEND_ATTENTION-V2-FP16.json @@ -1,6 +1,6 @@ { "config": { - "(8, 192, 128, False, True, True, 128)": { + "(8, 192, 128, False, True, True, True)": { "BLOCK_M": 32, "BLOCK_N": 64, "waves_per_eu": 1, @@ -11,7 +11,29 @@ "num_ctas": 
1, "num_stages": 1 }, - "(16, 192, 128, False, True, False, -1)": { + "(8, 192, 128, True, True, True, True)": { + "BLOCK_M": 32, + "BLOCK_N": 64, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1 + }, + "(16, 192, 128, False, True, False, False)": { + "BLOCK_M": 32, + "BLOCK_N": 64, + "waves_per_eu": 1, + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1 + }, + "(16, 192, 128, True, True, False, False)": { "BLOCK_M": 32, "BLOCK_N": 64, "waves_per_eu": 1, diff --git a/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx936.json b/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx936.json new file mode 100644 index 0000000000000000000000000000000000000000..affb4f72e123343a02efabd27e1b28863902d138 --- /dev/null +++ b/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx936.json @@ -0,0 +1,22 @@ +{ + "config": { + "default": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=64": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=32": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + } + } +} diff --git a/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx938.json b/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx938.json new file mode 100644 index 0000000000000000000000000000000000000000..affb4f72e123343a02efabd27e1b28863902d138 --- /dev/null +++ b/aiter/ops/triton/configs/chunk_fwd_o/chunk_fwd_o-gfx938.json @@ -0,0 +1,22 @@ +{ + "config": { + "default": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=64": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=32": { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2 + } + } +} diff --git 
a/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx936.json b/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx936.json new file mode 100644 index 0000000000000000000000000000000000000000..db34080927e29d3458626eb8608e9baf77552ddf --- /dev/null +++ b/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx936.json @@ -0,0 +1,19 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 8, + "num_stages": 2 + }, + "K=128,V=128,BT=64,H=8": { + "BV": 16, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=32,H=8": { + "BV": 32, + "num_warps": 8, + "num_stages": 2 + } + } +} diff --git a/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx938.json b/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx938.json new file mode 100644 index 0000000000000000000000000000000000000000..db34080927e29d3458626eb8608e9baf77552ddf --- /dev/null +++ b/aiter/ops/triton/configs/chunk_gated_delta_rule_fwd_h/chunk_gated_delta_rule_fwd_h-gfx938.json @@ -0,0 +1,19 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 8, + "num_stages": 2 + }, + "K=128,V=128,BT=64,H=8": { + "BV": 16, + "num_warps": 4, + "num_stages": 2 + }, + "K=128,V=128,BT=32,H=8": { + "BV": 32, + "num_warps": 8, + "num_stages": 2 + } + } +} diff --git a/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2-device=gfx938_cu72.json b/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2-device=gfx938_cu72.json new file mode 100644 index 0000000000000000000000000000000000000000..92485a8a019cc248f874587e2c8cdef9567d893f --- /dev/null +++ b/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2-device=gfx938_cu72.json @@ -0,0 +1,40 @@ +{ + "key": [ + "batch_size", + "kv_group_num", + "Lq", + "Lv", + "USE_CUSTOM_MASK", + "IS_CAUSAL", + "SKIP_PREFIX_CUSTOM_MASK", + "HAS_SINK", + "SLIDING_WINDOW_SIZE", + "xai_temperature_len", + "Q_Extend", + 
"K_Extend", + "V_Extend", + "O_Extend", + "K_Buffer", + "V_Buffer", + "qo_indptr", + "kv_indptr", + "kv_indices" + ], + "config": { + "(1, 16, 192, 128, False, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int64', 'torch.int32', 'torch.int64')": { + "BLOCK_M": 128, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "none", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 2 + } + }, + "path": { + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2_decode-device=gfx938_cu72.json b/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2_decode-device=gfx938_cu72.json new file mode 100644 index 0000000000000000000000000000000000000000..a617fcfe716ecb5eaab49bcea053404392d27806 --- /dev/null +++ b/aiter/ops/triton/configs/extend_attn/_fwd_kernel_v2_decode-device=gfx938_cu72.json @@ -0,0 +1,172 @@ +{ + "key": [ + "batch_size", + "kv_group_num", + "Lq", + "Lv", + "USE_CUSTOM_MASK", + "IS_CAUSAL", + "SKIP_PREFIX_CUSTOM_MASK", + "HAS_SINK", + "SLIDING_WINDOW_SIZE", + "xai_temperature_len", + "Q_Extend", + "K_Extend", + "V_Extend", + "O_Extend", + "K_Buffer", + "V_Buffer", + "qo_indptr", + "kv_indptr", + "kv_indices", + "mask_ptr", + "mask_indptr", + "sink_ptr", + "window_kv_offset_ptr" + ], + "config": { + "(32, 8, 192, 128, True, True, True, True, 128, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16', 'torch.int64')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "USE_MLS": false, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 3 + }, + "(32, 16, 192, 128, True, 
True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(1, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "none", + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(3, 8, 192, 128, True, True, True, True, 128, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16', 'torch.int64')": { + "BLOCK_M": 16, + "BLOCK_N": 64, + "waves_per_eu": 1, + "schedule_hint": "none", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1 + }, + "(3, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "none", + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(32, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 
'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(1, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16')": { + "BLOCK_M": 16, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "none", + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(3, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16')": { + "BLOCK_M": 16, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "mmac5-ds10", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(32, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16', 'torch.int32')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(1, 16, 192, 128, True, True, True, False, -1, -1, 
'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16', 'torch.int32')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + }, + "(3, 16, 192, 128, True, True, True, False, -1, -1, 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.bfloat16', 'torch.int32', 'torch.int32', 'torch.int64', 'torch.bool', 'torch.int64', 'torch.bfloat16', 'torch.int32')": { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 1, + "schedule_hint": "local-prefetch", + "matrix_instr_nonkdim": 16, + "sched_latency": "none", + "kpack": 2, + "USE_MLS": false, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 3 + } + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx936.json b/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx936.json new file mode 100644 index 0000000000000000000000000000000000000000..d32411d4419f206a2ee9536436edad43b6519650 --- /dev/null +++ b/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx936.json @@ -0,0 +1,59 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=1,H=4,HV=16": { + "BV": 16, + "num_warps": 4, + "num_stages": 1 + }, + "B=2,H=4,HV=16": { + "BV": 16, + "num_warps": 4, + "num_stages": 2 + }, + "B=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4, + "num_stages": 2 + }, + "B=8,H=4,HV=16": { + "BV": 32, + "num_warps": 4, + "num_stages": 2 + }, + "B=16,H=4,HV=16": { + "BV": 32, + "num_warps": 2, + 
"num_stages": 1 + }, + "B=32,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 1 + }, + "B=50,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 1 + }, + "B=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=256,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + } + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx938.json b/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx938.json new file mode 100644 index 0000000000000000000000000000000000000000..10e4c397bfbcb3a135223434f3babc1fd7dd9828 --- /dev/null +++ b/aiter/ops/triton/configs/fused_recurrent_gated_delta_rule_packed_decode/fused_recurrent_gated_delta_rule_packed_decode-gfx938.json @@ -0,0 +1,59 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=1,H=4,HV=16": { + "BV": 16, + "num_warps": 4, + "num_stages": 1 + }, + "B=2,H=4,HV=16": { + "BV": 16, + "num_warps": 4, + "num_stages": 2 + }, + "B=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4, + "num_stages": 2 + }, + "B=8,H=4,HV=16": { + "BV": 32, + "num_warps": 4, + "num_stages": 2 + }, + "B=16,H=4,HV=16": { + "BV": 32, + "num_warps": 2, + "num_stages": 1 + }, + "B=32,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 1 + }, + "B=50,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + }, + "B=256,H=4,HV=16": { + "BV": 32, + "num_warps": 1, + "num_stages": 2 + } + } +} diff --git a/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx936.json 
b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx936.json new file mode 100644 index 0000000000000000000000000000000000000000..77be6f8ce3a3eaca4babd457784715a6004290cc --- /dev/null +++ b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx936.json @@ -0,0 +1,36 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + "T=8,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + "T=16,H=4,HV=16": { + "BV": 64, + "num_warps": 4 + }, + "T=32,H=4,HV=16": { + "BV": 64, + "num_warps": 4 + }, + "T=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=256,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + } + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx938.json b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx938.json new file mode 100644 index 0000000000000000000000000000000000000000..77be6f8ce3a3eaca4babd457784715a6004290cc --- /dev/null +++ b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update/fused_sigmoid_gating_delta_rule_update-gfx938.json @@ -0,0 +1,36 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + "T=8,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + "T=16,H=4,HV=16": { + "BV": 64, + "num_warps": 4 + }, + "T=32,H=4,HV=16": { + "BV": 64, + "num_warps": 4 + }, + "T=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=256,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + } + } +} \ No newline at end of file diff --git 
a/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx936.json b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx936.json new file mode 100644 index 0000000000000000000000000000000000000000..de4510203c7094582384bb9eaf4f3bec84b1e8d2 --- /dev/null +++ b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx936.json @@ -0,0 +1,56 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + "T=16,H=4,HV=16": { + "BV": 16, + "num_warps": 1 + }, + "T=32,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=192,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=2,HV=8": { + "BV": 16, + "num_warps": 4 + }, + "T=16,H=2,HV=8": { + "BV": 32, + "num_warps": 4 + }, + "T=32,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=64,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=128,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=192,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + } + } +} diff --git a/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx938.json b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx938.json new file mode 100644 index 0000000000000000000000000000000000000000..de4510203c7094582384bb9eaf4f3bec84b1e8d2 --- /dev/null +++ b/aiter/ops/triton/configs/fused_sigmoid_gating_delta_rule_update_recurrent/fused_sigmoid_gating_delta_rule_update_recurrent-gfx938.json @@ -0,0 +1,56 @@ +{ + "config": { + "default": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=4,HV=16": { + "BV": 32, + "num_warps": 4 + }, + 
"T=16,H=4,HV=16": { + "BV": 16, + "num_warps": 1 + }, + "T=32,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=64,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=128,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=192,H=4,HV=16": { + "BV": 32, + "num_warps": 1 + }, + "T=4,H=2,HV=8": { + "BV": 16, + "num_warps": 4 + }, + "T=16,H=2,HV=8": { + "BV": 32, + "num_warps": 4 + }, + "T=32,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=64,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=128,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + }, + "T=192,H=2,HV=8": { + "BV": 16, + "num_warps": 1 + } + } +} diff --git a/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..caf7271b050591894c86a89b7416ba7953b790a6 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,210 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + 
"COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + 
"instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8.json b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..2eceb124d60dc8ace4d93c7df68012b8bfdb9709 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=fp8_w8a8.json @@ -0,0 +1,210 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + 
"BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + 
"USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": 
"none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..10e6a84eac5bf24dc3db51ee5c3cb49229420986 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8,is_bottom=True.json @@ -0,0 +1,210 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 
256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": 
false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8.json b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..fa9d2a96e22a8ffa795964fe18fd4ce75b9cb1b8 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=160,N=320,device_name=BW200B,dtype=int8_w8a8.json @@ -0,0 +1,210 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "2": { + 
"BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + 
"GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + 
"USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json b/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json index f6e29e2c28d6bbf938a6b4c9681a471a8e503c01..9fb543da6b3de647c60628c19517a7b259dd61e0 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json +++ b/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json @@ -3,110 +3,143 @@ "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 2, "num_stages": 2 }, "2": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 8, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 2, "num_stages": 2 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, - "COMBINE_SCALE_LOAD": false, + "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 1 + "num_stages": 2 }, "8": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, - "instruction_sched_variant": "none", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + 
"num_warps": 2, "num_stages": 2 }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, - "instruction_sched_variant": "none", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "24": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "32": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "64": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "128": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "256": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - 
"num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "512": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 2, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "1024": { @@ -115,48 +148,76 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "2048": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, "4096": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, "8192": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 8, "num_stages": 2 }, "16384": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, - "COMBINE_SCALE_LOAD": true, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + 
}, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json b/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json index 0e58648ca1bd6b9f083f952dba50fe614596de28..ec30889f37b6e1d27e0803fef8d1e63aac2378c9 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json +++ b/aiter/ops/triton/configs/moe/E=256,N=128,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json @@ -1,31 +1,40 @@ { "1": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 2, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "2": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 512, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, @@ -35,8 +44,11 @@ "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": 
"local-prefetch", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "16": { @@ -45,89 +57,116 @@ "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "24": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "32": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "64": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "128": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "256": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", 
- "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "512": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 2 }, "1024": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, "num_stages": 2 }, "2048": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 2, - "num_stages": 1 + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 }, "4096": { "BLOCK_SIZE_M": 64, @@ -135,28 +174,50 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "8192": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 4, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "16384": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 4, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 
4, - "num_stages": 1 + "num_stages": 2 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16,is_bottom=True.json index 8673cee30add80bd47b4f0986dc323c851aac493..2ba468a74912d64e7094eaf43c3be0430ee5eaa4 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16,is_bottom=True.json +++ b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16,is_bottom=True.json @@ -1,152 +1,223 @@ { "1": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 2, - "num_stages": 2 + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, "2": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 }, "8": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 
128, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "24": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "32": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "64": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "128": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - 
"instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "256": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "512": { "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "1024": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "2048": { - "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 16, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "4096": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 16, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 
}, "8192": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 16, - "GROUP_SIZE_M": 4, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16.json b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16.json index d469771170dc53cc9cf67f83eababc121e710d3e..ecb83c94954a8c9a9f2147bb0a8cdaff90a3d0a4 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16.json +++ b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int4_w4a16.json @@ -2,151 +2,222 @@ "1": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 16, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 1 + "num_stages": 2 }, "2": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 8, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + 
"USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 2, - "num_stages": 1 + "num_stages": 2 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "8": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 4, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 1 }, "16": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 1 }, "24": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "32": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "64": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - 
"num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "128": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "256": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "512": { "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, - "1024": { + "2048": { "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, - "2048": { + "4096": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - 
"num_warps": 16, - "num_stages": 2 + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, - "4096": { + "8192": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 16, - "num_stages": 2 + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, - "8192": { + "16384": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 16, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json index 4b11a66a4e6b3d1c3a655f636c2b289572d5b60c..e388ab583d54e07a8bf4ad8a640981e442ed2d0d 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json +++ b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,block_shape=[128,128].json @@ -1,141 +1,183 @@ { "1": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 2, "num_stages": 2 }, "2": { "BLOCK_SIZE_M": 16, 
"BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, - "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, - "num_stages": 1 + "num_stages": 2 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, - "instruction_sched_variant": "none", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "8": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, - "instruction_sched_variant": "none", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, - "instruction_sched_variant": "none", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "24": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "32": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + 
"sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "64": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "128": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "256": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "512": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, "num_stages": 2 }, "1024": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, "2048": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, 
"4096": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, @@ -143,20 +185,39 @@ "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 4, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, - "num_stages": 2 + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, "16384": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": true, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 8, - "num_stages": 2 + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json index f5feed3e5e282a1b28b17a6289858696a5c9d7cc..c14818a5e833814afefea309fe03d3594711cc07 100644 --- a/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json +++ b/aiter/ops/triton/configs/moe/E=256,N=256,device_name=K100_AI,dtype=int8_w8a8,is_bottom=True,block_shape=[128,128].json @@ -1,32 +1,41 @@ { "1": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, 
"COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "2": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, - "BLOCK_SIZE_K": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, "num_stages": 1 }, "4": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 8, + "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 4, + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 8, "num_stages": 1 }, "8": { @@ -35,78 +44,102 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 8, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 1 }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, "24": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - "num_warps": 8, - "num_stages": 2 + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 }, "32": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + 
"instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 2 }, "64": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 2, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "128": { - "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, "num_warps": 4, "num_stages": 2 }, "256": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "local-prefetch", - "num_warps": 8, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "512": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "none", - "num_warps": 4, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 8, "num_stages": 2 }, "1024": { @@ -115,18 +148,24 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "local-prefetch", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 8, "num_stages": 2 }, "2048": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", - 
"num_warps": 2, + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, "num_stages": 2 }, "4096": { @@ -135,7 +174,10 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 2, "num_stages": 2 }, @@ -145,18 +187,37 @@ "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, "num_warps": 2, "num_stages": 1 }, "16384": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "COMBINE_SCALE_LOAD": false, - "instruction_sched_variant": "local-prefetch", - "num_warps": 2, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "32768": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, "num_stages": 1 } } \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..ebd4c4482f045dac07da23fd76baf54ed7d2d500 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 
16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + 
"COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + 
"instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8.json b/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..1d80dedc0c0e2d963ffe44633328489e9d622b66 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=288,N=160,device_name=BW200B,dtype=fp8_w8a8.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + 
"BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": 
false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..e5cd77984ab18326c3f83ad966d3ad0979b031f7 --- /dev/null +++ 
b/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8,is_bottom=True.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 512, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + 
"num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 
128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8.json b/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8.json new file mode 100644 index 0000000000000000000000000000000000000000..8c88b2eb6fee64dff844c6de68d2bc32c5ad1057 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=288,N=320,device_name=BW200B,dtype=fp8_w8a8.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + 
"sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 
2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": true, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + } +} \ No newline at end of file diff --git 
a/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..26244994461394fd5bb4932303d61e3da84de99a --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16,is_bottom=True.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": 
"none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + 
"num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "16384": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "32768": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16.json b/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16.json new file mode 100644 index 0000000000000000000000000000000000000000..8a8d31504632681ba63c0e891590521e2a337afe --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=384,N=1024,device_name=BW200,dtype=int4_w4a16.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + 
"COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + 
"USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "8192": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "16384": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, 
+ "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + }, + "32768": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "mmac5-ds10", + "kpack": 1, + "num_warps": 16, + "num_stages": 2 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B,is_bottom=True.json b/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B,is_bottom=True.json new file mode 100644 index 0000000000000000000000000000000000000000..401bda05f12b9627aa2729adacdbaed78d2f0376 --- /dev/null +++ b/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B,is_bottom=True.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 8, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + 
"BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + 
"BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 8, + "num_stages": 2 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 1, + "num_warps": 8, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 8, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 8, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B.json b/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B.json new file mode 100644 index 0000000000000000000000000000000000000000..5639a58e2104d686c138dc2e903efdd396846fc0 --- /dev/null +++ 
b/aiter/ops/triton/configs/moe/E=512,N=336,device_name=BW200B.json @@ -0,0 +1,223 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + 
"num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "local-prefetch", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 2, + "num_stages": 1 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + 
"num_stages": 1 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 8, + "num_stages": 1 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + }, + "32768": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "COMBINE_SCALE_LOAD": false, + "USE_MLS_LOAD": false, + "instruction_sched_variant": "none", + "sched_latency": "none", + "kpack": 2, + "num_warps": 4, + "num_stages": 1 + } +} \ No newline at end of file diff --git a/aiter/ops/triton/extend_attention.py b/aiter/ops/triton/extend_attention.py index e35d5dddbb25f3ae0438cd3f0c2b5864c6dbc51c..adaaab72104af1b4e04e7bbf713dea728961705f 100644 --- a/aiter/ops/triton/extend_attention.py +++ b/aiter/ops/triton/extend_attention.py @@ -38,6 +38,8 @@ from aiter.ops.triton.activation import _tanh import aiter.ops.triton.utils.arch_info as arch_info from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH +from triton import __version__ as triton_version +triton_minor_version = int(triton_version.split(".")[1]) @triton.jit def _fwd_kernel( @@ -348,6 +350,10 @@ def _fwd_kernel_v2( SKIP_PREFIX_CUSTOM_MASK: tl.constexpr, STORE_TRANSPOSE: tl.constexpr, HAS_SINK: tl.constexpr, + head_num: tl.constexpr, + USE_MLS: tl.constexpr, + batch_size: tl.constexpr, + # max_len_extend: tl.constexpr, ): cur_seq = tl.program_id(0) cur_head = tl.program_id(1) @@ -357,6 +363,24 @@ def _fwd_kernel_v2( tl.assume(K_Extend.to(tl.int64) >= 0) tl.assume(V_Extend.to(tl.int64) >= 0) + tl.assume(kv_group_num >= 0) + tl.assume(stride_qbs >= 0) + tl.assume(stride_qh >= 0) + 
tl.assume(stride_kbs >= 0) + tl.assume(stride_kh >= 0) + tl.assume(stride_vbs >= 0) + tl.assume(stride_vh >= 0) + tl.assume(stride_obs >= 0) + tl.assume(stride_oh >= 0) + tl.assume(stride_buf_kbs >= 0) + tl.assume(stride_buf_kh >= 0) + tl.assume(stride_buf_vbs >= 0) + tl.assume(stride_buf_vh >= 0) + tl.assume(head_num >= 0) + tl.assume(batch_size >= 0) + # tl.assume(max_len_extend >= 0) + + kv_head_num = head_num // kv_group_num cur_kv_head = cur_head // kv_group_num cur_seq_extend_start_idx = tl.load(qo_indptr + cur_seq) @@ -380,6 +404,10 @@ def _fwd_kernel_v2( mask_d = offs_d < Lq mask_dv = offs_dv < Lv + ALL_MASK_M = tl.min(mask_m.to(tl.int32), axis=0) == 1 + ALL_MASK_D = tl.min(mask_d.to(tl.int32), axis=0) == 1 + ALL_MASK_DV = tl.min(mask_dv.to(tl.int32), axis=0) == 1 + if xai_temperature_len > 0: offs_qidx = cur_seq_len_prefix + cur_block_m * BLOCK_M + offs_m xai_temperature_scale = 1.0 / tl.log2(float(xai_temperature_len)) @@ -389,10 +417,534 @@ def _fwd_kernel_v2( 1.0, ) - offs_q = ( + if USE_MLS: + q = tl.matrix_load( + Q_Extend + cur_head * stride_qh, + shape=(head_num, Lq), + strides=(stride_qbs, 1), + block_shape=(BLOCK_M, BLOCK_DMODEL), + offsets=((cur_seq_extend_start_idx + cur_block_m * BLOCK_M).to(tl.int32), 0), + ) + if not (ALL_MASK_M & ALL_MASK_D): + q = tl.where((mask_m[:, None]) & (mask_d[None, :]), q, 0.0) + else: + offs_q = ( + (cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, None]) + * stride_qbs + + cur_head * stride_qh + + offs_d[None, :] + ) + q = tl.load( + Q_Extend + offs_q, mask=(mask_m[:, None]) & (mask_d[None, :]), other=0.0 + ) + + if BLOCK_DPE > 0: + offs_dpe = BLOCK_DMODEL + tl.arange(0, BLOCK_DPE) + if USE_MLS: + qpe = tl.matrix_load(Q_Extend + cur_head * stride_qh, + shape=(head_num, Lq), + strides=(stride_qbs, 1), + block_shape=(BLOCK_M, BLOCK_DPE), + offsets=((cur_seq_extend_start_idx + cur_block_m * BLOCK_M).to(tl.int32), + BLOCK_DMODEL), + ) + if not ALL_MASK_M: + qpe = tl.where(mask_m[:, None], qpe, 0.0) + else: 
+ offs_qpe = ( + (cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, None]) + * stride_qbs + + cur_head * stride_qh + + offs_dpe[None, :] + ) + qpe = tl.load(Q_Extend + offs_qpe, mask=mask_m[:, None], other=0.0) + + offs_n = tl.arange(0, BLOCK_N) + + acc = tl.zeros([BLOCK_M, BLOCK_DV], dtype=tl.float32) + deno = tl.zeros([BLOCK_M], dtype=tl.float32) + e_max = tl.zeros([BLOCK_M], dtype=tl.float32) - float("inf") + + for start_n in range(0, cur_seq_len_prefix, BLOCK_N): + start_n = tl.multiple_of(start_n, BLOCK_N) + mask_n = (start_n + offs_n) < cur_seq_len_prefix + ALL_MASK_N = tl.min(mask_n.to(tl.int32), axis=0) == 1 + + final_mask = mask_m[:, None] & mask_n[None, :] + if USE_CUSTOM_MASK and not SKIP_PREFIX_CUSTOM_MASK: + if USE_MLS: + custom_mask = tl.matrix_load( + mask_ptr + cur_seq_mask_start_idx, + shape=(cur_seq_len_extend, cur_seq_len + window_kv_offset), + strides=(cur_seq_len + window_kv_offset, 1), + block_shape=(BLOCK_M, BLOCK_N), + offsets=((cur_block_m * BLOCK_M).to(tl.int32), + (window_kv_offset + start_n).to(tl.int32)), + ) + if not (ALL_MASK_M & ALL_MASK_N): + custom_mask = tl.where((mask_m[:, None]) & (mask_n[None, :]), custom_mask, 0) + else: + custom_mask = tl.load( + mask_ptr + + cur_seq_mask_start_idx + + (cur_block_m * BLOCK_M + offs_m[:, None]) + * (cur_seq_len + window_kv_offset) + + window_kv_offset + + start_n + + offs_n[None, :], + mask=(mask_m[:, None] & mask_n[None, :]), + other=0, + ) + final_mask &= custom_mask + if SLIDING_WINDOW_SIZE > 0: + window_mask = ( + cur_seq_len_prefix + cur_block_m * BLOCK_M + offs_m[:, None] + ) <= (start_n + offs_n[None, :] + SLIDING_WINDOW_SIZE) + final_mask &= window_mask + + SKIP_TILE = False + if (USE_CUSTOM_MASK and not SKIP_PREFIX_CUSTOM_MASK) or SLIDING_WINDOW_SIZE > 0: + SKIP_TILE = tl.max(tl.max(final_mask.to(tl.int32), axis=1), axis=0) == 0 + + if not SKIP_TILE: + offs_kv_loc = tl.load( + kv_indices + cur_seq_kv_start_idx + start_n + offs_n, + mask=mask_n, + other=0, + ) + + # 
offs_kv_next = offs_kv_loc[1:] # [1, 2, ..., N-1] + # offs_kv_curr = offs_kv_loc[:-1] # [0, 1, ..., N-2] + # diff = offs_kv_next - offs_kv_curr # 长度 N-1 + # is_continuous = tl.all(diff == 1) + + # if USE_MLS and is_continuous: + # offs_kv_start_idx = tl.load( + # kv_indices + cur_seq_kv_start_idx + start_n, + # ) + + # k = tl.matrix_load( + # K_Buffer + cur_kv_head * stride_buf_kh, + # shape=(Lq, cur_seq_len_prefix.to(tl.int32)), + # strides=(1, stride_buf_kbs), + # block_shape=(BLOCK_DMODEL, BLOCK_N), + # offsets=(0, offs_kv_start_idx.to(tl.int32)), + # # mask=(mask_m[:, None] & mask_n[None, :]), + # # boundary_check=(0, 1), + # ) + # if not (ALL_MASK_N & ALL_MASK_D): + # k = tl.where((mask_n[None, :]) & (mask_d[:, None]), k, 0.0) + # else: + # offs_buf_k = ( + # offs_kv_loc[None, :] * stride_buf_kbs + # + cur_kv_head * stride_buf_kh + # + offs_d[:, None] + # ) + # k = tl.load( + # K_Buffer + offs_buf_k, + # mask=(mask_n[None, :]) & (mask_d[:, None]), + # other=0.0, + # ) + offs_buf_k = ( + offs_kv_loc[None, :] * stride_buf_kbs + + cur_kv_head * stride_buf_kh + + offs_d[:, None] + ) + k = tl.load( + K_Buffer + offs_buf_k, + mask=(mask_n[None, :]) & (mask_d[:, None]), + other=0.0, + ) + qk = tl.dot(q.to(k.dtype), k) + if BLOCK_DPE > 0: + # if USE_MLS: + # kpe = tl.matrix_load( + # K_Buffer + cur_kv_head * stride_buf_kh, + # shape=(Lq, cur_seq_len_prefix.to(tl.int32)), + # strides=(1, stride_buf_kbs), + # block_shape=(BLOCK_DPE, BLOCK_N), + # offsets=(BLOCK_DMODEL, offs_kv_start_idx.to(tl.int32)), + # # mask=(mask_m[:, None] & mask_n[None, :]), + # # boundary_check=(0, 1), + # ) + # if not ALL_MASK_N: + # kpe = tl.where((mask_n[None, :]), kpe, 0.0) + # else: + # offs_kpe = ( + # offs_kv_loc[None, :] * stride_buf_kbs + # + cur_kv_head * stride_buf_kh + # + offs_dpe[:, None] + # ) + # kpe = tl.load( + # K_Buffer + offs_kpe, + # mask=mask_n[None, :], + # other=0.0, + # ) + offs_kpe = ( + offs_kv_loc[None, :] * stride_buf_kbs + + cur_kv_head * stride_buf_kh + + 
offs_dpe[:, None] + ) + kpe = tl.load( + K_Buffer + offs_kpe, + mask=mask_n[None, :], + other=0.0, + ) + qk += tl.dot(qpe.to(kpe.dtype), kpe) + qk *= sm_scale * k_scale + + if logit_cap > 0: + qk = logit_cap * _tanh(qk / logit_cap) + + if xai_temperature_len > 0: + qk *= xai_temperature_reg[:, None] + + qk = tl.where(final_mask, qk, float("-inf")) + + row_max = tl.max(qk, 1) + row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) + n_e_max = tl.maximum(row_max_fixed, e_max) + + re_scale = tl.exp(e_max - n_e_max) + p = tl.exp(qk - n_e_max[:, None]) + deno = deno * re_scale + tl.sum(p, 1) + + # if USE_MLS: + # v = tl.matrix_load( + # V_Buffer + cur_kv_head * stride_buf_vh, + # shape=(cur_seq_len_prefix.to(tl.int32), Lv), + # strides=(stride_buf_kbs, 1), + # block_shape=(BLOCK_N, BLOCK_DV), + # offsets=(offs_kv_start_idx.to(tl.int32), 0), + # # mask=(mask_m[:, None] & mask_n[None, :]), + # # boundary_check=(0, 1), + # ) + # if not (ALL_MASK_N & ALL_MASK_DV): + # v = tl.where((mask_n[:, None] & mask_dv[None, :]), v, 0.0) + # else: + # offs_buf_v = ( + # offs_kv_loc[:, None] * stride_buf_vbs + # + cur_kv_head * stride_buf_vh + # + offs_dv[None, :] + # ) + # v = tl.load( + # V_Buffer + offs_buf_v, + # mask=mask_n[:, None] & mask_dv[None, :], + # other=0.0, + # ) + offs_buf_v = ( + offs_kv_loc[:, None] * stride_buf_vbs + + cur_kv_head * stride_buf_vh + + offs_dv[None, :] + ) + v = tl.load( + V_Buffer + offs_buf_v, + mask=mask_n[:, None] & mask_dv[None, :], + other=0.0, + ) + p = p.to(v.dtype) + acc = acc * re_scale[:, None] + tl.dot(p, v) * v_scale + + e_max = n_e_max + + cur_block_m_end = ( + cur_seq_len_extend + if not IS_CAUSAL + else tl.minimum(cur_seq_len_extend, (cur_block_m + 1) * BLOCK_M) + ) + for start_n in range(0, cur_block_m_end, BLOCK_N): + start_n = tl.multiple_of(start_n, BLOCK_N) + mask_n = (start_n + offs_n) < cur_block_m_end + ALL_MASK_N = tl.min(mask_n.to(tl.int32), axis=0) == 1 + + final_mask = mask_m[:, None] & mask_n[None, :] + if 
USE_CUSTOM_MASK: + if USE_MLS: + custom_mask = tl.matrix_load( + mask_ptr + cur_seq_mask_start_idx, + shape=(cur_block_m_end, cur_seq_len + window_kv_offset), + strides=(cur_seq_len + window_kv_offset, 1), + block_shape=(BLOCK_M, BLOCK_N), + offsets=((cur_block_m * BLOCK_M).to(tl.int32), + (window_kv_offset + cur_seq_len_prefix + start_n).to(tl.int32)), + ) + if not (ALL_MASK_M & ALL_MASK_N): + custom_mask = tl.where((mask_m[:, None]) & (mask_n[None, :]), custom_mask, 0) + else: + custom_mask = tl.load( + mask_ptr + + cur_seq_mask_start_idx + + (cur_block_m * BLOCK_M + offs_m[:, None]) + * (cur_seq_len + window_kv_offset) + + window_kv_offset + + cur_seq_len_prefix + + start_n + + offs_n[None, :], + mask=(mask_m[:, None] & mask_n[None, :]), + other=0, + ) + custom_mask &= mask_m[:, None] & mask_n[None, :] + final_mask &= custom_mask + elif IS_CAUSAL: + mask_causual = (cur_block_m * BLOCK_M + offs_m[:, None]) >= ( + start_n + offs_n[None, :] + ) + mask_causual &= mask_m[:, None] & mask_n[None, :] + final_mask &= mask_causual + else: + mask_non_causal = mask_m[:, None] & mask_n[None, :] + final_mask &= mask_non_causal + + if SLIDING_WINDOW_SIZE > 0: + window_mask = (cur_block_m * BLOCK_M + offs_m[:, None]) <= ( + start_n + offs_n[None, :] + SLIDING_WINDOW_SIZE + ) + final_mask &= window_mask + + SKIP_TILE = False + if USE_CUSTOM_MASK or SLIDING_WINDOW_SIZE > 0: + SKIP_TILE = tl.max(tl.max(final_mask.to(tl.int32), axis=1), axis=0) == 0 + + if not SKIP_TILE: + if USE_MLS: + k = tl.matrix_load( + K_Extend + cur_kv_head * stride_kh, + shape=(kv_head_num, Lq), + strides=(1, stride_kbs), + block_shape=(BLOCK_DMODEL, BLOCK_N), + offsets=(0, (cur_seq_extend_start_idx + start_n).to(tl.int32)), + ) + if not (ALL_MASK_N & ALL_MASK_D): + k = tl.where((mask_d[:, None]) & (mask_n[None, :]), k, 0.0) + else: + offs_k = ( + (cur_seq_extend_start_idx + start_n + offs_n[None, :]) * stride_kbs + + cur_kv_head * stride_kh + + offs_d[:, None] + ) + k = tl.load( + K_Extend + offs_k, 
mask=(mask_n[None, :]) & (mask_d[:, None]), other=0.0 + ) + + qk = tl.dot(q.to(k.dtype), k, out_dtype=tl.float32) + if BLOCK_DPE > 0: + if USE_MLS: + kpe = tl.matrix_load( + K_Extend + cur_kv_head * stride_kh, + shape=(kv_head_num, Lq), + strides=(1, stride_kbs), + block_shape=(BLOCK_DPE, BLOCK_N), + offsets=(BLOCK_DMODEL, (cur_seq_extend_start_idx + start_n).to(tl.int32)), + ) + if not ALL_MASK_N: + kpe = tl.where(mask_n[None, :], kpe, 0.0) + else: + offs_kpe = ( + (cur_seq_extend_start_idx + start_n + offs_n[None, :]) * stride_kbs + + cur_kv_head * stride_kh + + offs_dpe[:, None] + ) + kpe = tl.load( + K_Extend + offs_kpe, + mask=mask_n[None, :], + other=0.0, + ) + qk += tl.dot(qpe.to(kpe.dtype), kpe) + + qk *= sm_scale + + if logit_cap > 0: + qk = logit_cap * _tanh(qk / logit_cap) + + if xai_temperature_len > 0: + qk *= xai_temperature_reg[:, None] + + qk = tl.where(final_mask, qk, float("-inf")) + + row_max = tl.max(qk, 1) + row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) + n_e_max = tl.maximum(row_max_fixed, e_max) + + re_scale = tl.exp(e_max - n_e_max) + p = tl.exp(qk - n_e_max[:, None]) + deno = deno * re_scale + tl.sum(p, 1) + + if USE_MLS: + v = tl.matrix_load( + V_Extend + cur_kv_head * stride_vh, + shape=(kv_head_num, Lv), + strides=(stride_vbs, 1), + block_shape=(BLOCK_N, BLOCK_DV), + offsets=((cur_seq_extend_start_idx + start_n).to(tl.int32), 0), + ) + if not (ALL_MASK_N & ALL_MASK_DV): + v = tl.where((mask_n[:, None]) & (mask_dv[None, :]), v, 0.0) + else: + offs_v = ( + (cur_seq_extend_start_idx + start_n + offs_n[:, None]) * stride_vbs + + cur_kv_head * stride_vh + + offs_dv[None, :] + ) + v = tl.load( + V_Extend + offs_v, mask=mask_n[:, None] & mask_dv[None, :], other=0.0 + ) + p = p.to(v.dtype) + acc = acc * re_scale[:, None] + tl.dot(p, v) + + e_max = n_e_max + + if HAS_SINK: + cur_sink = tl.load(sink_ptr + cur_head) + deno += tl.exp(cur_sink - e_max) + + offs_o = ( (cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, 
None]) - * stride_qbs - + cur_head * stride_qh + * stride_obs + + cur_head * stride_oh + + offs_dv[None, :] + ) + if STORE_TRANSPOSE: + tl.store( + O_Extend + offs_o.T, + (acc / deno[:, None]).T, + mask=(mask_m[:, None] & mask_dv[None, :]).T, + ) + else: + tl.store( + O_Extend + offs_o, + acc / deno[:, None], + mask=mask_m[:, None] & mask_dv[None, :], + ) + + +@triton.jit +def _fwd_kernel_v2_decode( + Q_Extend, + K_Extend, + V_Extend, + O_Extend, + K_Buffer, + V_Buffer, + qo_indptr, + kv_indptr, + kv_indices, + mask_ptr, + mask_indptr, + sink_ptr, + window_kv_offset_ptr, + sm_scale, + k_scale, + v_scale, + stride_qbs, + stride_qh, + stride_kbs, + stride_kh, + stride_vbs, + stride_vh, + stride_obs, + stride_oh, + stride_buf_kbs, + stride_buf_kh, + stride_buf_vbs, + stride_buf_vh, + SLIDING_WINDOW_SIZE: tl.constexpr, + logit_cap: tl.constexpr, + xai_temperature_len: tl.constexpr, + Lq: tl.constexpr, + Lv: tl.constexpr, + BLOCK_DMODEL: tl.constexpr, + BLOCK_DPE: tl.constexpr, + BLOCK_DV: tl.constexpr, + BLOCK_M: tl.constexpr, + BLOCK_N: tl.constexpr, + USE_CUSTOM_MASK: tl.constexpr, + IS_CAUSAL: tl.constexpr, + SKIP_PREFIX_CUSTOM_MASK: tl.constexpr, + STORE_TRANSPOSE: tl.constexpr, + HAS_SINK: tl.constexpr, + kv_group_num: tl.constexpr, + num_query_heads: tl.constexpr, + USE_MLS: tl.constexpr, + batch_size: tl.constexpr, + # max_len_extend: tl.constexpr, +): + """ + v2 decode: grid (batch, num_kv_heads, cdiv(max_len_extend, Q_SEQ)) with + Q_SEQ = BLOCK_M // kv_group_num (same as unified ``BLOCK_Q``; floor, not ceil). If BLOCK_M is not + a multiple of G, adjacent ``cur_block_m`` may overlap in query_pos like unified, but `mask_m` + and sequence bounds keep correctness. BLOCK_M is a power of 2 (host). Require BLOCK_M // G >= 1 to launch. + """ + # Per unified: BLOCK_Q = BLOCK_M // G; stride in query token index for each +1 of cur_block_m. 
+ Q_SEQ: tl.constexpr = BLOCK_M // kv_group_num + cur_seq = tl.program_id(0) + cur_kv_head = tl.program_id(1) + cur_block_m = tl.program_id(2) + tl.assume(Q_Extend.to(tl.int64) >= 0) + tl.assume(K_Extend.to(tl.int64) >= 0) + tl.assume(V_Extend.to(tl.int64) >= 0) + + tl.assume(stride_qbs >= 0) + tl.assume(stride_qh >= 0) + tl.assume(stride_kbs >= 0) + tl.assume(stride_kh >= 0) + tl.assume(stride_vbs >= 0) + tl.assume(stride_vh >= 0) + tl.assume(stride_obs >= 0) + tl.assume(stride_oh >= 0) + tl.assume(stride_buf_kbs >= 0) + tl.assume(stride_buf_kh >= 0) + tl.assume(stride_buf_vbs >= 0) + tl.assume(stride_buf_vh >= 0) + tl.assume(batch_size >= 0) + # tl.assume(max_len_extend >= 0) + + kv_head_num = num_query_heads // kv_group_num + + cur_seq_extend_start_idx = tl.load(qo_indptr + cur_seq) + cur_seq_len_extend = tl.load(qo_indptr + cur_seq + 1) - cur_seq_extend_start_idx + cur_seq_kv_start_idx = tl.load(kv_indptr + cur_seq) + cur_seq_len_prefix = tl.load(kv_indptr + cur_seq + 1) - cur_seq_kv_start_idx + cur_seq_len = cur_seq_len_prefix + cur_seq_len_extend + + if cur_block_m * Q_SEQ >= cur_seq_len_extend: + return + + if USE_CUSTOM_MASK: + cur_seq_mask_start_idx = tl.load(mask_indptr + cur_seq) + + window_kv_offset = 0 + if USE_CUSTOM_MASK and SLIDING_WINDOW_SIZE > 0: + window_kv_offset = tl.load(window_kv_offset_ptr + cur_seq) + + offs_d = tl.arange(0, BLOCK_DMODEL) + offs_dv = tl.arange(0, BLOCK_DV) + offs_m = tl.arange(0, BLOCK_M) + # unified_attention-style: per offs_m, row = offs_m // G, h = offs_m % G (G = kv_group_num) + query_pos = cur_block_m * Q_SEQ + (offs_m // kv_group_num) + q_head_in_group = offs_m % kv_group_num + query_offset_0 = cur_seq_extend_start_idx + query_pos + query_offset_1 = cur_kv_head * kv_group_num + q_head_in_group + mask_m = (query_pos < cur_seq_len_extend) & (query_offset_1 < num_query_heads) + + mask_d = offs_d < Lq + mask_dv = offs_dv < Lv + + ALL_MASK_M = tl.min(mask_m.to(tl.int32), axis=0) == 1 + ALL_MASK_D = 
tl.min(mask_d.to(tl.int32), axis=0) == 1 + ALL_MASK_DV = tl.min(mask_dv.to(tl.int32), axis=0) == 1 + + if xai_temperature_len > 0: + offs_qidx = cur_seq_len_prefix + query_pos + xai_temperature_scale = 1.0 / tl.log2(float(xai_temperature_len)) + xai_temperature_reg = tl.where( + offs_qidx > xai_temperature_len, + tl.log2(offs_qidx.to(tl.float32)) * xai_temperature_scale, + 1.0, + ) + + offs_q = ( + query_offset_0[:, None] * stride_qbs + + query_offset_1[:, None] * stride_qh + offs_d[None, :] ) q = tl.load( @@ -402,9 +954,8 @@ def _fwd_kernel_v2( if BLOCK_DPE > 0: offs_dpe = BLOCK_DMODEL + tl.arange(0, BLOCK_DPE) offs_qpe = ( - (cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, None]) - * stride_qbs - + cur_head * stride_qh + query_offset_0[:, None] * stride_qbs + + query_offset_1[:, None] * stride_qh + offs_dpe[None, :] ) qpe = tl.load(Q_Extend + offs_qpe, mask=mask_m[:, None], other=0.0) @@ -418,14 +969,38 @@ def _fwd_kernel_v2( for start_n in range(0, cur_seq_len_prefix, BLOCK_N): start_n = tl.multiple_of(start_n, BLOCK_N) mask_n = (start_n + offs_n) < cur_seq_len_prefix + ALL_MASK_N = tl.min(mask_n.to(tl.int32), axis=0) == 1 final_mask = mask_m[:, None] & mask_n[None, :] if USE_CUSTOM_MASK and not SKIP_PREFIX_CUSTOM_MASK: + # if USE_MLS: + # group_id = offs_m // kv_group_num + # custom_mask_group = tl.matrix_load( + # mask_ptr + cur_seq_mask_start_idx, + # shape=(cur_seq_len_prefix, cur_seq_len + window_kv_offset), + # strides=(cur_seq_len + window_kv_offset, 1), + # block_shape=(BLOCK_M // kv_group_num, BLOCK_N), + # offsets=((cur_block_m * Q_SEQ).to(tl.int32), + # (window_kv_offset + start_n).to(tl.int32)), + # ) + # custom_mask = custom_mask_group[group_id[:, None], offs_n[None, :]] + # if not (ALL_MASK_M & ALL_MASK_N): + # custom_mask = tl.where((mask_m[:, None] & mask_n[None, :]), custom_mask, 0) + # else: + # custom_mask = tl.load( + # mask_ptr + # + cur_seq_mask_start_idx + # + (query_pos[:, None]) * (cur_seq_len + window_kv_offset) + # + 
window_kv_offset + # + start_n + # + offs_n[None, :], + # mask=(mask_m[:, None] & mask_n[None, :]), + # other=0, + # ) custom_mask = tl.load( mask_ptr + cur_seq_mask_start_idx - + (cur_block_m * BLOCK_M + offs_m[:, None]) - * (cur_seq_len + window_kv_offset) + + (query_pos[:, None]) * (cur_seq_len + window_kv_offset) + window_kv_offset + start_n + offs_n[None, :], @@ -435,7 +1010,7 @@ def _fwd_kernel_v2( final_mask &= custom_mask if SLIDING_WINDOW_SIZE > 0: window_mask = ( - cur_seq_len_prefix + cur_block_m * BLOCK_M + offs_m[:, None] + cur_seq_len_prefix + query_pos[:, None] ) <= (start_n + offs_n[None, :] + SLIDING_WINDOW_SIZE) final_mask &= window_mask @@ -450,6 +1025,31 @@ def _fwd_kernel_v2( other=0, ) + # if USE_MLS: + # offs_kv_start_idx = tl.load( + # kv_indices + cur_seq_kv_start_idx + start_n, + # ) + + # k = tl.matrix_load( + # K_Buffer + cur_kv_head * stride_buf_kh, + # shape=(Lq, cur_seq_len_prefix.to(tl.int32)), + # strides=(1, stride_buf_kbs), + # block_shape=(BLOCK_DMODEL, BLOCK_N), + # offsets=(0, offs_kv_start_idx.to(tl.int32)), + # ) + # if not (ALL_MASK_N & ALL_MASK_D): + # k = tl.where((mask_n[None, :]) & (mask_d[:, None]), k, 0.0) + # else: + # offs_buf_k = ( + # offs_kv_loc[None, :] * stride_buf_kbs + # + cur_kv_head * stride_buf_kh + # + offs_d[:, None] + # ) + # k = tl.load( + # K_Buffer + offs_buf_k, + # mask=(mask_n[None, :]) & (mask_d[:, None]), + # other=0.0, + # ) offs_buf_k = ( offs_kv_loc[None, :] * stride_buf_kbs + cur_kv_head * stride_buf_kh @@ -460,9 +1060,29 @@ def _fwd_kernel_v2( mask=(mask_n[None, :]) & (mask_d[:, None]), other=0.0, ) - qk = tl.dot(q.to(k.dtype), k) if BLOCK_DPE > 0: + # if USE_MLS: + # kpe = tl.matrix_load( + # K_Buffer + cur_kv_head * stride_buf_kh, + # shape=(Lq, cur_seq_len_prefix.to(tl.int32)), + # strides=(1, stride_buf_kbs), + # block_shape=(BLOCK_DPE, BLOCK_N), + # offsets=(BLOCK_DMODEL, offs_kv_start_idx.to(tl.int32)), + # ) + # if not ALL_MASK_N: + # kpe = tl.where((mask_n[None, :]), kpe, 0.0) + # 
else: + # offs_kpe = ( + # offs_kv_loc[None, :] * stride_buf_kbs + # + cur_kv_head * stride_buf_kh + # + offs_dpe[:, None] + # ) + # kpe = tl.load( + # K_Buffer + offs_kpe, + # mask=mask_n[None, :], + # other=0.0, + # ) offs_kpe = ( offs_kv_loc[None, :] * stride_buf_kbs + cur_kv_head * stride_buf_kh @@ -484,30 +1104,40 @@ def _fwd_kernel_v2( qk = tl.where(final_mask, qk, float("-inf")) - # row_max_fixed avoids exp(-inf - (-inf)) when a row is all -inf in this tile; - # only needed under sliding window or custom mask (plain causal matches v1). - if SLIDING_WINDOW_SIZE > 0 or ( - USE_CUSTOM_MASK and not SKIP_PREFIX_CUSTOM_MASK - ): - row_max = tl.max(qk, 1) - row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) - n_e_max = tl.maximum(row_max_fixed, e_max) - else: - n_e_max = tl.maximum(tl.max(qk, 1), e_max) + row_max = tl.max(qk, 1) + row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) + n_e_max = tl.maximum(row_max_fixed, e_max) re_scale = tl.exp(e_max - n_e_max) p = tl.exp(qk - n_e_max[:, None]) deno = deno * re_scale + tl.sum(p, 1) + # if USE_MLS: + # v = tl.matrix_load( + # V_Buffer + cur_kv_head * stride_buf_vh, + # shape=(cur_seq_len_prefix.to(tl.int32), Lv), + # strides=(stride_buf_kbs, 1), + # block_shape=(BLOCK_N, BLOCK_DV), + # offsets=(offs_kv_start_idx.to(tl.int32), 0), + # ) + # if not (ALL_MASK_N & ALL_MASK_DV): + # v = tl.where((mask_n[:, None] & mask_dv[None, :]), v, 0.0) + # else: + # offs_buf_v = ( + # offs_kv_loc[:, None] * stride_buf_vbs + # + cur_kv_head * stride_buf_vh + # + offs_dv[None, :] + # ) + # v = tl.load( + # V_Buffer + offs_buf_v, mask=mask_n[:, None] & mask_dv[None, :], other=0.0 + # ) offs_buf_v = ( offs_kv_loc[:, None] * stride_buf_vbs + cur_kv_head * stride_buf_vh + offs_dv[None, :] ) v = tl.load( - V_Buffer + offs_buf_v, - mask=mask_n[:, None] & mask_dv[None, :], - other=0.0, + V_Buffer + offs_buf_v, mask=mask_n[:, None] & mask_dv[None, :], other=0.0 ) p = p.to(v.dtype) acc = acc * re_scale[:, None] + 
tl.dot(p, v) * v_scale @@ -517,19 +1147,44 @@ def _fwd_kernel_v2( cur_block_m_end = ( cur_seq_len_extend if not IS_CAUSAL - else tl.minimum(cur_seq_len_extend, (cur_block_m + 1) * BLOCK_M) + else tl.minimum(cur_seq_len_extend, (cur_block_m + 1) * Q_SEQ) ) for start_n in range(0, cur_block_m_end, BLOCK_N): start_n = tl.multiple_of(start_n, BLOCK_N) mask_n = (start_n + offs_n) < cur_block_m_end + ALL_MASK_N = tl.min(mask_n.to(tl.int32), axis=0) == 1 final_mask = mask_m[:, None] & mask_n[None, :] if USE_CUSTOM_MASK: + # if USE_MLS: + # group_id = offs_m // kv_group_num + # custom_mask_group = tl.matrix_load( + # mask_ptr + cur_seq_mask_start_idx, + # shape=(cur_block_m_end, cur_seq_len + window_kv_offset), + # strides=(cur_seq_len + window_kv_offset, 1), + # block_shape=(BLOCK_M // kv_group_num, BLOCK_N), + # offsets=((cur_block_m * Q_SEQ).to(tl.int32), + # (window_kv_offset + cur_seq_len_prefix + start_n).to(tl.int32)), + # ) + # custom_mask = custom_mask_group[group_id[:, None], offs_n[None, :]] + # if not (ALL_MASK_M & ALL_MASK_N): + # custom_mask = tl.where((mask_m[:, None] & mask_n[None, :]), custom_mask, 0) + # else: + # custom_mask = tl.load( + # mask_ptr + # + cur_seq_mask_start_idx + # + (query_pos[:, None]) * (cur_seq_len + window_kv_offset) + # + window_kv_offset + # + cur_seq_len_prefix + # + start_n + # + offs_n[None, :], + # mask=(mask_m[:, None] & mask_n[None, :]), + # other=0, + # ) custom_mask = tl.load( mask_ptr + cur_seq_mask_start_idx - + (cur_block_m * BLOCK_M + offs_m[:, None]) - * (cur_seq_len + window_kv_offset) + + (query_pos[:, None]) * (cur_seq_len + window_kv_offset) + window_kv_offset + cur_seq_len_prefix + start_n @@ -540,9 +1195,7 @@ def _fwd_kernel_v2( custom_mask &= mask_m[:, None] & mask_n[None, :] final_mask &= custom_mask elif IS_CAUSAL: - mask_causual = (cur_block_m * BLOCK_M + offs_m[:, None]) >= ( - start_n + offs_n[None, :] - ) + mask_causual = query_pos[:, None] >= (start_n + offs_n[None, :]) mask_causual &= mask_m[:, None] & 
mask_n[None, :] final_mask &= mask_causual else: @@ -550,7 +1203,7 @@ def _fwd_kernel_v2( final_mask &= mask_non_causal if SLIDING_WINDOW_SIZE > 0: - window_mask = (cur_block_m * BLOCK_M + offs_m[:, None]) <= ( + window_mask = query_pos[:, None] <= ( start_n + offs_n[None, :] + SLIDING_WINDOW_SIZE ) final_mask &= window_mask @@ -560,27 +1213,49 @@ def _fwd_kernel_v2( SKIP_TILE = tl.max(tl.max(final_mask.to(tl.int32), axis=1), axis=0) == 0 if not SKIP_TILE: - offs_k = ( - (cur_seq_extend_start_idx + start_n + offs_n[None, :]) * stride_kbs - + cur_kv_head * stride_kh - + offs_d[:, None] - ) - k = tl.load( - K_Extend + offs_k, mask=(mask_n[None, :]) & (mask_d[:, None]), other=0.0 - ) - - qk = tl.dot(q.to(k.dtype), k, out_dtype=tl.float32) - if BLOCK_DPE > 0: - offs_kpe = ( + if USE_MLS: + k = tl.matrix_load( + K_Extend + cur_kv_head * stride_kh, + shape=(kv_head_num, Lq), + strides=(1, stride_kbs), + block_shape=(BLOCK_DMODEL, BLOCK_N), + offsets=(0, (cur_seq_extend_start_idx + start_n).to(tl.int32)), + ) + if not (ALL_MASK_D & ALL_MASK_N): + k = tl.where((mask_d[:, None] & (mask_n[None, :])), k, 0.0) + else: + offs_k = ( (cur_seq_extend_start_idx + start_n + offs_n[None, :]) * stride_kbs + cur_kv_head * stride_kh - + offs_dpe[:, None] + + offs_d[:, None] ) - kpe = tl.load( - K_Extend + offs_kpe, - mask=mask_n[None, :], - other=0.0, + k = tl.load( + K_Extend + offs_k, mask=(mask_n[None, :]) & (mask_d[:, None]), other=0.0 ) + + qk = tl.dot(q.to(k.dtype), k, out_dtype=tl.float32) + if BLOCK_DPE > 0: + if USE_MLS: + kpe = tl.matrix_load( + K_Extend + cur_kv_head * stride_kh, + shape=(kv_head_num, Lq), + strides=(1, stride_kbs), + block_shape=(BLOCK_DPE, BLOCK_N), + offsets=(BLOCK_DMODEL, (cur_seq_extend_start_idx + start_n).to(tl.int32)), + ) + if not ALL_MASK_N: + kpe = tl.where(mask_n[None, :], kpe, 0.0) + else: + offs_kpe = ( + (cur_seq_extend_start_idx + start_n + offs_n[None, :]) * stride_kbs + + cur_kv_head * stride_kh + + offs_dpe[:, None] + ) + kpe = tl.load( + 
K_Extend + offs_kpe, + mask=mask_n[None, :], + other=0.0, + ) qk += tl.dot(qpe.to(kpe.dtype), kpe) qk *= sm_scale @@ -593,38 +1268,49 @@ def _fwd_kernel_v2( qk = tl.where(final_mask, qk, float("-inf")) - if SLIDING_WINDOW_SIZE > 0 or USE_CUSTOM_MASK: - row_max = tl.max(qk, 1) - row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) - n_e_max = tl.maximum(row_max_fixed, e_max) - else: - n_e_max = tl.maximum(tl.max(qk, 1), e_max) + row_max = tl.max(qk, 1) + row_max_fixed = tl.where(row_max == float("-inf"), -1e20, row_max) + n_e_max = tl.maximum(row_max_fixed, e_max) re_scale = tl.exp(e_max - n_e_max) p = tl.exp(qk - n_e_max[:, None]) deno = deno * re_scale + tl.sum(p, 1) - offs_v = ( - (cur_seq_extend_start_idx + start_n + offs_n[:, None]) * stride_vbs - + cur_kv_head * stride_vh - + offs_dv[None, :] - ) - v = tl.load( - V_Extend + offs_v, mask=mask_n[:, None] & mask_dv[None, :], other=0.0 - ) + if USE_MLS: + v = tl.matrix_load( + V_Extend + cur_kv_head * stride_vh, + shape=(kv_head_num, Lv), + strides=(stride_vbs, 1), + block_shape=(BLOCK_N, BLOCK_DV), + offsets=((cur_seq_extend_start_idx + start_n).to(tl.int32), 0), + ) + if not (ALL_MASK_N & ALL_MASK_DV): + v = tl.where((mask_n[:, None] & mask_dv[None, :]), v, 0.0) + else: + offs_v = ( + (cur_seq_extend_start_idx + start_n + offs_n[:, None]) * stride_vbs + + cur_kv_head * stride_vh + + offs_dv[None, :] + ) + v = tl.load( + V_Extend + offs_v, mask=mask_n[:, None] & mask_dv[None, :], other=0.0 + ) p = p.to(v.dtype) acc = acc * re_scale[:, None] + tl.dot(p, v) e_max = n_e_max if HAS_SINK: - cur_sink = tl.load(sink_ptr + cur_head) + cur_sink = tl.load( + sink_ptr + cur_kv_head * kv_group_num + q_head_in_group, + mask=mask_m, + other=0.0, + ) deno += tl.exp(cur_sink - e_max) offs_o = ( - (cur_seq_extend_start_idx + cur_block_m * BLOCK_M + offs_m[:, None]) - * stride_obs - + cur_head * stride_oh + query_offset_0[:, None] * stride_obs + + query_offset_1[:, None] * stride_oh + offs_dv[None, :] ) if 
STORE_TRANSPOSE: @@ -700,13 +1386,118 @@ def _load_config_v2(): raise ValueError( f"{dev}-EXTEND_ATTENTION-V2-FP16.json keys must be 7-tuples matching runtime " f"want7 (kv_group_num, Lq, Lv, USE_CUSTOM_MASK, IS_CAUSAL, HAS_SINK, " - f"SLIDING_WINDOW_SIZE); got length {len(tup)} for {k!r}" + f"USE_SLIDING_WINDOW); got length {len(tup)} for {k!r}" ) res["keys"].append(tup) return res +def _load_config_v2_decode(): + """Autotuned block sizes for :func:`_fwd_kernel_v2_decode` (short extend path).""" + dev = arch_info.get_device() + fpath = f"{AITER_TRITON_CONFIGS_PATH}/{dev}-EXTEND_ATTENTION-V2-DECODE-FP16.json" + try: + with open(fpath, "r") as file: + data = json.load(file) + except FileNotFoundError: + return {"config": {}, "path": {}, "key": [], "keys": []} + res = {} + res["config"] = data["config"] + res["path"] = data.get("path", {}) + res["key"] = list(data["config"].keys()) + res["keys"] = [] + for k in res["key"]: + tup = create_tuple(k) + if len(tup) != 7: + raise ValueError( + f"{dev}-EXTEND_ATTENTION-V2-DECODE-FP16.json keys must be 7-tuples matching runtime " + f"want7 (kv_group_num, Lq, Lv, USE_CUSTOM_MASK, IS_CAUSAL, HAS_SINK, " + f"USE_SLIDING_WINDOW); got length {len(tup)} for {k!r}" + ) + res["keys"].append(tup) + return res + + +TORCH_DTYPE_TO_DTYPE = { + torch.float32: "f32", + torch.float: "f32", + torch.float16: "f16", + torch.half: "f16", + torch.bfloat16: "bf16", + torch.float64: "f64", + torch.double: "f64", + torch.float8_e4m3fn: "f8_e4m3fn", + torch.float8_e5m2: "f8_e5m2", + torch.int8: "i8", + torch.int16: "i16", + torch.int32: "i32", + torch.int64: "i64", + torch.long: "i64", + torch.uint8: "u8", + torch.bool: "bool", +} + + +@functools.lru_cache +def get_gpu_label(): + target = triton.runtime.driver.active.get_current_target() + device = torch.cuda.current_device() + num_cu = torch.cuda.get_device_properties(device).multi_processor_count + return f"{target.arch}_cu{num_cu}" + + +def _load_config_v3(): + """Autotuned configs for 
:func:`_fwd_kernel_v2` (fp8 / sglang-style scale path). + + Each ``config`` entry key must parse to a **7-tuple** via :func:`create_tuple`, matching + runtime ``want7``; 5-tuple keys are not accepted. + """ + fpath = f"{AITER_TRITON_CONFIGS_PATH}/extend_attn/_fwd_kernel_v2-device={get_gpu_label()}.json" + try: + with open(fpath, "r") as file: + data = json.load(file) + except FileNotFoundError: + return {"key_name": {}, "fpath": {}, "config": {}, "path": {}, "key": [], "keys": []} + res = {} + res["key_name"] = data["key"] + res['fpath'] = fpath + res["config"] = data["config"] + res["path"] = data.get("path", {}) + res["key"] = list(data["config"].keys()) + res["keys"] = [] + for k in res["key"]: + tup = create_tuple(k) + res["keys"].append(tup) + return res + + +def _load_config_v3_decode(): + """Autotuned block sizes for :func:`_fwd_kernel_v2_decode` (short extend path).""" + dev = arch_info.get_device() + # fpath = f"{AITER_TRITON_CONFIGS_PATH}/extend_attn/_fwd_kernel_v2_decode-device={get_gpu_label()}-dtype=bf16.json" + fpath = f"{AITER_TRITON_CONFIGS_PATH}/extend_attn/_fwd_kernel_v2_decode-device={get_gpu_label()}.json" + try: + with open(fpath, "r") as file: + data = json.load(file) + except FileNotFoundError: + return {"key_name": {}, "fpath": {}, "config": {}, "path": {}, "key": [], "keys": []} + res = {} + res["key_name"] = data["key"] + res['fpath'] = fpath + res["config"] = data["config"] + res["path"] = data.get("path", {}) + res["key"] = list(data["config"].keys()) + res["keys"] = [] + for k in res["key"]: + tup = create_tuple(k) + res["keys"].append(tup) + return res + + global_config_v2 = _load_config_v2() +global_config_v2_decode = _load_config_v2_decode() +global_config_v3 = _load_config_v3() +global_config_v3_decode = _load_config_v3_decode() default_config = { "BLOCK_M": 32, @@ -715,7 +1506,8 @@ default_config = { "matrix_instr_nonkdim": 16, "kpack": 2, "num_warps": 4, - "num_stages": 1 + "num_stages": 2, + "USE_MLS": False, } @@ -744,20 
+1536,21 @@ def _get_config_v2( use_custom_mask, is_causal, has_sink: bool, - sliding_window_size: int, + use_sliding_window: bool, ): """ Lookup order for ``_fwd_kernel_v2`` block sizes: - 1. ``want7 = (kv_group_num, Lq, Lv, use_custom_mask, is_causal, has_sink, sliding_window_size)`` + 1. ``want7 = (kv_group_num, Lq, Lv, use_custom_mask, is_causal, has_sink, USE_SLIDING_WINDOW)`` against ``{arch}-EXTEND_ATTENTION-V2-FP16.json``. JSON keys must be **7-tuple** strings, - same shape as ``want7`` (see :func:`_load_config_v2`). + same shape as ``want7`` (see :func:`_load_config_v2`). The last element is a bool: + same tuning bucket for any ``sliding_window_size > 0``; use ``False`` when disabled (``<= 0``). 2. If no V2 entry matches, :data:`default_config` (no fallback to v1 JSON). Log field mapping (typical): ``kv_group_num = q_extend.size(-2) // k_extend.size(-2)``, ``Lq = q_extend.size(-1)``, ``Lv = v_extend.size(-1)``, ``use_custom_mask = custom_mask is not None``, ``is_causal`` as passed, - ``has_sink = sinks is not None``, ``sliding_window_size`` as passed (use ``-1`` if disabled). + ``has_sink = sinks is not None``, ``USE_SLIDING_WINDOW = (sliding_window_size > 0)``. """ want7 = ( kv_group_num, @@ -766,7 +1559,7 @@ def _get_config_v2( use_custom_mask, is_causal, has_sink, - sliding_window_size, + use_sliding_window, ) for i, keys in enumerate(global_config_v2["keys"]): if keys == want7: @@ -777,10 +1570,137 @@ def _get_config_v2( return default_config, None +@functools.lru_cache(maxsize=1024) +def _get_config_v2_decode( + kv_group_num, + Lq, + Lv, + use_custom_mask, + is_causal, + has_sink: bool, + use_sliding_window: bool, +): + """ + Same ``want7`` as :func:`_get_config_v2`, but loads ``{arch}-EXTEND_ATTENTION-V2-DECODE-FP16.json`` + for :func:`_fwd_kernel_v2_decode`. 
+ """ + want7 = ( + kv_group_num, + Lq, + Lv, + use_custom_mask, + is_causal, + has_sink, + use_sliding_window, + ) + for i, keys in enumerate(global_config_v2_decode["keys"]): + if keys == want7: + key = global_config_v2_decode["key"][i] + return global_config_v2_decode["config"][key], global_config_v2_decode[ + "path" + ].get(key) + + print("WARNING: optimal V2 decode config not found, just use default config") + return default_config, None + + +def find_closest_index(target, lst): + # lst: [(index, batch_size), ...] + def key(item): + val = item[1] + diff = val - target + if diff >= 0: + return (0, diff) + else: + return (1, -diff) + + best = min(lst, key=key) + return best[0] + + +@functools.lru_cache(maxsize=1024) +def _get_config_v3(key): + _key = str(key) + _configs = global_config_v3["config"] + for k, v in _configs.items(): + if k == _key: + return v + + # find the nearest batch size + # key = (batch_size, *other) + bs = [] + _keys = global_config_v3["keys"] + for i, k in enumerate(_keys): + if k[1:] == key[1:]: + bs.append((i, k[0])) + + if bs: + __key = global_config_v3["key"][find_closest_index(key[0], bs)] + print(f'WARNING: Not found key {_key} from {global_config_v3["fpath"]}, mapping to key {__key}') + return _configs[__key] + else: + print(f'WARNING: Not found optimal config from {global_config_v3["fpath"]} with key {str(key)}, just use default config') + return default_config + + +@functools.lru_cache(maxsize=1024) +def _get_config_v3_decode(key): + _key = str(key) + _configs = global_config_v3_decode["config"] + for k, v in _configs.items(): + if k == _key: + return v + + # find the nearest batch size + # key = (batch_size, *other) + bs = [] + _keys = global_config_v3_decode["keys"] + for i, k in enumerate(_keys): + if k[1:] == key[1:]: + bs.append((i, k[0])) + + if bs: + __key = global_config_v3_decode["key"][find_closest_index(key[0], bs)] + print(f'WARNING: Not found key {_key} from {global_config_v3["fpath"]}, mapping to key {__key}') + 
return _configs[__key] + else: + print(f'WARNING: Not found optimal config from {global_config_v3_decode["fpath"]} with key {str(key)}, just use default config') + return default_config + + def has_kernel_cache(path): return False if not path or not os.path.isdir(f'{cache_knob.dir}/{path}') else True +@functools.lru_cache(maxsize=1024) +def get_v2_decode_final_grid( + max_len_extend: int, + kv_group_num: int, + block_m_cfg: int, + batch_size: int, + kv_head_num: int, +) -> tuple[int, tuple[int, int, int]]: + """Decode path: power-of-2 ``block_m_decode``; grid-3 cdiv matches kernel q_seq stride.""" + prod = max_len_extend * kv_group_num + npo2 = triton.next_power_of_2(prod) + kv_group_num_align = triton.next_power_of_2(kv_group_num) + block_m_decode = block_m_cfg + if prod < 16: + block_m_decode = 16 + elif block_m_cfg > npo2: + block_m_decode = npo2 + else: + block_m_decode = max(block_m_cfg, kv_group_num_align) + block_count = batch_size * kv_head_num * triton.cdiv( + max_len_extend, block_m_decode // kv_group_num + ) + if block_count <= 32: + block_m_decode = max(max(block_m_decode // 2, 16), kv_group_num_align) + q_seq = block_m_decode // kv_group_num + grid = (batch_size, kv_head_num, triton.cdiv(max_len_extend, q_seq)) + return block_m_decode, grid + + def to_dtype(torch_dtype): if torch_dtype == torch.float32: return 'fp32' @@ -793,7 +1713,6 @@ def to_dtype(torch_dtype): else: return str(torch_dtype) - def extend_attention_fwd( q_extend, k_extend, @@ -818,6 +1737,7 @@ def extend_attention_fwd( sinks=None, window_kv_offsets=None, xai_temperature_len=-1, + force_v2_prefill: bool = False, ): """ q_extend, k_extend, v_extend, o_extend: contiguous tensors @@ -828,7 +1748,11 @@ def extend_attention_fwd( extensions follow with defaults. ``k_scale`` / ``v_scale`` must both be ``None`` or both set (``float`` / ``int`` like sglang, or 1-element ``torch.Tensor`` on device); if both are set, :func:`_fwd_kernel_v2` is used. 
+ + If ``force_v2_prefill`` is True and v2 is active, always use :func:`_fwd_kernel_v2` + even when ``max_len_extend < 32`` (for tests / parity vs :func:`_fwd_kernel_v2_decode`). """ + # force_v2_prefill = True Lq, Lv = ( q_extend.shape[-1], v_extend.shape[-1], @@ -853,30 +1777,87 @@ def extend_attention_fwd( sm_scale = sm_scale or 1.0 / (Lq**0.5) batch_size, head_num = qo_indptr.shape[0] - 1, q_extend.shape[1] - kv_group_num = q_extend.shape[1] // k_extend.shape[1] + kv_head_num = k_extend.shape[1] + kv_group_num = head_num // kv_head_num USE_CUSTOM_MASK = custom_mask is not None # Skip custom mask for prefix part SKIP_PREFIX_CUSTOM_MASK = skip_prefix_custom_mask use_v2 = k_scale is not None or v_scale is not None + use_v2_decode = ( + use_v2 and max_len_extend < 32 and not force_v2_prefill + ) + USE_SLIDING_WINDOW = sliding_window_size > 0 if not USE_CUSTOM_MASK: - custom_mask = torch.tensor([0], dtype=torch.bool, device=q_extend.device) - mask_indptr = torch.tensor([0], dtype=torch.int32, device=q_extend.device) + # custom_mask = torch.tensor([0], dtype=torch.bool, device=q_extend.device) + # mask_indptr = torch.tensor([0], dtype=torch.int32, device=q_extend.device) + # set to None to avoid capture cudagraph err + custom_mask = None + mask_indptr = None if config is None: if q_extend.dtype == torch.float16 or q_extend.dtype == torch.bfloat16: if use_v2: - config, path = _get_config_v2( - kv_group_num, - Lq, - Lv, - USE_CUSTOM_MASK, - is_causal, - sinks is not None, - sliding_window_size, - ) + if triton_minor_version >= 5: # >= 3.5 + key = [ + batch_size, + kv_group_num, + Lq, + Lv, + USE_CUSTOM_MASK, + is_causal, + skip_prefix_custom_mask, + sinks is not None, + sliding_window_size, + xai_temperature_len, + str(q_extend.dtype), + str(k_extend.dtype), + str(v_extend.dtype), + str(o_extend.dtype), + str(k_buffer.dtype), + str(v_buffer.dtype), + str(qo_indptr.dtype), + str(kv_indptr.dtype), + str(kv_indices.dtype), + ] + for o in [ + custom_mask, + mask_indptr, + 
sinks, + window_kv_offsets + ]: + if o is not None: + if hasattr(o, 'dtype'): + key.append(str(o.dtype)) + + key = tuple(key) + if use_v2_decode: + config = _get_config_v3_decode(key) + else: + config = _get_config_v3(key) + else: + if use_v2_decode: + config, path = _get_config_v2_decode( + kv_group_num, + Lq, + Lv, + USE_CUSTOM_MASK, + is_causal, + sinks is not None, + USE_SLIDING_WINDOW, + ) + else: + config, path = _get_config_v2( + kv_group_num, + Lq, + Lv, + USE_CUSTOM_MASK, + is_causal, + sinks is not None, + USE_SLIDING_WINDOW, + ) else: keys = [kv_group_num, Lq, Lv, USE_CUSTOM_MASK, is_causal] config, path = _get_config(*keys) @@ -884,7 +1865,17 @@ def extend_attention_fwd( config, path = default_config, None assert config is not None, "ERROR: optimal config not found" - grid = (batch_size, head_num, triton.cdiv(max_len_extend, config["BLOCK_M"])) + block_m_cfg = config["BLOCK_M"] + # Decode: block_m_decode is power of 2; Q_SEQ = block_m // G (floor), same as unified BLOCK_Q; + # grid-3 cdiv(max_len_extend, q_seq) matches kernel cur_block_m stride. + + if use_v2_decode: + block_m_decode, grid = get_v2_decode_final_grid( + max_len_extend, kv_group_num, block_m_cfg, batch_size, kv_head_num + ) + # print(f"{max_len_extend=}, {use_v2_decode=}, {block_m_decode=}, {grid=}") + else: + grid = (batch_size, head_num, triton.cdiv(max_len_extend, block_m_cfg)) # num_stages = 1 # extra_kargs = {} @@ -922,32 +1913,69 @@ def extend_attention_fwd( HAS_SINK = sinks is not None assert k_scale is not None and v_scale is not None, "k_scale and v_scale must both be set" # k_scale / v_scale kept in Python API; v2 kernel TEMP omits them for perf vs v1. 
- _fwd_kernel_v2[grid]( - q_extend, - k_extend, - v_extend, - o_extend, - k_buffer, - v_buffer, - qo_indptr, - kv_indptr, - kv_indices, - custom_mask, - mask_indptr, - sinks, - window_kv_offsets, - sm_scale, - k_scale, - v_scale, - kv_group_num, - *stride_args, - SLIDING_WINDOW_SIZE=sliding_window_size, - logit_cap=logit_cap, - xai_temperature_len=xai_temperature_len, - HAS_SINK=HAS_SINK, + block_const_v2 = { **block_const, **config, - ) + } + if use_v2_decode: + block_const_v2 = {**block_const_v2, "BLOCK_M": block_m_decode} + _fwd_kernel_v2_decode[grid]( + q_extend, + k_extend, + v_extend, + o_extend, + k_buffer, + v_buffer, + qo_indptr, + kv_indptr, + kv_indices, + custom_mask, + mask_indptr, + sinks, + window_kv_offsets, + sm_scale, + k_scale, + v_scale, + *stride_args, + SLIDING_WINDOW_SIZE=sliding_window_size, + logit_cap=logit_cap, + xai_temperature_len=xai_temperature_len, + HAS_SINK=HAS_SINK, + kv_group_num=kv_group_num, + num_query_heads=head_num, + **block_const_v2, + batch_size=batch_size, + # USE_MLS=True if os.getenv("TRITON_USE_MLS", "0") == "1" and kv_group_num == head_num else False, + ) + else: + _fwd_kernel_v2[grid]( + q_extend, + k_extend, + v_extend, + o_extend, + k_buffer, + v_buffer, + qo_indptr, + kv_indptr, + kv_indices, + custom_mask, + mask_indptr, + sinks, + window_kv_offsets, + sm_scale, + k_scale, + v_scale, + kv_group_num, + *stride_args, + SLIDING_WINDOW_SIZE=sliding_window_size, + logit_cap=logit_cap, + xai_temperature_len=xai_temperature_len, + HAS_SINK=HAS_SINK, + **block_const_v2, + head_num=head_num, + batch_size=batch_size, + # USE_MLS=True if os.getenv("TRITON_USE_MLS", "0") == "1" and kv_group_num == head_num else False, + ) return fn = ( diff --git a/aiter/ops/triton/fla/fused_recurrent.py b/aiter/ops/triton/fla/fused_recurrent.py new file mode 100644 index 0000000000000000000000000000000000000000..456bc3ccada49c19a83652ce01239d3ee2b397d4 --- /dev/null +++ b/aiter/ops/triton/fla/fused_recurrent.py @@ -0,0 +1,272 @@ +# 
@functools.lru_cache
def _get_fused_recurrent_packed_decode_config(B: int, H: int, HV: int) -> dict:
    """Resolve the launch config for the packed-decode kernel.

    Lookup order: the exact ``"B=..,H=..,HV=.."`` entry; else the entry with
    matching ``H``/``HV`` whose ``B`` is closest (first one wins on ties);
    else the table's ``"default"`` entry; else the built-in default. The
    chosen entry is overlaid on a copy of the built-in default so every
    expected field is present in the result.
    """
    table = _load_fused_recurrent_packed_decode_configs()
    key = f"B={B},H={H},HV={HV}"
    cfg = table.get(key)

    if cfg is None:
        # Track the best (distance, batch, config) seen so far; strict "<"
        # keeps the earliest entry when several are equally close.
        best = None
        for name, value in table.items():
            if name == "default":
                continue
            try:
                fields = {}
                for item in name.split(","):
                    pieces = item.split("=")
                    fields[pieces[0]] = int(pieces[1])
            except Exception:
                # Entry name is not in "A=1,B=2" form; ignore it.
                continue
            if "B" not in fields:
                continue
            if fields.get("H") != H or fields.get("HV") != HV:
                continue
            distance = abs(fields["B"] - B)
            if best is None or distance < best[0]:
                best = (distance, fields["B"], value)

        if best is not None:
            _, nearest_b, cfg = best
            if TRITON_CONFIG_CHECK:
                logger.warning(
                    f"fused_recurrent_packed_decode config key '{key}' not found, "
                    f"using nearest-B config with B={nearest_b}: {cfg}."
                )

    if cfg is None:
        default_cfg = table.get("default", _DEFAULT_FUSED_RECURRENT_PACKED_DECODE_CONFIG)
        if TRITON_CONFIG_CHECK:
            logger.warning(
                f"fused_recurrent_packed_decode config key '{key}' not found, "
                f"using default config: {default_cfg}."
            )
        cfg = default_cfg

    merged = dict(_DEFAULT_FUSED_RECURRENT_PACKED_DECODE_CONFIG)
    merged.update(cfg)
    return merged
b_k = b_k * k_norm_inv + + x = tl.load(a + i_n * stride_a_tok + i_hv).to(tl.float32) + x += tl.load(dt_bias + i_hv).to(tl.float32) + softplus_x = tl.where(x <= SOFTPLUS_THRESHOLD, tl.log(1.0 + tl.exp(x)), x) + g_val = -tl.exp(tl.load(A_log + i_hv).to(tl.float32)) * softplus_x + beta_val = tl.sigmoid(tl.load(b + i_n * stride_b_tok + i_hv).to(tl.float32)) + + b_h *= tl.exp(g_val) + b_v -= tl.sum(b_h * b_k[None, :], 1) + b_v *= beta_val + b_h += b_v[:, None] * b_k[None, :] + + q_off = i_h * K + o_k + b_q = tl.load(p_mixed + q_off, mask=mask_k, other=0).to(tl.float32) + if USE_QK_L2NORM_IN_KERNEL: + q_norm_inv = tl.rsqrt(tl.sum(b_q * b_q) + 1e-6) + b_q = b_q * q_norm_inv + b_o = tl.sum(b_h * b_q[None, :], 1) + b_o = b_o * scale + tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v) + + p_ht = ht + state_idx * stride_final_state_token + p_ht = p_ht + i_hv * V * K + o_v[:, None] * K + o_k[None, :] + tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=(mask_v[:, None] & mask_k[None, :])) + + +def fused_recurrent_gated_delta_rule_packed_decode( + mixed_qkv: torch.Tensor, + a: torch.Tensor, + b: torch.Tensor, + A_log: torch.Tensor, + dt_bias: torch.Tensor, + scale: float, + initial_state: torch.Tensor, + out: torch.Tensor, + ssm_state_indices: torch.Tensor, + use_qk_l2norm_in_kernel: bool = False, + kernel_cfg: dict | None = None, +) -> Tuple[torch.Tensor, torch.Tensor]: + global HAS_DUMPED_PACKED_DECODE_KERNEL_METADATA + + if mixed_qkv.ndim != 2: + raise ValueError(f"`mixed_qkv` must be 2D, got ndim={mixed_qkv.ndim}.") + if a.ndim != 2 or b.ndim != 2: + raise ValueError(f"`a` and `b` must be 2D, got a.ndim={a.ndim}, b.ndim={b.ndim}.") + if A_log.ndim != 1 or dt_bias.ndim != 1: + raise ValueError("`A_log` and `dt_bias` must be 1D.") + if ssm_state_indices.ndim != 1: + raise ValueError("`ssm_state_indices` must be 1D.") + if initial_state.ndim != 4: + raise ValueError(f"`initial_state` must be 4D, got ndim={initial_state.ndim}.") + + dev = mixed_qkv.device + if 
any(t.device != dev for t in (a, b, A_log, dt_bias, initial_state, out, ssm_state_indices)): + raise ValueError("All tensors must be on the same device.") + + B = mixed_qkv.shape[0] + if a.shape[0] != B or b.shape[0] != B or ssm_state_indices.shape[0] != B: + raise ValueError("Batch dimensions of mixed_qkv/a/b/ssm_state_indices must match.") + + HV, V, K = initial_state.shape[-3:] + if a.shape[1] != HV or b.shape[1] != HV: + raise ValueError("`a` and `b` second dim must match HV from initial_state.") + if A_log.numel() != HV or dt_bias.numel() != HV: + raise ValueError("`A_log` and `dt_bias` numel must equal HV.") + if out.shape != (B, 1, HV, V): + raise ValueError(f"`out` must have shape {(B, 1, HV, V)}, got {tuple(out.shape)}.") + + qkv_dim = mixed_qkv.shape[1] + qk_dim = qkv_dim - HV * V + if qk_dim <= 0 or qk_dim % 2 != 0: + raise ValueError("Invalid mixed_qkv layout for packed decode.") + q_dim = qk_dim // 2 + if q_dim % K != 0: + raise ValueError("Inferred q_dim must be divisible by K.") + H = q_dim // K + if H <= 0 or HV % H != 0: + raise ValueError(f"Invalid inferred heads: H={H}, HV={HV}.") + + BK = triton.next_power_of_2(K) + cfg = kernel_cfg if kernel_cfg is not None else _get_fused_recurrent_packed_decode_config(B, H, HV) + BV = min(triton.next_power_of_2(V), int(cfg["BV"])) + + stride_mixed_qkv_tok = mixed_qkv.stride(0) + stride_a_tok = a.stride(0) + stride_b_tok = b.stride(0) + stride_init_state_token = initial_state.stride(0) + stride_final_state_token = initial_state.stride(0) + stride_indices_seq = ssm_state_indices.stride(0) + + NV = triton.cdiv(V, BV) + grid = (NV, B * HV) + launch_kwargs = dict( + mixed_qkv=mixed_qkv, + a=a, + b=b, + A_log=A_log, + dt_bias=dt_bias, + o=out, + h0=initial_state, + ht=initial_state, + ssm_state_indices=ssm_state_indices, + scale=scale, + stride_mixed_qkv_tok=stride_mixed_qkv_tok, + stride_a_tok=stride_a_tok, + stride_b_tok=stride_b_tok, + stride_init_state_token=stride_init_state_token, + 
stride_final_state_token=stride_final_state_token, + stride_indices_seq=stride_indices_seq, + H=H, + HV=HV, + K=K, + V=V, + BK=BK, + BV=BV, + SOFTPLUS_THRESHOLD=20.0, + USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel, + num_warps=cfg["num_warps"], + num_stages=cfg["num_stages"], + ) + compiled_kernel = fused_recurrent_gated_delta_rule_packed_decode_kernel[grid](**launch_kwargs) + + ''' + if not HAS_DUMPED_PACKED_DECODE_KERNEL_METADATA and compiled_kernel is not None: + print("packed decode kernel metadata") + print(f" grid: {grid}") + print(f" registers: {compiled_kernel.n_regs}") + print(f" spills: {compiled_kernel.n_spills}") + print(f" shared memory: {compiled_kernel.metadata.shared} bytes") + HAS_DUMPED_PACKED_DECODE_KERNEL_METADATA = True + ''' + return out, initial_state diff --git a/aiter/ops/triton/fla/fused_sigmoid_gating.py b/aiter/ops/triton/fla/fused_sigmoid_gating.py new file mode 100644 index 0000000000000000000000000000000000000000..6f154eda0af10279dc11bed7487e3645050ffc99 --- /dev/null +++ b/aiter/ops/triton/fla/fused_sigmoid_gating.py @@ -0,0 +1,351 @@ +# SPDX-License-Identifier: MIT + +from typing import Tuple + +import functools +import json +import os + +import torch +import triton +import triton.language as tl + +import aiter.ops.triton.utils.arch_info as arch_info +from aiter import logger +from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH + +# HAS_DUMPED_SIGMOID_GATING_KERNEL_METADATA = False +TRITON_CONFIG_CHECK = os.environ.get("TRITON_CONFIG_CHECK", "0") == "1" + +_DEFAULT_FUSED_SIGMOID_GATING_CONFIG = { + "BV": 32, + "num_warps": 1, +} + + +@functools.lru_cache(maxsize=1) +def _load_fused_sigmoid_gating_configs() -> dict: + device_name = arch_info.get_arch() + path = os.path.join( + AITER_TRITON_CONFIGS_PATH, + "fused_sigmoid_gating_delta_rule_update", + f"fused_sigmoid_gating_delta_rule_update-{device_name}.json", + ) + if not os.path.exists(path): + logger.warning( + f"fused_sigmoid_gating_delta_rule_update config not 
found at {path}, " + f"using default {_DEFAULT_FUSED_SIGMOID_GATING_CONFIG}." + ) + return {} + with open(path) as f: + payload = json.load(f) + return payload.get("config", {}) if isinstance(payload, dict) else {} + + +@functools.lru_cache +def _get_fused_sigmoid_gating_config(T: int, H: int, HV: int) -> dict: + cfgs = _load_fused_sigmoid_gating_configs() + key = f"T={T},H={H},HV={HV}" + cfg = cfgs.get(key) + if cfg is None: + candidates = [] + for k, v in cfgs.items(): + if k == "default": + continue + try: + parts = {x.split("=")[0]: int(x.split("=")[1]) for x in k.split(",")} + except Exception: + continue + if parts.get("H") == H and parts.get("HV") == HV and "T" in parts: + candidates.append((abs(parts["T"] - T), parts["T"], v)) + if candidates: + candidates.sort(key=lambda x: x[0]) + _, nearest_t, cfg = candidates[0] + if TRITON_CONFIG_CHECK: + logger.warning( + f"fused_sigmoid_gating config key '{key}' not found, " + f"using nearest-T config with T={nearest_t}: {cfg}." + ) + if cfg is None: + default_cfg = cfgs.get("default", _DEFAULT_FUSED_SIGMOID_GATING_CONFIG) + if TRITON_CONFIG_CHECK: + logger.warning( + f"fused_sigmoid_gating config key '{key}' not found, " + f"using default config: {default_cfg}." 
@triton.heuristics(
    {
        "USE_INITIAL_STATE": lambda args: args["h0"] is not None,
        "IS_VARLEN": lambda args: args["cu_seqlens"] is not None,
        "IS_CONTINUOUS_BATCHING": lambda args: args["ssm_state_indices"] is not None,
        "IS_SPEC_DECODING": lambda args: args["num_accepted_tokens"] is not None,
    }
)
@triton.jit(do_not_specialize=["N", "T"])
def fused_sigmoid_gating_delta_rule_update_kernel(
    A_log,
    a,
    b,
    dt_bias,
    beta,
    threshold,
    q,
    k,
    v,
    o,
    h0,
    ht,
    cu_seqlens,
    ssm_state_indices,
    num_accepted_tokens,
    scale,
    N: tl.int64,
    T: tl.int64,
    B: tl.constexpr,
    H: tl.constexpr,
    HV: tl.constexpr,
    K: tl.constexpr,
    V: tl.constexpr,
    BK: tl.constexpr,
    BV: tl.constexpr,
    stride_init_state_token: tl.constexpr,
    stride_final_state_token: tl.constexpr,
    stride_indices_seq: tl.constexpr,
    stride_indices_tok: tl.constexpr,
    USE_INITIAL_STATE: tl.constexpr,
    INPLACE_FINAL_STATE: tl.constexpr,
    USE_QK_L2NORM_IN_KERNEL: tl.constexpr,
    IS_VARLEN: tl.constexpr,
    IS_CONTINUOUS_BATCHING: tl.constexpr,
    IS_SPEC_DECODING: tl.constexpr,
    IS_KDA: tl.constexpr,
):
    """
    Fused softplus/sigmoid gating + recurrent delta-rule update.

    Each program handles one (K-block, V-block, sequence * value-head) tile and
    walks the sequence's tokens in order, keeping a ``[BV, BK]`` float32 state
    tile ``b_h``. Per token: decay ``b_h`` by ``exp(g)``, apply the delta-rule
    correction to ``v``, scale it by ``sigmoid(b)``, add the rank-1 update
    ``v k^T`` to ``b_h``, write ``o = b_h @ (q * scale)`` and store the state.

    With ``IS_KDA`` the gate input ``a`` and ``dt_bias`` carry one value per K
    element (addressed as ``(token * HV + head) * K + k``); otherwise one value
    per value-head.
    """
    i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2)
    i_n, i_hv = i_nh // HV, i_nh % HV
    i_h = i_hv // (HV // H)
    if IS_VARLEN:
        bos, eos = (
            tl.load(cu_seqlens + i_n).to(tl.int64),
            tl.load(cu_seqlens + i_n + 1).to(tl.int64),
        )
        # `all` = total token count (stride between K-block slabs of `o`);
        # T becomes this sequence's own length.
        all = T
        T = eos - bos
    else:
        bos, eos = i_n * T, i_n * T + T
        all = B * T

    if T == 0:
        return

    o_k = i_k * BK + tl.arange(0, BK)
    o_v = i_v * BV + tl.arange(0, BV)

    p_q = q + (bos * H + i_h) * K + o_k
    p_k = k + (bos * H + i_h) * K + o_k
    p_v = v + (bos * HV + i_hv) * V + o_v

    p_A_log = A_log + i_hv
    if not IS_KDA:
        p_a = a + bos * HV + i_hv
        p_dt_bias = dt_bias + i_hv
    else:
        p_a = a + (bos * HV + i_hv) * K + o_k
        p_dt_bias = dt_bias + i_hv * K + o_k

    p_b = b + bos * HV + i_hv
    p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v

    mask_k = o_k < K
    mask_v = o_v < V
    mask_h = mask_v[:, None] & mask_k[None, :]

    b_A_log = tl.exp(tl.load(p_A_log).to(tl.float32))
    if not IS_KDA:
        # Per-head bias is loop invariant in the non-KDA layout.
        b_dt_bias = tl.load(p_dt_bias).to(tl.float32)

    b_h = tl.zeros([BV, BK], dtype=tl.float32)
    if USE_INITIAL_STATE:
        if IS_CONTINUOUS_BATCHING:
            if IS_SPEC_DECODING:
                i_t = tl.load(num_accepted_tokens + i_n).to(tl.int64) - 1
            else:
                i_t = 0
            # Honour the token stride of the index tensor (previously the
            # token offset was added unscaled, i.e. a stride of 1 was assumed).
            state_idx = tl.load(
                ssm_state_indices + i_n * stride_indices_seq + i_t * stride_indices_tok
            ).to(tl.int64)
            if state_idx < 0:
                return
            p_h0 = h0 + state_idx * stride_init_state_token
        else:
            p_h0 = h0 + bos * HV * V * K
        p_h0 = p_h0 + i_hv * V * K + o_v[:, None] * K + o_k[None, :]
        b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32)

    for i_t in range(0, T):
        b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32)
        b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32)
        b_b = tl.load(p_b).to(tl.float32)

        if not IS_KDA:
            x = tl.load(p_a).to(tl.float32) + b_dt_bias
        else:
            # Per-K vectors: mask the padded lanes (o_k >= K) so we never read
            # past the K values of this head. Padded lanes only ever multiply
            # state columns that are already zero.
            x = tl.load(p_a, mask=mask_k, other=0).to(tl.float32) + tl.load(
                p_dt_bias, mask=mask_k, other=0
            ).to(tl.float32)
        # softplus with linear pass-through above the threshold
        softplus_x = tl.where(
            beta * x <= threshold, (1 / beta) * tl.log(1 + tl.exp(beta * x)), x
        )
        b_g = -b_A_log * softplus_x
        b_beta = tl.sigmoid(b_b)
        b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32)

        if USE_QK_L2NORM_IN_KERNEL:
            b_q = b_q * (tl.rsqrt(tl.sum(b_q * b_q) + 1e-6))
            b_k = b_k * (tl.rsqrt(tl.sum(b_k * b_k) + 1e-6))
        b_q = b_q * scale
        if not IS_KDA:
            b_h *= tl.exp(b_g)
        else:
            b_h *= tl.exp(b_g[None, :])
        b_v -= tl.sum(b_h * b_k[None, :], 1)
        b_v *= b_beta
        b_h += b_v[:, None] * b_k[None, :]
        b_o = tl.sum(b_h * b_q[None, :], 1)
        tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v)

        if INPLACE_FINAL_STATE:
            final_state_idx = tl.load(
                ssm_state_indices + i_n * stride_indices_seq + i_t * stride_indices_tok
            ).to(tl.int64)
            if final_state_idx >= 0:
                p_ht = ht + final_state_idx * stride_final_state_token
                p_ht = p_ht + i_hv * V * K + o_v[:, None] * K + o_k[None, :]
                tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h)
        else:
            p_ht = ht + (bos + i_t) * stride_final_state_token
            p_ht = p_ht + i_hv * V * K + o_v[:, None] * K + o_k[None, :]
            tl.store(p_ht, b_h.to(p_ht.dtype.element_ty), mask=mask_h)

        # Advance every pointer by one token.
        p_q += H * K
        p_k += H * K
        p_o += HV * V
        p_v += HV * V
        p_b += HV
        if not IS_KDA:
            p_a += HV
        else:
            # One token of `a` spans HV * K values in the KDA layout; stepping
            # by HV (as before) read the wrong gate values from token 1 onward.
            p_a += HV * K
+ ) + + o = q.new_empty(NK, *v.shape) + final_state = initial_state if inplace_final_state else q.new_empty(T, HV, V, K, dtype=initial_state.dtype) + + stride_init_state_token = initial_state.stride(0) + stride_final_state_token = final_state.stride(0) + + if ssm_state_indices is None: + stride_indices_seq, stride_indices_tok = 1, 1 + elif ssm_state_indices.ndim == 1: + stride_indices_seq, stride_indices_tok = ssm_state_indices.stride(0), 1 + elif ssm_state_indices.ndim == 2: + stride_indices_seq, stride_indices_tok = ssm_state_indices.stride() + else: + raise ValueError( + f"ssm_state_indices must be 1D/2D when provided, got ndim={ssm_state_indices.ndim}." + ) + + grid = (NK, NV, N * HV) + compiled_kernel = fused_sigmoid_gating_delta_rule_update_kernel[grid]( + A_log=A_log, + a=a.contiguous(), + b=b.contiguous(), + dt_bias=dt_bias, + beta=beta, + threshold=threshold, + q=q.contiguous(), + k=k.contiguous(), + v=v.contiguous(), + o=o, + h0=initial_state, + ht=final_state, + cu_seqlens=cu_seqlens, + ssm_state_indices=ssm_state_indices, + num_accepted_tokens=num_accepted_tokens, + scale=scale, + N=N, + T=T, + B=B, + H=H, + HV=HV, + K=K, + V=V, + BK=BK, + BV=BV, + stride_init_state_token=stride_init_state_token, + stride_final_state_token=stride_final_state_token, + stride_indices_seq=stride_indices_seq, + stride_indices_tok=stride_indices_tok, + INPLACE_FINAL_STATE=inplace_final_state, + USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel, + IS_KDA=is_kda, + num_warps=num_warps, + num_stages=1, + ) + ''' + if not HAS_DUMPED_SIGMOID_GATING_KERNEL_METADATA and compiled_kernel is not None: + print("sigmoid gating kernel metadata") + print(f" grid: {grid}") + print(f" registers: {compiled_kernel.n_regs}") + print(f" spills: {compiled_kernel.n_spills}") + print(f" shared memory: {compiled_kernel.metadata.shared} bytes") + HAS_DUMPED_SIGMOID_GATING_KERNEL_METADATA = True + ''' + return o.squeeze(0), final_state diff --git a/aiter/ops/triton/fla/fused_sigmoid_gating_recurrent.py 
@functools.lru_cache(maxsize=1)
def _load_fused_sigmoid_gating_recurrent_configs() -> dict:
    """Read the per-architecture tuning table for the recurrent sigmoid-gating kernel.

    The file is looked up under ``AITER_TRITON_CONFIGS_PATH`` using the
    architecture name from ``arch_info.get_arch()``. Returns the ``"config"``
    entry of the JSON document, or an empty dict when the file is missing
    (a warning is logged) or the document is not a JSON object. The result is
    cached, so the file is read at most once per process.
    """
    device_name = arch_info.get_arch()
    kernel_dir = "fused_sigmoid_gating_delta_rule_update_recurrent"
    file_name = f"fused_sigmoid_gating_delta_rule_update_recurrent-{device_name}.json"
    path = os.path.join(AITER_TRITON_CONFIGS_PATH, kernel_dir, file_name)

    if os.path.exists(path):
        with open(path) as fh:
            loaded = json.load(fh)
        if isinstance(loaded, dict):
            return loaded.get("config", {})
        return {}

    logger.warning(
        f"fused_sigmoid_gating_delta_rule_update_recurrent config not found at {path}, "
        f"using default {_DEFAULT_FUSED_SIGMOID_GATING_REC_CONFIG}."
    )
    return {}
+ ) + cfg = default_cfg + merged = dict(_DEFAULT_FUSED_SIGMOID_GATING_REC_CONFIG) + merged.update(cfg) + return merged + + +@triton.jit(do_not_specialize=["T"]) +def fused_sigmoid_gating_delta_rule_update_kernel( + A_log, + a, + dt_bias, + softplus_beta, + softplus_threshold, + q, + k, + v, + b, + o, + h0_source, + h0_indices, + cu_seqlens, + # Parameters for target_verify support (unused for decode) + intermediate_states_buffer, + intermediate_state_indices, + cache_steps, + retrieve_parent_token_ptr, + stride_retrieve_parent_token_seq: tl.constexpr, + stride_retrieve_parent_token_token: tl.constexpr, + # ================================================ + scale, + T, + stride_q, + stride_k, + stride_v, + stride_b, + NP2_T: tl.constexpr, + B: tl.constexpr, + H: tl.constexpr, + HV: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + USE_QK_L2NORM_IN_KERNEL: tl.constexpr, + IS_VARLEN: tl.constexpr, + IS_KDA: tl.constexpr, + # Optional flags for target_verify support (default False for decode) + DISABLE_STATE_UPDATE: tl.constexpr = False, + CACHE_INTERMEDIATE_STATES: tl.constexpr = False, + HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: tl.constexpr = False, +): + """ + Fused kernel that combines sigmoid gating computation with recurrent delta rule update. 
+ """ + i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_n, i_hv = i_nh // HV, i_nh % HV + i_h = i_hv // (HV // H) + + if IS_VARLEN: + bos, eos = ( + tl.load(cu_seqlens + i_n).to(tl.int64), + tl.load(cu_seqlens + i_n + 1).to(tl.int64), + ) + all = T + T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + all = B * T + + o_k = i_k * BK + tl.arange(0, BK) + o_v = i_v * BV + tl.arange(0, BV) + + p_q = q + bos * stride_q + i_h * K + o_k + p_k = k + bos * stride_k + i_h * K + o_k + p_v = v + bos * stride_v + i_hv * V + o_v + p_b = b + bos * stride_b + i_hv + p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v + + # Gating computation pointers + p_A_log = A_log + i_hv + if IS_KDA: + p_a = a + (bos * HV + i_hv) * K + o_k + p_dt_bias = dt_bias + i_hv * K + o_k + else: + p_a = a + bos * HV + i_hv + p_dt_bias = dt_bias + i_hv + + mask_k = o_k < K + mask_v = o_v < V + mask_h = mask_k[:, None] & mask_v[None, :] + + b_h = tl.zeros([BK, BV], dtype=tl.float32) + if USE_INITIAL_STATE: + idx = tl.load(h0_indices + i_n) + if idx >= 0: + p_h0 = ( + h0_source + + idx * HV * K * V + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32) + + # Preload tree attention data if needed + if HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: + token_indices = tl.arange(0, NP2_T) + mask_retrieve = token_indices < T + retrieve_parent_token_base = ( + retrieve_parent_token_ptr + + (i_n * stride_retrieve_parent_token_seq) + + token_indices * stride_retrieve_parent_token_token + ) + parent_idx_tokens = tl.load( + retrieve_parent_token_base, mask=mask_retrieve, other=0 + ) + + # Prepare intermediate state cache index if enabled + cache_idx = -1 + if CACHE_INTERMEDIATE_STATES: + cache_idx = tl.load(intermediate_state_indices + i_n) + # Invariant across timesteps. 
+ b_A = tl.exp(tl.load(p_A_log).to(tl.float32)) + if not IS_KDA: + b_dt_bias = tl.load(p_dt_bias).to(tl.float32) + + step_idx = 0 + for _ in range(0, T): + # Tree attention: load parent's cached state + if HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: + # step_idx == 0 uses b_h from USE_INITIAL_STATE + if step_idx != 0 and cache_idx >= 0: + parent_step_idx = tl.sum( + tl.where(token_indices == step_idx, parent_idx_tokens, 0) + ) + step_offset = parent_step_idx * HV * K * V + cache_ptr = ( + intermediate_states_buffer + + cache_idx * cache_steps * HV * K * V + + step_offset + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + b_h = tl.load(cache_ptr, mask=mask_h, other=0).to(tl.float32) + + # Load k first; q is loaded later right before output to reduce register live range. + b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32) + + # Compute sigmoid gating + # Load gating parameters + if IS_KDA: + b_a = tl.load(p_a, mask=mask_k, other=0).to(tl.float32) + b_dt_bias = tl.load(p_dt_bias, mask=mask_k, other=0).to(tl.float32) + else: + b_a = tl.load(p_a).to(tl.float32) + + # Compute g with tighter live ranges for intermediates. + x = b_a + b_dt_bias + x_scaled = softplus_beta * x + x = tl.where( + x_scaled <= softplus_threshold, + (1.0 / softplus_beta) * tl.log(1.0 + tl.exp(x_scaled)), + x, + ) + b_g = -b_A * x + + # Apply L2 normalization to k early; q normalization is deferred until q is loaded. 
+ if USE_QK_L2NORM_IN_KERNEL: + b_k = b_k * tl.rsqrt(tl.sum(b_k * b_k) + 1e-6) + + # Apply gating to hidden state: h *= exp(g) + if IS_KDA: + b_h *= tl.exp(b_g[:, None]) + else: + b_h *= tl.exp(b_g) + + b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32) + + # Delta rule: v -= sum(h * k, dim=0) + b_v -= tl.sum(b_h * b_k[:, None], 0) + + # Apply beta gating: v *= beta + b_v *= tl.sigmoid(tl.load(p_b).to(tl.float32)) + + # Update hidden state: h += k[:, None] * v[None, :] + b_h += b_k[:, None] * b_v[None, :] + + # Load q late to shorten q live range and lower peak register pressure. + b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32) + if USE_QK_L2NORM_IN_KERNEL: + b_q = b_q * tl.rsqrt(tl.sum(b_q * b_q) + 1e-6) + b_q = b_q * scale + + # Compute output: o = sum(h * q, dim=0) + b_o = tl.sum(b_h * b_q[:, None], 0) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v) + + # Cache intermediate states if enabled + if CACHE_INTERMEDIATE_STATES: + if cache_idx >= 0: + step_offset = step_idx * HV * K * V + cache_ptr = ( + intermediate_states_buffer + + cache_idx * cache_steps * HV * K * V + + step_offset + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + tl.store(cache_ptr, b_h.to(cache_ptr.dtype.element_ty), mask=mask_h) + + step_idx += 1 + + # Update pointers for next timestep + p_q += stride_q + p_k += stride_k + p_v += stride_v + p_b += stride_b + p_o += HV * V + if IS_KDA: + p_a += HV * K + else: + p_a += HV + + # Store final state back to h0_source with bounds checking + if not DISABLE_STATE_UPDATE: + if USE_INITIAL_STATE: + idx = tl.load(h0_indices + i_n) + if idx >= 0: + p_h0 = ( + h0_source + + idx * HV * K * V + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + tl.store(p_h0, b_h.to(p_h0.dtype.element_ty), mask=mask_h) + + +def fused_sigmoid_gating_delta_rule_update( + A_log: torch.Tensor, + a: torch.Tensor, + dt_bias: torch.Tensor, + softplus_beta: float, + softplus_threshold: float, + q: torch.Tensor, + k: torch.Tensor, + v: 
torch.Tensor, + b: torch.Tensor, + initial_state_source: torch.Tensor, + initial_state_indices: torch.Tensor, + scale: Optional[float] = None, + use_qk_l2norm_in_kernel: bool = False, + cu_seqlens: Optional[torch.Tensor] = None, + is_kda: bool = False, + # Optional parameters for target_verify support + disable_state_update: bool = False, + intermediate_states_buffer: Optional[torch.Tensor] = None, + intermediate_state_indices: Optional[torch.Tensor] = None, + cache_steps: Optional[int] = None, + retrieve_parent_token: Optional[torch.Tensor] = None, + kernel_cfg: dict | None = None, +): + global HAS_DUMPED_SIGMOID_GATING_REC_KERNEL_METADATA + """ + Fused triton implementation of sigmoid gating delta rule update. + This function uses a single fused kernel that combines both sigmoid gating computation + and the recurrent delta rule update for better performance. + + Supports both decode and target_verify modes: + - decode: standard single-step update with state write-back + - target_verify: multi-step with intermediate state caching, optional tree attention, + and optional state update disable + """ + B, T, H, K, V = *k.shape, v.shape[-1] + stride_q = q.stride()[1] + stride_k = k.stride()[1] + stride_v = v.stride()[1] + stride_b = b.stride()[-2] + HV = v.shape[2] + N = B if cu_seqlens is None else len(cu_seqlens) - 1 + BK = triton.next_power_of_2(K) + cfg = kernel_cfg if kernel_cfg is not None else _get_fused_sigmoid_gating_recurrent_config(T, H, HV) + BV = min(triton.next_power_of_2(V), int(cfg["BV"])) + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + assert NK == 1, "NK > 1 is not supported yet" + num_warps = int(cfg["num_warps"]) + + if scale is None: + scale = k.shape[-1] ** -0.5 + else: + assert scale > 0, "scale must be positive" + + o = q.new_empty(NK, *v.shape) + + # Prepare retrieve_parent_token strides + if retrieve_parent_token is not None: + stride_retrieve_parent_token_seq = retrieve_parent_token.stride(0) + stride_retrieve_parent_token_token = 
retrieve_parent_token.stride(1) + else: + stride_retrieve_parent_token_seq = 0 + stride_retrieve_parent_token_token = 0 + + NP2_T = triton.next_power_of_2(T) + + grid = (NK, NV, N * HV) + + compiled_kernel = fused_sigmoid_gating_delta_rule_update_kernel[grid]( + A_log=A_log, + a=a, + dt_bias=dt_bias, + softplus_beta=softplus_beta, + softplus_threshold=softplus_threshold, + q=q, + k=k, + v=v, + b=b, + o=o, + h0_source=initial_state_source, + h0_indices=initial_state_indices, + cu_seqlens=cu_seqlens, + intermediate_states_buffer=intermediate_states_buffer, + intermediate_state_indices=intermediate_state_indices, + cache_steps=0 if cache_steps is None else cache_steps, + retrieve_parent_token_ptr=retrieve_parent_token, + stride_retrieve_parent_token_seq=stride_retrieve_parent_token_seq, + stride_retrieve_parent_token_token=stride_retrieve_parent_token_token, + scale=scale, + T=T, + stride_q=stride_q, + stride_k=stride_k, + stride_v=stride_v, + stride_b=stride_b, + NP2_T=NP2_T, + B=B, + H=H, + HV=HV, + K=K, + V=V, + BK=BK, + BV=BV, + USE_INITIAL_STATE=initial_state_source is not None, + USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel, + IS_VARLEN=cu_seqlens is not None, + IS_KDA=is_kda, + DISABLE_STATE_UPDATE=disable_state_update, + CACHE_INTERMEDIATE_STATES=intermediate_states_buffer is not None, + HAS_EAGLE_TREE_CUSTOM_ATTN_MASK=retrieve_parent_token is not None, + num_warps=num_warps, + num_stages=1, + ) + ''' + if not HAS_DUMPED_SIGMOID_GATING_REC_KERNEL_METADATA and compiled_kernel is not None: + print("sigmoid gating recurrent kernel metadata") + print(f" grid: {grid}") + print(f" registers: {compiled_kernel.n_regs}") + print(f" spills: {compiled_kernel.n_spills}") + print(f" shared memory: {compiled_kernel.metadata.shared} bytes") + HAS_DUMPED_SIGMOID_GATING_REC_KERNEL_METADATA = True + ''' + o = o.squeeze(0) + return o diff --git a/aiter/ops/triton/fla/fused_sigmoid_gating_recurrent_ref.py b/aiter/ops/triton/fla/fused_sigmoid_gating_recurrent_ref.py new file 
mode 100644 index 0000000000000000000000000000000000000000..b1d34c0c7c77f3cd3535dec0c96c85454d009778 --- /dev/null +++ b/aiter/ops/triton/fla/fused_sigmoid_gating_recurrent_ref.py @@ -0,0 +1,353 @@ +from typing import Optional + +import torch +import triton +import triton.language as tl + + +@triton.jit(do_not_specialize=["T"]) +def fused_sigmoid_gating_delta_rule_update_kernel_ref( + A_log, + a, + dt_bias, + softplus_beta, + softplus_threshold, + q, + k, + v, + b, + o, + h0_source, + h0_indices, + cu_seqlens, + # Parameters for target_verify support (unused for decode) + intermediate_states_buffer, + intermediate_state_indices, + cache_steps, + retrieve_parent_token_ptr, + stride_retrieve_parent_token_seq: tl.constexpr, + stride_retrieve_parent_token_token: tl.constexpr, + # ================================================ + scale, + T, + stride_q, + stride_k, + stride_v, + stride_b, + NP2_T: tl.constexpr, + B: tl.constexpr, + H: tl.constexpr, + HV: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + USE_QK_L2NORM_IN_KERNEL: tl.constexpr, + IS_VARLEN: tl.constexpr, + IS_KDA: tl.constexpr, + # Optional flags for target_verify support (default False for decode) + DISABLE_STATE_UPDATE: tl.constexpr = False, + CACHE_INTERMEDIATE_STATES: tl.constexpr = False, + HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: tl.constexpr = False, +): + """ + Fused kernel that combines sigmoid gating computation with recurrent delta rule update. 
+ """ + i_k, i_v, i_nh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_n, i_hv = i_nh // HV, i_nh % HV + i_h = i_hv // (HV // H) + + if IS_VARLEN: + bos, eos = ( + tl.load(cu_seqlens + i_n).to(tl.int64), + tl.load(cu_seqlens + i_n + 1).to(tl.int64), + ) + all = T + T = eos - bos + else: + bos, eos = i_n * T, i_n * T + T + all = B * T + + o_k = i_k * BK + tl.arange(0, BK) + o_v = i_v * BV + tl.arange(0, BV) + + p_q = q + bos * stride_q + i_h * K + o_k + p_k = k + bos * stride_k + i_h * K + o_k + p_v = v + bos * stride_v + i_hv * V + o_v + p_b = b + bos * stride_b + i_hv + p_o = o + ((i_k * all + bos) * HV + i_hv) * V + o_v + + # Gating computation pointers + p_A_log = A_log + i_hv + if IS_KDA: + p_a = a + (bos * HV + i_hv) * K + o_k + p_dt_bias = dt_bias + i_hv * K + o_k + else: + p_a = a + bos * HV + i_hv + p_dt_bias = dt_bias + i_hv + + mask_k = o_k < K + mask_v = o_v < V + mask_h = mask_k[:, None] & mask_v[None, :] + + b_h = tl.zeros([BK, BV], dtype=tl.float32) + if USE_INITIAL_STATE: + idx = tl.load(h0_indices + i_n) + if idx >= 0: + p_h0 = ( + h0_source + + idx * HV * K * V + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + b_h += tl.load(p_h0, mask=mask_h, other=0).to(tl.float32) + + # Preload tree attention data if needed + if HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: + token_indices = tl.arange(0, NP2_T) + mask_retrieve = token_indices < T + retrieve_parent_token_base = ( + retrieve_parent_token_ptr + + (i_n * stride_retrieve_parent_token_seq) + + token_indices * stride_retrieve_parent_token_token + ) + parent_idx_tokens = tl.load( + retrieve_parent_token_base, mask=mask_retrieve, other=0 + ) + + # Prepare intermediate state cache index if enabled + cache_idx = -1 + if CACHE_INTERMEDIATE_STATES: + cache_idx = tl.load(intermediate_state_indices + i_n) + + step_idx = 0 + for _ in range(0, T): + # Tree attention: load parent's cached state + if HAS_EAGLE_TREE_CUSTOM_ATTN_MASK: + # step_idx == 0 uses b_h from USE_INITIAL_STATE + if step_idx != 0 and 
cache_idx >= 0: + parent_step_idx = tl.sum( + tl.where(token_indices == step_idx, parent_idx_tokens, 0) + ) + step_offset = parent_step_idx * HV * K * V + cache_ptr = ( + intermediate_states_buffer + + cache_idx * cache_steps * HV * K * V + + step_offset + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + b_h = tl.load(cache_ptr, mask=mask_h, other=0).to(tl.float32) + + # Load inputs + b_q = tl.load(p_q, mask=mask_k, other=0).to(tl.float32) + b_k = tl.load(p_k, mask=mask_k, other=0).to(tl.float32) + b_v = tl.load(p_v, mask=mask_v, other=0).to(tl.float32) + b_b = tl.load(p_b).to(tl.float32) + + # Compute sigmoid gating + # Load gating parameters + b_A_log = tl.load(p_A_log).to(tl.float32) + if IS_KDA: + b_a = tl.load(p_a, mask=mask_k, other=0).to(tl.float32) + b_dt_bias = tl.load(p_dt_bias, mask=mask_k, other=0).to(tl.float32) + else: + b_a = tl.load(p_a).to(tl.float32) + b_dt_bias = tl.load(p_dt_bias).to(tl.float32) + + # Compute g = -exp(A_log) * softplus(a + dt_bias) + x = b_a + b_dt_bias + beta_x = softplus_beta * x + # Apply softplus with numerical stability + softplus_x = tl.where( + beta_x <= softplus_threshold, + (1.0 / softplus_beta) * tl.log(1.0 + tl.exp(beta_x)), + x, + ) + b_g = -tl.exp(b_A_log) * softplus_x + + # Compute beta = sigmoid(b) + b_beta = 1.0 / (1.0 + tl.exp(-b_b)) + + # Apply L2 normalization if enabled + if USE_QK_L2NORM_IN_KERNEL: + b_q = b_q / (tl.sqrt(tl.sum(b_q * b_q) + 1e-6)) + b_k = b_k / (tl.sqrt(tl.sum(b_k * b_k) + 1e-6)) + + b_q = b_q * scale + + # Apply gating to hidden state: h *= exp(g) + if IS_KDA: + b_h *= tl.exp(b_g[:, None]) + else: + b_h *= tl.exp(b_g) + + # Delta rule: v -= sum(h * k, dim=0) + b_v -= tl.sum(b_h * b_k[:, None], 0) + + # Apply beta gating: v *= beta + b_v *= b_beta + + # Update hidden state: h += k[:, None] * v[None, :] + b_h += b_k[:, None] * b_v[None, :] + + # Compute output: o = sum(h * q, dim=0) + b_o = tl.sum(b_h * b_q[:, None], 0) + tl.store(p_o, b_o.to(p_o.dtype.element_ty), mask=mask_v) + + 
# Cache intermediate states if enabled + if CACHE_INTERMEDIATE_STATES: + if cache_idx >= 0: + step_offset = step_idx * HV * K * V + cache_ptr = ( + intermediate_states_buffer + + cache_idx * cache_steps * HV * K * V + + step_offset + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + tl.store(cache_ptr, b_h.to(cache_ptr.dtype.element_ty), mask=mask_h) + + step_idx += 1 + + # Update pointers for next timestep + p_q += stride_q + p_k += stride_k + p_v += stride_v + p_b += stride_b + p_o += HV * V + if IS_KDA: + p_a += HV * K + else: + p_a += HV + + # Store final state back to h0_source with bounds checking + if not DISABLE_STATE_UPDATE: + if USE_INITIAL_STATE: + idx = tl.load(h0_indices + i_n) + if idx >= 0: + p_h0 = ( + h0_source + + idx * HV * K * V + + i_hv * K * V + + o_v[None, :] * K + + o_k[:, None] + ) + tl.store(p_h0, b_h.to(p_h0.dtype.element_ty), mask=mask_h) + + +def fused_sigmoid_gating_delta_rule_update( + A_log: torch.Tensor, + a: torch.Tensor, + dt_bias: torch.Tensor, + softplus_beta: float, + softplus_threshold: float, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + b: torch.Tensor, + initial_state_source: torch.Tensor, + initial_state_indices: torch.Tensor, + scale: Optional[float] = None, + use_qk_l2norm_in_kernel: bool = False, + cu_seqlens: Optional[torch.Tensor] = None, + is_kda: bool = False, + # Optional parameters for target_verify support + disable_state_update: bool = False, + intermediate_states_buffer: Optional[torch.Tensor] = None, + intermediate_state_indices: Optional[torch.Tensor] = None, + cache_steps: Optional[int] = None, + retrieve_parent_token: Optional[torch.Tensor] = None, +): + """ + Fused triton implementation of sigmoid gating delta rule update. + This function uses a single fused kernel that combines both sigmoid gating computation + and the recurrent delta rule update for better performance. 
+ + Supports both decode and target_verify modes: + - decode: standard single-step update with state write-back + - target_verify: multi-step with intermediate state caching, optional tree attention, + and optional state update disable + """ + B, T, H, K, V = *k.shape, v.shape[-1] + stride_q = q.stride()[1] + stride_k = k.stride()[1] + stride_v = v.stride()[1] + stride_b = b.stride()[-2] + HV = v.shape[2] + N = B if cu_seqlens is None else len(cu_seqlens) - 1 + BK, BV = triton.next_power_of_2(K), min(triton.next_power_of_2(V), 32) + NK, NV = triton.cdiv(K, BK), triton.cdiv(V, BV) + assert NK == 1, "NK > 1 is not supported yet" + num_stages = 3 + num_warps = 1 + + if scale is None: + scale = k.shape[-1] ** -0.5 + else: + assert scale > 0, "scale must be positive" + + o = q.new_empty(NK, *v.shape) + + # Prepare retrieve_parent_token strides + if retrieve_parent_token is not None: + stride_retrieve_parent_token_seq = retrieve_parent_token.stride(0) + stride_retrieve_parent_token_token = retrieve_parent_token.stride(1) + else: + stride_retrieve_parent_token_seq = 0 + stride_retrieve_parent_token_token = 0 + + NP2_T = triton.next_power_of_2(T) + + grid = (NK, NV, N * HV) + + fused_sigmoid_gating_delta_rule_update_kernel_ref[grid]( + A_log=A_log, + a=a, + dt_bias=dt_bias, + softplus_beta=softplus_beta, + softplus_threshold=softplus_threshold, + q=q, + k=k, + v=v, + b=b, + o=o, + h0_source=initial_state_source, + h0_indices=initial_state_indices, + cu_seqlens=cu_seqlens, + intermediate_states_buffer=intermediate_states_buffer, + intermediate_state_indices=intermediate_state_indices, + cache_steps=0 if cache_steps is None else cache_steps, + retrieve_parent_token_ptr=retrieve_parent_token, + stride_retrieve_parent_token_seq=stride_retrieve_parent_token_seq, + stride_retrieve_parent_token_token=stride_retrieve_parent_token_token, + scale=scale, + T=T, + stride_q=stride_q, + stride_k=stride_k, + stride_v=stride_v, + stride_b=stride_b, + NP2_T=NP2_T, + B=B, + H=H, + HV=HV, + 
K=K, + V=V, + BK=BK, + BV=BV, + USE_INITIAL_STATE=initial_state_source is not None, + USE_QK_L2NORM_IN_KERNEL=use_qk_l2norm_in_kernel, + IS_VARLEN=cu_seqlens is not None, + IS_KDA=is_kda, + DISABLE_STATE_UPDATE=disable_state_update, + CACHE_INTERMEDIATE_STATES=intermediate_states_buffer is not None, + HAS_EAGLE_TREE_CUSTOM_ATTN_MASK=retrieve_parent_token is not None, + num_warps=num_warps, + num_stages=num_stages, + ) + o = o.squeeze(0) + return o diff --git a/aiter/ops/triton/fla/sglang/chunk_delta_h.py b/aiter/ops/triton/fla/sglang/chunk_delta_h.py new file mode 100644 index 0000000000000000000000000000000000000000..2ef1296393b67054a9289d78ca903db712bde083 --- /dev/null +++ b/aiter/ops/triton/fla/sglang/chunk_delta_h.py @@ -0,0 +1,489 @@ +# SPDX-License-Identifier: MIT + +import functools +import json +import os + +import torch +import triton +import triton.language as tl + +import aiter.ops.triton.utils.arch_info as arch_info +from aiter import logger +from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH + +TRITON_CONFIG_CHECK = os.environ.get("TRITON_CONFIG_CHECK", "0") == "1" +HAS_DUMPED_CHUNK_DELTA_H_KERNEL_METADATA = False + +@triton.jit +def safe_exp(x): + return exp(tl.where(x <= 0, x, float("-inf"))) + +@triton.jit +def exp(x): + return tl.exp(x) + + +@triton.jit +def exp2(x): + return tl.math.exp2(x) + + +def prepare_chunk_indices(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + chunk_rows = [] + for i in range(len(cu_seqlens) - 1): + seqlen = int((cu_seqlens[i + 1] - cu_seqlens[i]).item()) + n_chunks = triton.cdiv(seqlen, chunk_size) + for chunk_idx in range(n_chunks): + chunk_rows.append([i, chunk_idx]) + if len(chunk_rows) == 0: + return torch.empty((0, 2), dtype=torch.long, device=cu_seqlens.device) + return torch.tensor(chunk_rows, dtype=torch.long, device=cu_seqlens.device) + + +def prepare_chunk_offsets(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + seq_lens = cu_seqlens[1:] - 
cu_seqlens[:-1] + chunk_counts = (seq_lens + chunk_size - 1) // chunk_size + offsets = torch.zeros_like(chunk_counts) + if len(offsets) > 1: + offsets[1:] = torch.cumsum(chunk_counts, dim=0)[:-1] + return offsets + + +_DEFAULT_CHUNK_DELTA_H_CONFIG = { + "BV": 32, + "num_warps": 8, + "num_stages": 2, +} + + +@functools.lru_cache(maxsize=1) +def _load_chunk_delta_h_configs() -> dict: + device_name = arch_info.get_arch() + path = os.path.join( + AITER_TRITON_CONFIGS_PATH, + "chunk_gated_delta_rule_fwd_h", + f"chunk_gated_delta_rule_fwd_h-{device_name}.json", + ) + if not os.path.exists(path): + logger.warning( + f"chunk_gated_delta_rule_fwd_h config not found at {path}, using default {_DEFAULT_CHUNK_DELTA_H_CONFIG}." + ) + return {} + with open(path) as f: + payload = json.load(f) + return payload.get("config", {}) if isinstance(payload, dict) else {} + +@functools.lru_cache +def _get_chunk_delta_h_config(K: int, V: int, BT: int, H: int) -> dict: + cfgs = _load_chunk_delta_h_configs() + key = f"K={K},V={V},BT={BT},H={H}" + cfg = cfgs.get(key) + + if cfg is None: + default_cfg = cfgs.get("default", _DEFAULT_CHUNK_DELTA_H_CONFIG) + if TRITON_CONFIG_CHECK: + logger.warning( + "chunk_gated_delta_rule_fwd_h config missing for " + f"{key}, using default config {default_cfg}." 
+ ) + cfg = default_cfg + merged = dict(_DEFAULT_CHUNK_DELTA_H_CONFIG) + merged.update(cfg) + return merged + + +def launch_chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + *, + k: torch.Tensor, + u: torch.Tensor, + w: torch.Tensor, + v_new: torch.Tensor | None, + g: torch.Tensor | None, + gk: torch.Tensor | None, + h: torch.Tensor, + initial_state: torch.Tensor | None, + initial_state_indices: torch.Tensor | None, + # final_state: torch.Tensor | None, + cu_seqlens: torch.LongTensor | None, + chunk_offsets: torch.LongTensor | None, + N: int, + T: int, + H: int, + Hg: int, + K: int, + V: int, + BT: int, + kernel_cfg: dict | None, +): + global HAS_DUMPED_CHUNK_DELTA_H_KERNEL_METADATA + + def grid(meta): + return (triton.cdiv(V, meta["BV"]), N * H) + + cfg = kernel_cfg if kernel_cfg is not None else _get_chunk_delta_h_config(K, V, BT, H) + launch_grid = (triton.cdiv(V, cfg["BV"]), N * H) + compiled_kernel = chunk_gated_delta_rule_fwd_kernel_h_blockdim64[grid]( + k=k, + v=u, + w=w, + v_new=v_new, + g=g, + gk=gk, + h=h, + initial_state=initial_state, + initial_state_indices=initial_state_indices, + cu_seqlens=cu_seqlens, + chunk_offsets=chunk_offsets, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + BV=cfg["BV"], + INPLACE_UPDATE=True, + num_warps=cfg["num_warps"], + num_stages=cfg["num_stages"], + ) + if ( + TRITON_CONFIG_CHECK + and not HAS_DUMPED_CHUNK_DELTA_H_KERNEL_METADATA + and compiled_kernel is not None + ): + print("chunk_gated_delta_rule_fwd_kernel_h_blockdim64 metadata") + print(f" grid: {launch_grid}") + print( + f" meta: BT={BT}, BV={cfg['BV']}, K={K}, V={V}, H={H}, Hg={Hg}, N={N}, T={T}, " + f"num_warps={cfg['num_warps']}, num_stages={cfg['num_stages']}" + ) + print(f" registers: {compiled_kernel.n_regs}") + print(f" spills: {compiled_kernel.n_spills}") + print(f" shared memory: {compiled_kernel.metadata.shared} bytes") + HAS_DUMPED_CHUNK_DELTA_H_KERNEL_METADATA = True + + +@triton.heuristics({ + "USE_G": lambda args: args["g"] is not None, + "USE_GK": 
lambda args: args["gk"] is not None, + "USE_INITIAL_STATE": lambda args: args["initial_state"] is not None, + # "USE_INITIAL_STATE_INDICES": lambda args: args["initial_state_indices"] is not None, + # "STORE_FINAL_STATE": lambda args: args["ht"] is not None, + "SAVE_NEW_VALUE": lambda args: args["v_new"] is not None, + "IS_VARLEN": lambda args: args["cu_seqlens"] is not None, +}) +@triton.jit(do_not_specialize=["T"]) +def chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + k, + v, + w, + v_new, + g, + gk, + h, + initial_state, + initial_state_indices, + cu_seqlens, + chunk_offsets, + T, + H: tl.constexpr, + Hg: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_GK: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + INPLACE_UPDATE: tl.constexpr, + SAVE_NEW_VALUE: tl.constexpr, + IS_VARLEN: tl.constexpr, +): + i_v, i_nh = tl.program_id(0), tl.program_id(1) + i_n, i_h = i_nh // H, i_nh % H + if IS_VARLEN: + bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load( + cu_seqlens + i_n + 1 + ).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + boh = tl.load(chunk_offsets + i_n).to(tl.int32) + else: + bos, eos = i_n * T, i_n * T + T + NT = tl.cdiv(T, BT) + boh = i_n * NT + + # [BV, BK] + b_h1 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 64: + b_h2 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 128: + b_h3 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 192: + b_h4 = tl.zeros([BV, 64], dtype=tl.float32) + + # calculate offset + h += ((boh * H + i_h) * V * K).to(tl.int64) + v += ((bos * H + i_h) * V).to(tl.int64) + k += ((bos * Hg + i_h // (H // Hg)) * K).to(tl.int64) + w += ((bos * H + i_h) * K).to(tl.int64) + if SAVE_NEW_VALUE: + v_new += ((bos * H + i_h) * V).to(tl.int64) + stride_v = H * V + stride_h = H * V * K + stride_k = Hg * K + stride_w = H * K + + index = tl.load(initial_state_indices + i_n).to(tl.int32) + h0 = initial_state + index * stride_h + ht = initial_state + index * stride_h + if 
USE_INITIAL_STATE: + h0 = h0 + i_h * V * K + if INPLACE_UPDATE: + ht = ht + i_h * V * K + + # load initial state + if USE_INITIAL_STATE: + p_h0_1 = tl.make_block_ptr(h0, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)) + b_h1 += tl.load(p_h0_1, boundary_check=(0, 1)).to(tl.float32) + if K > 64: + p_h0_2 = tl.make_block_ptr( + h0, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0) + ) + b_h2 += tl.load(p_h0_2, boundary_check=(0, 1)).to(tl.float32) + if K > 128: + p_h0_3 = tl.make_block_ptr( + h0, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0) + ) + b_h3 += tl.load(p_h0_3, boundary_check=(0, 1)).to(tl.float32) + if K > 192: + p_h0_4 = tl.make_block_ptr( + h0, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0) + ) + b_h4 += tl.load(p_h0_4, boundary_check=(0, 1)).to(tl.float32) + + # main recurrence + for i_t in range(NT): + p_h1 = tl.make_block_ptr( + h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0) + ) + tl.store(p_h1, b_h1.to(p_h1.dtype.element_ty), boundary_check=(0, 1)) + if K > 64: + p_h2 = tl.make_block_ptr( + h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0) + ) + tl.store(p_h2, b_h2.to(p_h2.dtype.element_ty), boundary_check=(0, 1)) + if K > 128: + p_h3 = tl.make_block_ptr( + h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0) + ) + tl.store(p_h3, b_h3.to(p_h3.dtype.element_ty), boundary_check=(0, 1)) + if K > 192: + p_h4 = tl.make_block_ptr( + h + i_t * stride_h, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0) + ) + tl.store(p_h4, b_h4.to(p_h4.dtype.element_ty), boundary_check=(0, 1)) + + p_w1 = tl.make_block_ptr( + w, (T, K), (stride_w, 1), (i_t * BT, 0), (BT, 64), (1, 0) + ) + b_w1 = tl.load(p_w1, boundary_check=(0, 1)) + if K > 64: + p_w2 = tl.make_block_ptr( + w, (T, K), (stride_w, 1), (i_t * BT, 64), (BT, 64), (1, 0) + ) + b_w2 = tl.load(p_w2, boundary_check=(0, 1)) + if K > 128: + p_w3 = tl.make_block_ptr( + w, (T, K), (stride_w, 1), (i_t * BT, 128), (BT, 64), (1, 0) + ) + b_w3 = tl.load(p_w3, 
boundary_check=(0, 1)) + if K > 192: + p_w4 = tl.make_block_ptr( + w, (T, K), (stride_w, 1), (i_t * BT, 192), (BT, 64), (1, 0) + ) + b_w4 = tl.load(p_w4, boundary_check=(0, 1)) + + b_v = tl.dot(b_w1, tl.trans(b_h1).to(b_w1.dtype)) + if K > 64: + b_v += tl.dot(b_w2, tl.trans(b_h2).to(b_w2.dtype)) + if K > 128: + b_v += tl.dot(b_w3, tl.trans(b_h3).to(b_w3.dtype)) + if K > 192: + b_v += tl.dot(b_w4, tl.trans(b_h4).to(b_w4.dtype)) + p_v = tl.make_block_ptr( + v, (T, V), (stride_v, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0) + ) + b_v = tl.load(p_v, boundary_check=(0, 1)) - b_v + + if SAVE_NEW_VALUE: + p_v = tl.make_block_ptr( + v_new, (T, V), (stride_v, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0) + ) + tl.store(p_v, b_v.to(p_v.dtype.element_ty), boundary_check=(0, 1)) + + last_idx = min((i_t + 1) * BT, T) - 1 + if USE_G: + b_g_last = tl.load(g + bos * H + last_idx * H + i_h) + p_g = tl.make_block_ptr( + g + bos * H + i_h, (T,), (H,), (i_t * BT,), (BT,), (0,) + ) + b_g = tl.load(p_g, boundary_check=(0,)) + b_v = b_v * safe_exp(b_g_last - b_g)[:, None] + b_g_last = exp(b_g_last) + b_h1 = b_h1 * b_g_last + if K > 64: + b_h2 = b_h2 * b_g_last + if K > 128: + b_h3 = b_h3 * b_g_last + if K > 192: + b_h4 = b_h4 * b_g_last + + if USE_GK: + o_k1 = tl.arange(0, 64) + b_gk_last1 = tl.load( + gk + (bos + last_idx) * H * K + i_h * K + o_k1, + mask=(o_k1 < K), + other=0.0, + ) + b_h1 *= exp(b_gk_last1)[None, :] + if K > 64: + o_k2 = 64 + o_k1 + b_gk_last2 = tl.load( + gk + (bos + last_idx) * H * K + i_h * K + o_k2, + mask=(o_k2 < K), + other=0.0, + ) + b_h2 *= exp(b_gk_last2)[None, :] + if K > 128: + o_k3 = 128 + o_k1 + b_gk_last3 = tl.load( + gk + (bos + last_idx) * H * K + i_h * K + o_k3, + mask=(o_k3 < K), + other=0.0, + ) + b_h3 *= exp(b_gk_last3)[None, :] + if K > 192: + o_k4 = 192 + o_k1 + b_gk_last4 = tl.load( + gk + (bos + last_idx) * H * K + i_h * K + o_k4, + mask=(o_k4 < K), + other=0.0, + ) + b_h4 *= exp(b_gk_last4)[None, :] + b_v = b_v.to(k.dtype.element_ty) + + p_k1 = 
tl.make_block_ptr( + k, (K, T), (1, stride_k), (0, i_t * BT), (64, BT), (0, 1) + ) + b_k1 = tl.load(p_k1, boundary_check=(0, 1)) + if K > 64: + p_k2 = tl.make_block_ptr( + k, (K, T), (1, stride_k), (64, i_t * BT), (64, BT), (0, 1) + ) + b_k2 = tl.load(p_k2, boundary_check=(0, 1)) + if K > 128: + p_k3 = tl.make_block_ptr( + k, (K, T), (1, stride_k), (128, i_t * BT), (64, BT), (0, 1) + ) + b_k3 = tl.load(p_k3, boundary_check=(0, 1)) + if K > 192: + p_k4 = tl.make_block_ptr( + k, (K, T), (1, stride_k), (192, i_t * BT), (64, BT), (0, 1) + ) + b_k4 = tl.load(p_k4, boundary_check=(0, 1)) + + b_h1 += tl.trans(tl.dot(b_k1, b_v)) + if K > 64: + b_h2 += tl.trans(tl.dot(b_k2, b_v)) + if K > 128: + b_h3 += tl.trans(tl.dot(b_k3, b_v)) + if K > 192: + b_h4 += tl.trans(tl.dot(b_k4, b_v)) + + # epilogue + if INPLACE_UPDATE: + p_ht = tl.make_block_ptr(ht, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)) + tl.store(p_ht, b_h1.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 64: + p_ht = tl.make_block_ptr( + ht, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0) + ) + tl.store(p_ht, b_h2.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 128: + p_ht = tl.make_block_ptr( + ht, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0) + ) + tl.store(p_ht, b_h3.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 192: + p_ht = tl.make_block_ptr( + ht, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0) + ) + tl.store(p_ht, b_h4.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + + +def chunk_gated_delta_rule_fwd_h( + k: torch.Tensor, + w: torch.Tensor, + u: torch.Tensor, + g: torch.Tensor | None = None, + gk: torch.Tensor | None = None, + initial_state: torch.Tensor | None = None, + initial_state_indices: torch.Tensor | None = None, + output_final_state: bool = True, + chunk_size: int = 64, + save_new_value: bool = True, + cu_seqlens: torch.LongTensor | None = None, + chunk_indices: torch.LongTensor | None = None, + use_exp2: bool = False, + transpose_state_layout: bool = 
True, + kernel_cfg: dict | None = None, +): + B, T, Hg, K, V = *k.shape, u.shape[-1] + H = u.shape[-2] + BT = chunk_size + + chunk_indices = ( + prepare_chunk_indices(cu_seqlens, chunk_size) + if cu_seqlens is not None + else None + ) + # N: the actual number of sequences in the batch with either equal or variable lengths + if cu_seqlens is None: + N, NT, chunk_offsets = B, triton.cdiv(T, BT), None + else: + N, NT, chunk_offsets = ( + len(cu_seqlens) - 1, + len(chunk_indices), + prepare_chunk_offsets(cu_seqlens, BT), + ) + assert K <= 256, "current kernel does not support head dimension larger than 256." + + h = k.new_empty(B, NT, H, V, K) + + v_new = torch.empty_like(u) if save_new_value else None + + launch_chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + k=k, + u=u, + w=w, + v_new=v_new, + g=g, + gk=gk, + h=h, + initial_state=initial_state, + initial_state_indices=initial_state_indices, + cu_seqlens=cu_seqlens, + chunk_offsets=chunk_offsets, + N=N, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + # use_exp2=use_exp2, + # transpose_state_layout=transpose_state_layout, + kernel_cfg=kernel_cfg, + ) + return h, v_new diff --git a/aiter/ops/triton/fla/sglang/chunk_o.py b/aiter/ops/triton/fla/sglang/chunk_o.py new file mode 100644 index 0000000000000000000000000000000000000000..8ab6f0a0d02aae053a553c906c5fc07d7685a399 --- /dev/null +++ b/aiter/ops/triton/fla/sglang/chunk_o.py @@ -0,0 +1,293 @@ +# SPDX-License-Identifier: MIT + +import functools +import json +import os + +import torch +import triton +import triton.language as tl + +import aiter.ops.triton.utils.arch_info as arch_info +from aiter import logger +from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH + +TRITON_CONFIG_CHECK = os.environ.get("TRITON_CONFIG_CHECK", "0") == "1" +HAS_DUMPED_CHUNK_FWD_O_KERNEL_METADATA = False + +@triton.jit +def safe_exp(x): + return exp(tl.where(x <= 0, x, float("-inf"))) + + +@triton.jit +def exp(x): + return tl.exp(x) + + +@triton.jit +def exp2(x): + return 
tl.math.exp2(x) + + +def prepare_chunk_indices(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + chunk_rows = [] + for i in range(len(cu_seqlens) - 1): + seqlen = int((cu_seqlens[i + 1] - cu_seqlens[i]).item()) + n_chunks = triton.cdiv(seqlen, chunk_size) + for chunk_idx in range(n_chunks): + chunk_rows.append([i, chunk_idx]) + if len(chunk_rows) == 0: + return torch.empty((0, 2), dtype=torch.long, device=cu_seqlens.device) + return torch.tensor(chunk_rows, dtype=torch.long, device=cu_seqlens.device) + + +_DEFAULT_CHUNK_O_CONFIG = { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2, +} + + +@functools.lru_cache(maxsize=1) +def _load_chunk_o_configs() -> dict: + device_name = arch_info.get_arch() + path = os.path.join( + AITER_TRITON_CONFIGS_PATH, + "chunk_fwd_o", + f"chunk_fwd_o-{device_name}.json", + ) + if not os.path.exists(path): + logger.warning( + f"chunk_fwd_o config not found at {path}, using default {_DEFAULT_CHUNK_O_CONFIG}." + ) + return {} + with open(path) as f: + payload = json.load(f) + return payload.get("config", {}) if isinstance(payload, dict) else {} + + +@functools.lru_cache +def _get_chunk_o_config(K: int, V: int, BT: int) -> dict: + cfgs = _load_chunk_o_configs() + key = f"K={K},V={V},BT={BT}" + cfg = cfgs.get(key) + if cfg is None: + default_cfg = cfgs.get("default", _DEFAULT_CHUNK_O_CONFIG) + if TRITON_CONFIG_CHECK: + logger.warning( + "chunk_fwd_o config missing for " + f"{key}, using default config {default_cfg}." 
+ ) + cfg = default_cfg + merged = dict(_DEFAULT_CHUNK_O_CONFIG) + merged.update(cfg) + return merged + + +def launch_chunk_fwd_kernel_o( + *, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + h: torch.Tensor, + g: torch.Tensor | None, + o: torch.Tensor, + cu_seqlens: torch.LongTensor | None, + chunk_indices: torch.LongTensor | None, + scale: float, + T: int, + H: int, + Hg: int, + K: int, + V: int, + BT: int, + NT: int, + B: int, + kernel_cfg: dict | None, +): + global HAS_DUMPED_CHUNK_FWD_O_KERNEL_METADATA + + def grid(meta): + return (triton.cdiv(V, meta["BV"]), NT, B * H) + + cfg = kernel_cfg if kernel_cfg is not None else _get_chunk_o_config(K, V, BT) + launch_grid = (triton.cdiv(V, cfg["BV"]), NT, B * H) + compiled_kernel = chunk_fwd_kernel_o[grid]( + q=q, + k=k, + v=v, + h=h, + g=g, + o=o, + cu_seqlens=cu_seqlens, + chunk_indices=chunk_indices, + scale=scale, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + BK=cfg["BK"], + BV=cfg["BV"], + num_warps=cfg["num_warps"], + num_stages=cfg["num_stages"], + ) + if ( + TRITON_CONFIG_CHECK + and not HAS_DUMPED_CHUNK_FWD_O_KERNEL_METADATA + and compiled_kernel is not None + ): + print("chunk_fwd_kernel_o metadata") + print(f" grid: {launch_grid}") + print( + f" meta: BT={BT}, BK={cfg['BK']}, BV={cfg['BV']}, K={K}, V={V}, H={H}, Hg={Hg}, " + f"NT={NT}, B={B}, T={T}, num_warps={cfg['num_warps']}, num_stages={cfg['num_stages']}" + ) + print(f" registers: {compiled_kernel.n_regs}") + print(f" spills: {compiled_kernel.n_spills}") + print(f" shared memory: {compiled_kernel.metadata.shared} bytes") + HAS_DUMPED_CHUNK_FWD_O_KERNEL_METADATA = True + + +@triton.heuristics({ + "USE_G": lambda args: args["g"] is not None, + "IS_VARLEN": lambda args: args["cu_seqlens"] is not None, +}) +@triton.jit(do_not_specialize=["T"]) +def chunk_fwd_kernel_o( + q, + k, + v, + h, + g, + o, + cu_seqlens, + chunk_indices, + scale, + T, + H: tl.constexpr, + Hg: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: 
tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + IS_VARLEN: tl.constexpr, +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_h = i_bh // H, i_bh % H + + if IS_VARLEN: + i_tg = i_t + i_n, i_t = ( + tl.load(chunk_indices + i_t * 2).to(tl.int32), + tl.load(chunk_indices + i_t * 2 + 1).to(tl.int32), + ) + bos, eos = ( + tl.load(cu_seqlens + i_n).to(tl.int32), + tl.load(cu_seqlens + i_n + 1).to(tl.int32), + ) + T = eos - bos + else: + NT = tl.cdiv(T, BT) + i_tg = i_b * NT + i_t + bos, eos = i_b * T, i_b * T + T + + q += (bos * Hg + i_h // (H // Hg)) * K + k += (bos * Hg + i_h // (H // Hg)) * K + v += (bos * H + i_h) * V + o += (bos * H + i_h) * V + h += (i_tg * H + i_h).to(tl.int64) * V * K + + b_o = tl.zeros([BT, BV], dtype=tl.float32) + b_A = tl.zeros([BT, BT], dtype=tl.float32) + + for i_k in range(tl.cdiv(K, BK)): + p_q = tl.make_block_ptr( + q, (T, K), (Hg * K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0) + ) + p_k = tl.make_block_ptr( + k, (K, T), (1, Hg * K), (i_k * BK, i_t * BT), (BK, BT), (0, 1) + ) + p_h = tl.make_block_ptr( + h, (V, K), (K, 1), (i_v * BV, i_k * BK), (BV, BK), (1, 0) + ) + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_h = tl.load(p_h, boundary_check=(0, 1)) + b_o += tl.dot(b_q, tl.trans(b_h)) + b_A += tl.dot(b_q, b_k) + + if USE_G: + g += bos * H + i_h + p_g = tl.make_block_ptr(g, (T,), (H,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + b_o = b_o * exp(b_g)[:, None] + b_A = b_A * safe_exp(b_g[:, None] - b_g[None, :]) + + o_i = tl.arange(0, BT) + m_A = o_i[:, None] >= o_i[None, :] + b_A = tl.where(m_A, b_A, 0) + + p_v = tl.make_block_ptr( + v, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0) + ) + p_o = tl.make_block_ptr( + o, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0) + ) + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_o = b_o * scale + tl.dot(b_A.to(b_v.dtype), b_v) * scale + tl.store(p_o, 
b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + +def chunk_fwd_o( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + h: torch.Tensor, + g: torch.Tensor | None = None, + g_gamma: torch.Tensor | None = None, + scale: float | None = None, + cu_seqlens: torch.LongTensor | None = None, + chunk_size: int = 64, + chunk_indices: torch.LongTensor | None = None, + use_exp2: bool = False, + transpose_state_layout: bool = False, + kernel_cfg: dict | None = None, +) -> torch.Tensor: + B, T, Hg, K, V = *q.shape, v.shape[-1] + H = v.shape[-2] + BT = chunk_size + if chunk_indices is None and cu_seqlens is not None: + chunk_indices = prepare_chunk_indices(cu_seqlens, BT) + NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices) + if scale is None: + scale = k.shape[-1] ** -0.5 + + o = torch.empty_like(v) + + launch_chunk_fwd_kernel_o( + q=q, + k=k, + v=v, + h=h, + g=g, + o=o, + cu_seqlens=cu_seqlens, + chunk_indices=chunk_indices, + scale=scale, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + NT=NT, + B=B, + kernel_cfg=kernel_cfg, + ) + return o diff --git a/aiter/ops/triton/fla/vllm/chunk_delta_h.py b/aiter/ops/triton/fla/vllm/chunk_delta_h.py new file mode 100644 index 0000000000000000000000000000000000000000..19ec812cfb9ce61ec62feba39a481126a22e63dc --- /dev/null +++ b/aiter/ops/triton/fla/vllm/chunk_delta_h.py @@ -0,0 +1,511 @@ +# SPDX-License-Identifier: MIT + +import functools +import json +import os + +import torch +import triton +import triton.language as tl + +import aiter.ops.triton.utils.arch_info as arch_info +from aiter import logger +from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH + +TRITON_CONFIG_CHECK = os.environ.get("TRITON_CONFIG_CHECK", "0") == "1" + +@triton.jit +def exp(x): + return tl.exp(x) + + +@triton.jit +def exp2(x): + return tl.math.exp2(x) + + +def prepare_chunk_indices(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + chunk_rows = [] + for i in range(len(cu_seqlens) - 1): + seqlen = 
int((cu_seqlens[i + 1] - cu_seqlens[i]).item()) + n_chunks = triton.cdiv(seqlen, chunk_size) + for chunk_idx in range(n_chunks): + chunk_rows.append([i, chunk_idx]) + if len(chunk_rows) == 0: + return torch.empty((0, 2), dtype=torch.long, device=cu_seqlens.device) + return torch.tensor(chunk_rows, dtype=torch.long, device=cu_seqlens.device) + + +def prepare_chunk_offsets(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + seq_lens = cu_seqlens[1:] - cu_seqlens[:-1] + chunk_counts = (seq_lens + chunk_size - 1) // chunk_size + offsets = torch.zeros_like(chunk_counts) + if len(offsets) > 1: + offsets[1:] = torch.cumsum(chunk_counts, dim=0)[:-1] + return offsets + + +_DEFAULT_CHUNK_DELTA_H_CONFIG = { + "BV": 32, + "num_warps": 8, + "num_stages": 2, +} + + +@functools.lru_cache(maxsize=1) +def _load_chunk_delta_h_configs() -> dict: + device_name = arch_info.get_arch() + path = os.path.join( + AITER_TRITON_CONFIGS_PATH, + "chunk_gated_delta_rule_fwd_h", + f"chunk_gated_delta_rule_fwd_h-{device_name}.json", + ) + if not os.path.exists(path): + logger.warning( + f"chunk_gated_delta_rule_fwd_h config not found at {path}, using default {_DEFAULT_CHUNK_DELTA_H_CONFIG}." + ) + return {} + with open(path) as f: + payload = json.load(f) + return payload.get("config", {}) if isinstance(payload, dict) else {} + + +@functools.lru_cache +def _get_chunk_delta_h_config(K: int, V: int, BT: int, H: int) -> dict: + cfgs = _load_chunk_delta_h_configs() + key = f"K={K},V={V},BT={BT},H={H}" + cfg = cfgs.get(key) + + if cfg is None: + default_cfg = cfgs.get("default", _DEFAULT_CHUNK_DELTA_H_CONFIG) + if TRITON_CONFIG_CHECK: + logger.warning( + "chunk_gated_delta_rule_fwd_h config missing for " + f"{key}, using default config {default_cfg}." 
+ ) + cfg = default_cfg + merged = dict(_DEFAULT_CHUNK_DELTA_H_CONFIG) + merged.update(cfg) + return merged + + +def launch_chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + *, + k: torch.Tensor, + u: torch.Tensor, + w: torch.Tensor, + v_new: torch.Tensor | None, + g: torch.Tensor | None, + gk: torch.Tensor | None, + h: torch.Tensor, + initial_state: torch.Tensor | None, + initial_state_indices: torch.Tensor | None, + final_state: torch.Tensor | None, + cu_seqlens: torch.LongTensor | None, + chunk_offsets: torch.LongTensor | None, + N: int, + T: int, + H: int, + Hg: int, + K: int, + V: int, + BT: int, + use_exp2: bool, + transpose_state_layout: bool, + kernel_cfg: dict | None, +): + def grid(meta): + return (triton.cdiv(V, meta["BV"]), N * H) + + cfg = kernel_cfg if kernel_cfg is not None else _get_chunk_delta_h_config(K, V, BT, H) + chunk_gated_delta_rule_fwd_kernel_h_blockdim64[grid]( + k=k, + v=u, + w=w, + v_new=v_new, + g=g, + gk=gk, + h=h, + h0=initial_state, + initial_state_indices=initial_state_indices, + ht=final_state, + cu_seqlens=cu_seqlens, + chunk_offsets=chunk_offsets, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + BV=cfg["BV"], + USE_EXP2=use_exp2, + TRANSPOSE_STATE=transpose_state_layout, + num_warps=cfg["num_warps"], + num_stages=cfg["num_stages"], + ) + + +@triton.heuristics({ + "USE_G": lambda args: args["g"] is not None, + "USE_GK": lambda args: args["gk"] is not None, + "USE_INITIAL_STATE": lambda args: args["h0"] is not None, + "USE_INITIAL_STATE_INDICES": lambda args: args["initial_state_indices"] is not None, + "STORE_FINAL_STATE": lambda args: args["ht"] is not None, + "SAVE_NEW_VALUE": lambda args: args["v_new"] is not None, + "IS_VARLEN": lambda args: args["cu_seqlens"] is not None, +}) +@triton.jit(do_not_specialize=["T"]) +def chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + k, + v, + w, + v_new, + g, + gk, + h, + h0, + initial_state_indices, + ht, + cu_seqlens, + chunk_offsets, + T, + H: tl.constexpr, + Hg: tl.constexpr, + K: 
tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_GK: tl.constexpr, + USE_INITIAL_STATE: tl.constexpr, + USE_INITIAL_STATE_INDICES: tl.constexpr, + STORE_FINAL_STATE: tl.constexpr, + SAVE_NEW_VALUE: tl.constexpr, + USE_EXP2: tl.constexpr, + TRANSPOSE_STATE: tl.constexpr, + IS_VARLEN: tl.constexpr, +): + i_v, i_nh = tl.program_id(0), tl.program_id(1) + i_n, i_h = i_nh // H, i_nh % H + if IS_VARLEN: + bos, eos = tl.load(cu_seqlens + i_n).to(tl.int32), tl.load(cu_seqlens + i_n + 1).to(tl.int32) + T = eos - bos + NT = tl.cdiv(T, BT) + boh = tl.load(chunk_offsets + i_n).to(tl.int32) + else: + bos, eos = i_n * T, i_n * T + T + NT = tl.cdiv(T, BT) + boh = i_n * NT + + if TRANSPOSE_STATE: + b_h1 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 64: + b_h2 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 128: + b_h3 = tl.zeros([BV, 64], dtype=tl.float32) + if K > 192: + b_h4 = tl.zeros([BV, 64], dtype=tl.float32) + else: + b_h1 = tl.zeros([64, BV], dtype=tl.float32) + if K > 64: + b_h2 = tl.zeros([64, BV], dtype=tl.float32) + if K > 128: + b_h3 = tl.zeros([64, BV], dtype=tl.float32) + if K > 192: + b_h4 = tl.zeros([64, BV], dtype=tl.float32) + + h += (boh * H + i_h).to(tl.int64) * K * V + v += (bos * H + i_h).to(tl.int64) * V + k += ((bos * Hg + i_h // (H // Hg)) * K).to(tl.int64) + w += (bos * H + i_h).to(tl.int64) * K + if SAVE_NEW_VALUE: + v_new += (bos * H + i_h).to(tl.int64) * V + + i_s = i_n + if USE_INITIAL_STATE_INDICES: + i_s = tl.load(initial_state_indices + i_n).to(tl.int32) + + if USE_INITIAL_STATE: + h0 = h0 + (i_s * H + i_h).to(tl.int64) * K * V + if STORE_FINAL_STATE: + ht = ht + (i_s * H + i_h).to(tl.int64) * K * V + + if USE_INITIAL_STATE: + if TRANSPOSE_STATE: + p_h0_1 = tl.make_block_ptr(h0, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)) + else: + p_h0_1 = tl.make_block_ptr(h0, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0)) + b_h1 += tl.load(p_h0_1, boundary_check=(0, 1)).to(tl.float32) + if K > 64: + 
if TRANSPOSE_STATE: + p_h0_2 = tl.make_block_ptr(h0, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0)) + else: + p_h0_2 = tl.make_block_ptr(h0, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)) + b_h2 += tl.load(p_h0_2, boundary_check=(0, 1)).to(tl.float32) + if K > 128: + if TRANSPOSE_STATE: + p_h0_3 = tl.make_block_ptr(h0, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0)) + else: + p_h0_3 = tl.make_block_ptr(h0, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)) + b_h3 += tl.load(p_h0_3, boundary_check=(0, 1)).to(tl.float32) + if K > 192: + if TRANSPOSE_STATE: + p_h0_4 = tl.make_block_ptr(h0, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0)) + else: + p_h0_4 = tl.make_block_ptr(h0, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)) + b_h4 += tl.load(p_h0_4, boundary_check=(0, 1)).to(tl.float32) + + for i_t in range(NT): + i_t_int64 = i_t.to(tl.int64) + if TRANSPOSE_STATE: + p_h1 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)) + else: + p_h1 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0)) + tl.store(p_h1, b_h1.to(p_h1.dtype.element_ty), boundary_check=(0, 1)) + if K > 64: + if TRANSPOSE_STATE: + p_h2 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0)) + else: + p_h2 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)) + tl.store(p_h2, b_h2.to(p_h2.dtype.element_ty), boundary_check=(0, 1)) + if K > 128: + if TRANSPOSE_STATE: + p_h3 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0)) + else: + p_h3 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)) + tl.store(p_h3, b_h3.to(p_h3.dtype.element_ty), boundary_check=(0, 1)) + if K > 192: + if TRANSPOSE_STATE: + p_h4 = tl.make_block_ptr(h + i_t_int64 * H * K * V, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0)) + else: + p_h4 = 
tl.make_block_ptr(h + i_t_int64 * H * K * V, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)) + tl.store(p_h4, b_h4.to(p_h4.dtype.element_ty), boundary_check=(0, 1)) + + p_w = tl.make_block_ptr(w, (T, K), (H * K, 1), (i_t * BT, 0), (BT, 64), (1, 0)) + b_w = tl.load(p_w, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_v = tl.dot(b_w, tl.trans(b_h1).to(b_w.dtype)) + else: + b_v = tl.dot(b_w, b_h1.to(b_w.dtype)) + if K > 64: + p_w = tl.make_block_ptr(w, (T, K), (H * K, 1), (i_t * BT, 64), (BT, 64), (1, 0)) + b_w = tl.load(p_w, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_v += tl.dot(b_w, tl.trans(b_h2).to(b_w.dtype)) + else: + b_v += tl.dot(b_w, b_h2.to(b_w.dtype)) + if K > 128: + p_w = tl.make_block_ptr(w, (T, K), (H * K, 1), (i_t * BT, 128), (BT, 64), (1, 0)) + b_w = tl.load(p_w, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_v += tl.dot(b_w, tl.trans(b_h3).to(b_w.dtype)) + else: + b_v += tl.dot(b_w, b_h3.to(b_w.dtype)) + if K > 192: + p_w = tl.make_block_ptr(w, (T, K), (H * K, 1), (i_t * BT, 192), (BT, 64), (1, 0)) + b_w = tl.load(p_w, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_v += tl.dot(b_w, tl.trans(b_h4).to(b_w.dtype)) + else: + b_v += tl.dot(b_w, b_h4.to(b_w.dtype)) + p_v = tl.make_block_ptr(v, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + b_v = tl.load(p_v, boundary_check=(0, 1)) - b_v + + if SAVE_NEW_VALUE: + p_v = tl.make_block_ptr(v_new, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + tl.store(p_v, b_v.to(p_v.dtype.element_ty), boundary_check=(0, 1)) + + last_idx = min((i_t + 1) * BT, T) - 1 + if USE_G: + m_t = (i_t * BT + tl.arange(0, BT)) < T + b_g_last = tl.load(g + (bos * H + last_idx * H + i_h).to(tl.int64)).to(tl.float32) + p_g = tl.make_block_ptr(g + (bos * H + i_h).to(tl.int64), (T,), (H,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)).to(tl.float32) + if USE_EXP2: + b_v = b_v * tl.where(m_t, exp2(b_g_last - b_g), 0)[:, None] + b_g_last = exp2(b_g_last) + else: + b_v = b_v * 
tl.where(m_t, exp(b_g_last - b_g), 0)[:, None] + b_g_last = exp(b_g_last) + b_h1 *= b_g_last + if K > 64: + b_h2 *= b_g_last + if K > 128: + b_h3 *= b_g_last + if K > 192: + b_h4 *= b_g_last + + if USE_GK: + o_k1 = tl.arange(0, 64) + b_gk_last1 = tl.load(gk + (bos + last_idx) * H * K + i_h * K + o_k1, mask=(o_k1 < K), other=0.0).to(tl.float32) + if TRANSPOSE_STATE: + if USE_EXP2: + b_h1 *= exp2(b_gk_last1)[None, :] + else: + b_h1 *= exp(b_gk_last1)[None, :] + else: + if USE_EXP2: + b_h1 *= exp2(b_gk_last1)[:, None] + else: + b_h1 *= exp(b_gk_last1)[:, None] + if K > 64: + o_k2 = 64 + o_k1 + b_gk_last2 = tl.load(gk + (bos + last_idx) * H * K + i_h * K + o_k2, mask=(o_k2 < K), other=0.0).to(tl.float32) + if TRANSPOSE_STATE: + if USE_EXP2: + b_h2 *= exp2(b_gk_last2)[None, :] + else: + b_h2 *= exp(b_gk_last2)[None, :] + else: + if USE_EXP2: + b_h2 *= exp2(b_gk_last2)[:, None] + else: + b_h2 *= exp(b_gk_last2)[:, None] + if K > 128: + o_k3 = 128 + o_k1 + b_gk_last3 = tl.load(gk + (bos + last_idx) * H * K + i_h * K + o_k3, mask=(o_k3 < K), other=0.0).to(tl.float32) + if TRANSPOSE_STATE: + if USE_EXP2: + b_h3 *= exp2(b_gk_last3)[None, :] + else: + b_h3 *= exp(b_gk_last3)[None, :] + else: + if USE_EXP2: + b_h3 *= exp2(b_gk_last3)[:, None] + else: + b_h3 *= exp(b_gk_last3)[:, None] + if K > 192: + o_k4 = 192 + o_k1 + b_gk_last4 = tl.load(gk + (bos + last_idx) * H * K + i_h * K + o_k4, mask=(o_k4 < K), other=0.0).to(tl.float32) + if TRANSPOSE_STATE: + if USE_EXP2: + b_h4 *= exp2(b_gk_last4)[None, :] + else: + b_h4 *= exp(b_gk_last4)[None, :] + else: + if USE_EXP2: + b_h4 *= exp2(b_gk_last4)[:, None] + else: + b_h4 *= exp(b_gk_last4)[:, None] + + b_v = b_v.to(k.dtype.element_ty) + + p_k = tl.make_block_ptr(k, (K, T), (1, Hg * K), (0, i_t * BT), (64, BT), (0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_h1 += tl.trans(tl.dot(b_k, b_v)) + else: + b_h1 += tl.dot(b_k, b_v) + if K > 64: + p_k = tl.make_block_ptr(k, (K, T), (1, Hg * K), (64, i_t * BT), 
(64, BT), (0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_h2 += tl.trans(tl.dot(b_k, b_v)) + else: + b_h2 += tl.dot(b_k, b_v) + if K > 128: + p_k = tl.make_block_ptr(k, (K, T), (1, Hg * K), (128, i_t * BT), (64, BT), (0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_h3 += tl.trans(tl.dot(b_k, b_v)) + else: + b_h3 += tl.dot(b_k, b_v) + if K > 192: + p_k = tl.make_block_ptr(k, (K, T), (1, Hg * K), (192, i_t * BT), (64, BT), (0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_h4 += tl.trans(tl.dot(b_k, b_v)) + else: + b_h4 += tl.dot(b_k, b_v) + + if STORE_FINAL_STATE: + if TRANSPOSE_STATE: + p_ht = tl.make_block_ptr(ht, (V, K), (K, 1), (i_v * BV, 0), (BV, 64), (1, 0)) + else: + p_ht = tl.make_block_ptr(ht, (K, V), (V, 1), (0, i_v * BV), (64, BV), (1, 0)) + tl.store(p_ht, b_h1.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 64: + if TRANSPOSE_STATE: + p_ht = tl.make_block_ptr(ht, (V, K), (K, 1), (i_v * BV, 64), (BV, 64), (1, 0)) + else: + p_ht = tl.make_block_ptr(ht, (K, V), (V, 1), (64, i_v * BV), (64, BV), (1, 0)) + tl.store(p_ht, b_h2.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 128: + if TRANSPOSE_STATE: + p_ht = tl.make_block_ptr(ht, (V, K), (K, 1), (i_v * BV, 128), (BV, 64), (1, 0)) + else: + p_ht = tl.make_block_ptr(ht, (K, V), (V, 1), (128, i_v * BV), (64, BV), (1, 0)) + tl.store(p_ht, b_h3.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + if K > 192: + if TRANSPOSE_STATE: + p_ht = tl.make_block_ptr(ht, (V, K), (K, 1), (i_v * BV, 192), (BV, 64), (1, 0)) + else: + p_ht = tl.make_block_ptr(ht, (K, V), (V, 1), (192, i_v * BV), (64, BV), (1, 0)) + tl.store(p_ht, b_h4.to(p_ht.dtype.element_ty), boundary_check=(0, 1)) + + +def chunk_gated_delta_rule_fwd_h( + k: torch.Tensor, + w: torch.Tensor, + u: torch.Tensor, + g: torch.Tensor | None = None, + gk: torch.Tensor | None = None, + initial_state: torch.Tensor | None = None, + initial_state_indices: 
torch.Tensor | None = None, + output_final_state: bool = True, + chunk_size: int = 64, + save_new_value: bool = True, + cu_seqlens: torch.LongTensor | None = None, + chunk_indices: torch.LongTensor | None = None, + use_exp2: bool = False, + transpose_state_layout: bool = True, + kernel_cfg: dict | None = None, +): + B, T, Hg, K, V = *k.shape, u.shape[-1] + H = u.shape[-2] + BT = chunk_size + + if chunk_indices is None and cu_seqlens is not None: + chunk_indices = prepare_chunk_indices(cu_seqlens, chunk_size) + if cu_seqlens is None: + N, NT, chunk_offsets = B, triton.cdiv(T, BT), None + else: + N, NT = len(cu_seqlens) - 1, len(chunk_indices) + chunk_offsets = prepare_chunk_offsets(cu_seqlens, BT) + assert K <= 256, "current kernel does not support head dimension larger than 256." + + state_rows = initial_state.shape[0] if initial_state is not None else N + + if transpose_state_layout: + h = k.new_empty(B, NT, H, V, K) + final_state = k.new_empty(state_rows, H, V, K, dtype=torch.float32) if output_final_state else None + else: + h = k.new_empty(B, NT, H, K, V) + final_state = k.new_empty(state_rows, H, K, V, dtype=torch.float32) if output_final_state else None + v_new = torch.empty_like(u) if save_new_value else None + + launch_chunk_gated_delta_rule_fwd_kernel_h_blockdim64( + k=k, + u=u, + w=w, + v_new=v_new, + g=g, + gk=gk, + h=h, + initial_state=initial_state, + initial_state_indices=initial_state_indices, + final_state=final_state, + cu_seqlens=cu_seqlens, + chunk_offsets=chunk_offsets, + N=N, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + use_exp2=use_exp2, + transpose_state_layout=transpose_state_layout, + kernel_cfg=kernel_cfg, + ) + return h, v_new, final_state diff --git a/aiter/ops/triton/fla/vllm/chunk_o.py b/aiter/ops/triton/fla/vllm/chunk_o.py new file mode 100644 index 0000000000000000000000000000000000000000..6be1c23e968815f11afacb5b7769d1e2a67902d8 --- /dev/null +++ b/aiter/ops/triton/fla/vllm/chunk_o.py @@ -0,0 +1,294 @@ +# SPDX-License-Identifier: 
MIT + +import functools +import json +import os + +import torch +import triton +import triton.language as tl + +import aiter.ops.triton.utils.arch_info as arch_info +from aiter import logger +from aiter.ops.triton.utils.core import AITER_TRITON_CONFIGS_PATH + +TRITON_CONFIG_CHECK = os.environ.get("TRITON_CONFIG_CHECK", "0") == "1" + + +@triton.jit +def exp(x): + return tl.exp(x) + + +@triton.jit +def exp2(x): + return tl.math.exp2(x) + + +def prepare_chunk_indices(cu_seqlens: torch.LongTensor, chunk_size: int) -> torch.LongTensor: + chunk_rows = [] + for i in range(len(cu_seqlens) - 1): + seqlen = int((cu_seqlens[i + 1] - cu_seqlens[i]).item()) + n_chunks = triton.cdiv(seqlen, chunk_size) + for chunk_idx in range(n_chunks): + chunk_rows.append([i, chunk_idx]) + if len(chunk_rows) == 0: + return torch.empty((0, 2), dtype=torch.long, device=cu_seqlens.device) + return torch.tensor(chunk_rows, dtype=torch.long, device=cu_seqlens.device) + + +_DEFAULT_CHUNK_O_CONFIG = { + "BK": 128, + "BV": 64, + "num_warps": 4, + "num_stages": 2, +} + + +@functools.lru_cache(maxsize=1) +def _load_chunk_o_configs() -> dict: + device_name = arch_info.get_arch() + path = os.path.join( + AITER_TRITON_CONFIGS_PATH, + "chunk_fwd_o", + f"chunk_fwd_o-{device_name}.json", + ) + if not os.path.exists(path): + logger.warning( + f"chunk_fwd_o config not found at {path}, using default {_DEFAULT_CHUNK_O_CONFIG}." + ) + return {} + with open(path) as f: + payload = json.load(f) + return payload.get("config", {}) if isinstance(payload, dict) else {} + + +@functools.lru_cache +def _get_chunk_o_config(K: int, V: int, BT: int, transpose_state_layout: bool) -> dict: + cfgs = _load_chunk_o_configs() + key = f"K={K},V={V},BT={BT}" + cfg = cfgs.get(key) + if cfg is None: + default_cfg = cfgs.get("default", _DEFAULT_CHUNK_O_CONFIG) + if TRITON_CONFIG_CHECK: + logger.warning( + "chunk_fwd_o config missing for " + f"{key}, using default config {default_cfg}." 
+ ) + cfg = default_cfg + merged = dict(_DEFAULT_CHUNK_O_CONFIG) + merged.update(cfg) + return merged + + +def launch_chunk_fwd_kernel_o( + *, + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + h: torch.Tensor, + g: torch.Tensor | None, + g_gamma: torch.Tensor | None, + o: torch.Tensor, + cu_seqlens: torch.LongTensor | None, + chunk_indices: torch.LongTensor | None, + scale: float, + T: int, + H: int, + Hg: int, + K: int, + V: int, + BT: int, + NT: int, + B: int, + use_exp2: bool, + transpose_state_layout: bool, + kernel_cfg: dict | None, +): + def grid(meta): + return (triton.cdiv(V, meta["BV"]), NT, B * H) + + cfg = kernel_cfg if kernel_cfg is not None else _get_chunk_o_config(K, V, BT, transpose_state_layout) + + chunk_fwd_kernel_o[grid]( + q=q, + k=k, + v=v, + h=h, + g=g, + g_gamma=g_gamma, + o=o, + cu_seqlens=cu_seqlens, + chunk_indices=chunk_indices, + scale=scale, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + BK=cfg["BK"], + BV=cfg["BV"], + USE_EXP2=use_exp2, + TRANSPOSE_STATE=transpose_state_layout, + num_warps=cfg["num_warps"], + num_stages=cfg["num_stages"], + ) + + +@triton.heuristics({ + "USE_G": lambda args: args["g"] is not None, + "USE_G_GAMMA": lambda args: args["g_gamma"] is not None, + "IS_VARLEN": lambda args: args["cu_seqlens"] is not None, +}) +@triton.jit(do_not_specialize=["T"]) +def chunk_fwd_kernel_o( + q, + k, + v, + h, + g, + g_gamma, + o, + cu_seqlens, + chunk_indices, + scale, + T, + H: tl.constexpr, + Hg: tl.constexpr, + K: tl.constexpr, + V: tl.constexpr, + BT: tl.constexpr, + BK: tl.constexpr, + BV: tl.constexpr, + USE_G: tl.constexpr, + USE_G_GAMMA: tl.constexpr, + USE_EXP2: tl.constexpr, + TRANSPOSE_STATE: tl.constexpr, + IS_VARLEN: tl.constexpr, +): + i_v, i_t, i_bh = tl.program_id(0), tl.program_id(1), tl.program_id(2) + i_b, i_h = i_bh // H, i_bh % H + + if IS_VARLEN: + i_tg = i_t + i_n = tl.load(chunk_indices + i_t * 2).to(tl.int32) + i_t = tl.load(chunk_indices + i_t * 2 + 1).to(tl.int32) + bos = tl.load(cu_seqlens + 
i_n).to(tl.int32) + eos = tl.load(cu_seqlens + i_n + 1).to(tl.int32) + T = eos - bos + else: + NT = tl.cdiv(T, BT) + i_tg = i_b * NT + i_t + bos, eos = i_b * T, i_b * T + T + + q += (bos * Hg + i_h // (H // Hg)) * K + k += (bos * Hg + i_h // (H // Hg)) * K + v += (bos * H + i_h) * V + o += (bos * H + i_h) * V + h += (i_tg * H + i_h).to(tl.int64) * K * V + + b_o = tl.zeros([BT, BV], dtype=tl.float32) + b_A = tl.zeros([BT, BT], dtype=tl.float32) + + for i_k in range(tl.cdiv(K, BK)): + p_q = tl.make_block_ptr(q, (T, K), (Hg * K, 1), (i_t * BT, i_k * BK), (BT, BK), (1, 0)) + p_k = tl.make_block_ptr(k, (K, T), (1, Hg * K), (i_k * BK, i_t * BT), (BK, BT), (0, 1)) + if TRANSPOSE_STATE: + p_h = tl.make_block_ptr(h, (V, K), (K, 1), (i_v * BV, i_k * BK), (BV, BK), (1, 0)) + else: + p_h = tl.make_block_ptr(h, (K, V), (V, 1), (i_k * BK, i_v * BV), (BK, BV), (1, 0)) + + b_q = tl.load(p_q, boundary_check=(0, 1)) + b_k = tl.load(p_k, boundary_check=(0, 1)) + b_h = tl.load(p_h, boundary_check=(0, 1)) + if TRANSPOSE_STATE: + b_o += tl.dot(b_q, tl.trans(b_h)) + else: + b_o += tl.dot(b_q, b_h) + b_A += tl.dot(b_q, b_k) + + if USE_G: + g += bos * H + i_h + p_g = tl.make_block_ptr(g, (T,), (H,), (i_t * BT,), (BT,), (0,)) + b_g = tl.load(p_g, boundary_check=(0,)) + if USE_EXP2: + b_o = b_o * exp2(b_g)[:, None] + b_A = b_A * exp2(b_g[:, None] - b_g[None, :]) + else: + b_o = b_o * exp(b_g)[:, None] + b_A = b_A * exp(b_g[:, None] - b_g[None, :]) + + if USE_G_GAMMA: + b_gamma = tl.load(g_gamma + i_h) + b_g = b_gamma * (tl.arange(0, BT) + 1) + if USE_EXP2: + b_o = b_o * exp2(b_g)[:, None] + b_A = b_A * exp2(b_g[:, None] - b_g[None, :]) + else: + b_o = b_o * exp(b_g)[:, None] + b_A = b_A * exp(b_g[:, None] - b_g[None, :]) + + o_t = i_t * BT + tl.arange(0, BT) + m_t = o_t < T + m_A = (o_t[:, None] >= o_t[None, :]) & (m_t[:, None] & m_t) + b_A = tl.where(m_A, b_A, 0) + + p_v = tl.make_block_ptr(v, (T, V), (H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + p_o = tl.make_block_ptr(o, (T, V), 
(H * V, 1), (i_t * BT, i_v * BV), (BT, BV), (1, 0)) + + b_v = tl.load(p_v, boundary_check=(0, 1)) + b_o = b_o * scale + tl.dot(b_A.to(b_v.dtype), b_v) * scale + tl.store(p_o, b_o.to(p_o.dtype.element_ty), boundary_check=(0, 1)) + + +def chunk_fwd_o( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + h: torch.Tensor, + g: torch.Tensor | None = None, + g_gamma: torch.Tensor | None = None, + scale: float | None = None, + cu_seqlens: torch.LongTensor | None = None, + chunk_size: int = 64, + chunk_indices: torch.LongTensor | None = None, + use_exp2: bool = False, + transpose_state_layout: bool = False, + kernel_cfg: dict | None = None, +) -> torch.Tensor: + B, T, Hg, K, V = *q.shape, v.shape[-1] + H = v.shape[-2] + BT = chunk_size + if chunk_indices is None and cu_seqlens is not None: + chunk_indices = prepare_chunk_indices(cu_seqlens, BT) + NT = triton.cdiv(T, BT) if cu_seqlens is None else len(chunk_indices) + if scale is None: + scale = k.shape[-1] ** -0.5 + + o = torch.empty_like(v) + + launch_chunk_fwd_kernel_o( + q=q, + k=k, + v=v, + h=h, + g=g, + g_gamma=g_gamma, + o=o, + cu_seqlens=cu_seqlens, + chunk_indices=chunk_indices, + scale=scale, + T=T, + H=H, + Hg=Hg, + K=K, + V=V, + BT=BT, + NT=NT, + B=B, + use_exp2=use_exp2, + transpose_state_layout=transpose_state_layout, + kernel_cfg=kernel_cfg, + ) + return o diff --git a/aiter/ops/triton/flash_attention_forward.py b/aiter/ops/triton/flash_attention_forward.py index 5bf922326c2b98987d93a7b17c3f048ff454fcfb..a3fdc9dd2bd865887ded4c249c33d473b5036028 100644 --- a/aiter/ops/triton/flash_attention_forward.py +++ b/aiter/ops/triton/flash_attention_forward.py @@ -26,6 +26,8 @@ import torch import triton import triton.language as tl +from aiter.ops.triton.utils.arch_info import is_mls_avail + torch_dtype: tl.constexpr = torch.float16 @@ -145,7 +147,7 @@ def _attn_fwd_inner( if MASK_STEPS: K_block_ptr = tl.advance(K_block_ptr, (0, n_full_blocks * BLOCK_N)) V_block_ptr = tl.advance(V_block_ptr, (n_full_blocks * 
BLOCK_N, 0)) - + # loop over k, v, and update accumulator for start_n in range(block_min, block_max, BLOCK_N): # For padded blocks, we will overrun the tensor size if @@ -393,87 +395,24 @@ def get_cdna_autotune_configs(): # num_stages=1, # num_warps=4, # ), - ], ['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL', 'USE_FP8'] - -def get_rdna_autotune_configs(): - return [ - triton.Config( - { - 'BLOCK_M': 32, - 'BLOCK_N': 32, - 'waves_per_eu': 4, - 'PRE_LOAD_V': False, - 'USE_MLS': True, - }, - num_stages=1, - num_warps=2), - triton.Config( - { - 'BLOCK_M': 32, - 'BLOCK_N': 32, - 'waves_per_eu': 2, - 'PRE_LOAD_V': False, - 'USE_MLS': True, - }, - num_stages=1, - num_warps=2), - triton.Config( - { - 'BLOCK_M': 32, - 'BLOCK_N': 16, - 'waves_per_eu': 4, - 'PRE_LOAD_V': False, - 'USE_MLS': True, - }, - num_stages=1, - num_warps=2), - triton.Config( - { - 'BLOCK_M': 32, - 'BLOCK_N': 16, - 'waves_per_eu': 2, - 'PRE_LOAD_V': False, - 'USE_MLS': True, - }, - num_stages=1, - num_warps=2), - # Fails in AccelerateAMDMatmul (Triton) assert when using FP8: - # triton.Config( - # { - # 'BLOCK_M': 16, - # 'BLOCK_N': 16, - # 'waves_per_eu': 4, - # 'PRE_LOAD_V': False - # }, - # num_stages=1, - # num_warps=2), - # triton.Config( - # { - # 'BLOCK_M': 16, - # 'BLOCK_N': 16, - # 'waves_per_eu': 2, - # 'PRE_LOAD_V': False - # }, - # num_stages=1, - # num_warps=2), - # # Fall-back config. 
- # triton.Config( - # { - # 'BLOCK_M': 16, - # 'BLOCK_N': 16, - # 'waves_per_eu': 1, - # 'PRE_LOAD_V': False - # }, - # num_stages=1, - # num_warps=2), - ], ['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL', 'USE_FP8', "USE_MLS"] + ], ['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL', 'HQ', 'HK', 'USE_FP8', 'USE_MLS'] def get_autotune_configs(): - #if on_gfx1x(): - # return get_rdna_autotune_configs() - #else: - return get_cdna_autotune_configs() + configs, keys = get_cdna_autotune_configs() + if is_mls_avail(): + mls_configs = [] + for cfg in configs: + cfg_kwargs = dict(cfg.kwargs) + cfg_kwargs["USE_MLS"] = True + mls_configs.append( + triton.Config( + cfg_kwargs, + num_stages=cfg.num_stages, + num_warps=cfg.num_warps, + )) + configs = configs + mls_configs + return configs, keys autotune_configs, autotune_keys = get_autotune_configs() @@ -492,20 +431,20 @@ def prune_configs(configs, nargs, **kwargs): return False return [c for c in configs if not _prune(c)] -''' + @triton.autotune( configs=autotune_configs, key=autotune_keys, + perf_debug=True, prune_configs_by={"early_config_prune": prune_configs} ) -''' -''' -@triton.utils.hcutune( - configs=autotune_configs, - always_tuning=True, - key=['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL', 'USE_FP8', 'USE_MLS'] -) -''' +# ''' +# @triton.utils.hcutune( +# configs=autotune_configs, +# always_tuning=True, +# key=['IS_CAUSAL', 'dropout_p', 'BLOCK_DMODEL', 'USE_FP8', 'USE_MLS'] +# ) +# ''' @triton.heuristics( values={ "PADDED_HEAD": lambda args: args["ACTUAL_BLOCK_DMODEL"] != args["BLOCK_DMODEL"], @@ -1079,52 +1018,79 @@ class _attention(torch.autograd.Function): arg_max_seqlens_q = 0 if on_gfx1x() else max_seqlens_q arg_max_seqlens_k = 0 if on_gfx1x() else max_seqlens_k - if not use_fp8: - if padded_d_model <= 128: - config = { - "BLOCK_M": 128, - "BLOCK_N": 64, - "waves_per_eu": 3, - "PRE_LOAD_V": False, - "USE_MLS": False, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 1, - } - else: - config = { - "BLOCK_M": 32, - "BLOCK_N": 32, - 
"waves_per_eu": 4, - "PRE_LOAD_V": False, - "USE_MLS": False, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 1, - } - else: - if padded_d_model <= 128: - config = { - "BLOCK_M": 128, - "BLOCK_N": 128, - "waves_per_eu": 2, - "PRE_LOAD_V": False, - "USE_MLS": False, - "num_warps": 4, - "num_ctas": 1, - "num_stages": 1, - } + use_autotune = len(getattr(attn_fwd, "configs", [])) > 0 + launch_config = {} + # Keep the legacy static config path when autotune is disabled. + if not use_autotune: + _use_mls = is_mls_avail() + if not use_fp8: + if _use_mls: + launch_config = { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 4, + "PRE_LOAD_V": False, + "USE_MLS": True, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 1, + } + elif padded_d_model <= 128: + launch_config = { + "BLOCK_M": 128, + "BLOCK_N": 64, + "waves_per_eu": 3, + "PRE_LOAD_V": False, + "USE_MLS": False, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1, + } + else: + launch_config = { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 4, + "PRE_LOAD_V": False, + "USE_MLS": False, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 1, + } else: - config = { - "BLOCK_M": 32, - "BLOCK_N": 32, - "waves_per_eu": 4, - "PRE_LOAD_V": False, - "USE_MLS": False, - "num_warps": 8, - "num_ctas": 1, - "num_stages": 1, - } + if _use_mls: + launch_config = { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 4, + "PRE_LOAD_V": False, + "USE_MLS": True, + "num_warps": 2, + "num_ctas": 1, + "num_stages": 1, + } + elif padded_d_model <= 128: + launch_config = { + "BLOCK_M": 128, + "BLOCK_N": 128, + "waves_per_eu": 2, + "PRE_LOAD_V": False, + "USE_MLS": False, + "num_warps": 4, + "num_ctas": 1, + "num_stages": 1, + } + else: + launch_config = { + "BLOCK_M": 32, + "BLOCK_N": 32, + "waves_per_eu": 4, + "PRE_LOAD_V": False, + "USE_MLS": False, + "num_warps": 8, + "num_ctas": 1, + "num_stages": 1, + } attn_fwd[grid]( q, @@ -1164,7 +1130,7 @@ class _attention(torch.autograd.Function): RETURN_ENCODED_SOFTMAX=False, 
USE_FP8=use_fp8, USE_FP8_OUT=fp8_out_scale is not None, - **config, + **launch_config, ) ctx.grid = grid @@ -1179,4 +1145,4 @@ class _attention(torch.autograd.Function): return o, encoded_softmax -triton_attention = _attention.apply \ No newline at end of file +triton_attention = _attention.apply diff --git a/aiter/ops/triton/fused_moe.py b/aiter/ops/triton/fused_moe.py index 8d2f0f9ac484b29e26511e6d7cd7257a22f4b8a0..2248bda2b5a967f7f51cabdb6dacd497f6bbd311 100644 --- a/aiter/ops/triton/fused_moe.py +++ b/aiter/ops/triton/fused_moe.py @@ -9,6 +9,11 @@ import torch import triton import triton.language as tl import aiter.ops.triton.utils.arch_info as arch_info +from aiter.ops.triton.moe_activation import ( + _normalize_activation_and_gate, + adjust_N_for_activation, + _apply_activation, +) from aiter.ops.triton.moe_op import fused_moe as invoke_fused_moe_kernel, support_mls # from vllm import _custom_ops as ops @@ -23,7 +28,7 @@ from aiter import dtypes,moe_sorting_fwd device_name = arch_info.get_device() -def get_moe_sum_config(M, top_k, N): +def get_moe_sum_config(M, topk, N): if M < 32: return {"BLOCK_SIZE": 128, "num_warps": 1} else: @@ -41,7 +46,7 @@ def get_moe_sum_config(M, top_k, N): # return configs # @triton.autotune( -# key=['M', 'N', 'top_k','compute_type'], +# key=['M', 'N', 'topk','compute_type'], # configs=generate_sum_configs(), # # configs = [ # # triton.Config({'BLOCK_SIZE': 64 }, num_warps=1), @@ -57,10 +62,10 @@ def get_moe_sum_config(M, top_k, N): @triton.jit def moe_sum_kernel( output_ptr, # [M, N] - input_ptr, # [M, top_k, N] + input_ptr, # [M, topk, N] M, N: tl.constexpr, - top_k: tl.constexpr, + topk: tl.constexpr, routed_scaling_factor, BLOCK_SIZE: tl.constexpr, stride_output_m, @@ -86,7 +91,7 @@ def moe_sum_kernel( acc = tl.zeros((BLOCK_SIZE,), dtype=tl.float32) input_row_ptr = input_ptr + pid_m.to(tl.int64) * stride_input_m - for k in range(top_k): + for k in range(topk): input_ptrs = input_row_ptr + ( k * stride_input_k + offs_n * 
stride_input_n @@ -113,10 +118,10 @@ def triton_moe_sum(input_tensor, 1D tile version of moe_sum. Args: - input_tensor: [M, top_k, N] + input_tensor: [M, topk, N] output_tensor: [M, N] """ - M, top_k, N = input_tensor.shape + M, topk, N = input_tensor.shape assert output_tensor.dtype == torch.float16 or \ output_tensor.dtype == torch.bfloat16 or \ @@ -135,7 +140,7 @@ def triton_moe_sum(input_tensor, assert input_tensor.shape[2] == output_tensor.shape[1] # 计算grid - config = get_moe_sum_config(M, top_k, N) + config = get_moe_sum_config(M, topk, N) grid = (M * triton.cdiv(N, config["BLOCK_SIZE"]),) # grid = lambda META: (M * triton.cdiv(N, META['BLOCK_SIZE']), ) @@ -144,7 +149,7 @@ def triton_moe_sum(input_tensor, input_tensor, M, N, - top_k, + topk, routed_scaling_factor, stride_output_m=output_tensor.stride(0), stride_output_n=output_tensor.stride(1), @@ -158,217 +163,6 @@ def triton_moe_sum(input_tensor, return output_tensor -# @triton.autotune( -# configs=[ -# triton.Config( -# {"BLOCK_SIZE_M": bm, "BLOCK_SIZE_N": bn}, -# num_warps=nw, -# ) -# for bm in (1, 4, 16, 32, 64, 128) -# for bn in (32, 128, 256, 512,) -# for nw in (1, 2, 4) -# ], -# key=["M", "N", "ACT"], -# perf_debug=True, -# ) -@triton.heuristics( - { - "M_DIV": lambda args: (args["M"] % args["BLOCK_SIZE_M"]) == 0, - "N_DIV": lambda args: (args["N"] % args["BLOCK_SIZE_N"]) == 0, - } -) -@triton.jit -def activation_and_mul_kernel( - out_ptr, - in_ptr, - M, - N, - stride_in0, - stride_in1, - stride_out0, - stride_out1, - ACT: tl.constexpr, - M_DIV: tl.constexpr, - N_DIV: tl.constexpr, - BLOCK_SIZE_M: tl.constexpr, - BLOCK_SIZE_N: tl.constexpr, -): - tl.assume(M > 0) - tl.assume(N > 0) - tl.assume(stride_in0 >= 0) - tl.assume(stride_in1 >= 0) - tl.assume(stride_out0 >= 0) - tl.assume(stride_out1 >= 0) - - pid_m = tl.program_id(1) - pid_n = tl.program_id(0) - - offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) - offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) - - mask_m = offs_m < M - 
mask_n = offs_n < N - mask = mask_m[:, None] & mask_n[None, :] - - in_base = in_ptr + offs_m[:, None] * stride_in0 + offs_n[None, :] * stride_in1 - if M_DIV and N_DIV: - x0 = tl.load(in_base) - x1 = tl.load(in_base + N * stride_in1) - else: - x0 = tl.load(in_base, mask=mask, other=0.0) - x1 = tl.load(in_base + N * stride_in1, mask=mask, other=0.0) - - x0_f = x0.to(tl.float32) - x1_f = x1.to(tl.float32) - if ACT == 0: - act = x0_f * (1.0 / (1.0 + tl.exp(-x0_f))) - else: - act = x0_f * 0.5 * (1.0 + tl.erf(x0_f * 0.7071067811865476)) - y = act * x1_f - - out_ptrs = out_ptr + offs_m[:, None] * stride_out0 + offs_n[None, :] * stride_out1 - if M_DIV and N_DIV: - tl.store(out_ptrs, y) - else: - tl.store(out_ptrs, y, mask=mask) - -def get_triton_activation_and_mul_config(M, N): - if M <= 512: - return {"BLOCK_SIZE_M": 1, "BLOCK_SIZE_N": 256, "num_warps": 1} - return {"BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 128, "num_warps": 4} - - -@triton.heuristics( - { - "M_DIV": lambda args: (args["M"] % args["BLOCK_SIZE_M"]) == 0, - "N_DIV": lambda args: (args["N"] % args["BLOCK_SIZE_N"]) == 0, - } -) -@triton.jit -def relu2_kernel( - out_ptr, - in_ptr, - M, - N, - stride_in0, - stride_in1, - stride_out0, - stride_out1, - M_DIV: tl.constexpr, - N_DIV: tl.constexpr, - BLOCK_SIZE_M: tl.constexpr, - BLOCK_SIZE_N: tl.constexpr, -): - tl.assume(M > 0) - tl.assume(N > 0) - pid_m = tl.program_id(1) - pid_n = tl.program_id(0) - offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) - offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) - mask_m = offs_m < M - mask_n = offs_n < N - mask = mask_m[:, None] & mask_n[None, :] - in_base = in_ptr + offs_m[:, None] * stride_in0 + offs_n[None, :] * stride_in1 - if M_DIV and N_DIV: - x = tl.load(in_base) - else: - x = tl.load(in_base, mask=mask, other=0.0) - xf = x.to(tl.float32) - y = tl.where(xf > 0.0, xf * xf, 0.0) - out_ptrs = out_ptr + offs_m[:, None] * stride_out0 + offs_n[None, :] * stride_out1 - if M_DIV and N_DIV: - tl.store(out_ptrs, 
y.to(x.dtype)) - else: - tl.store(out_ptrs, y.to(x.dtype), mask=mask) - - -def triton_relu2(out: torch.Tensor, inp: torch.Tensor) -> None: - """Elementwise ReLU² (no gate / no second-path mul). out.shape == inp.shape.""" - assert inp.shape == out.shape - assert inp.is_contiguous() and out.is_contiguous() - M, N = inp.shape - config = get_triton_activation_and_mul_config(M, N) - grid = ( - triton.cdiv(N, config["BLOCK_SIZE_N"]), - triton.cdiv(M, config["BLOCK_SIZE_M"]), - ) - relu2_kernel[grid]( - out, - inp, - M, - N, - inp.stride(0), - inp.stride(1), - out.stride(0), - out.stride(1), - **config, - ) - - -def triton_silu_and_mul(out: torch.Tensor, input: torch.Tensor) -> None: - assert input.shape[-1] % 2 == 0 - assert input.is_contiguous() - assert out.is_contiguous() - M = input.numel() // input.shape[-1] - N = input.shape[-1] // 2 - input_2d = input.view(M, input.shape[-1]) - out_2d = out.view(M, N) - - - # grid = lambda META: ( - # triton.cdiv(N, META["BLOCK_SIZE_N"]), - # triton.cdiv(M, META["BLOCK_SIZE_M"]), - # ) - - config = get_triton_activation_and_mul_config(M, N) - grid = ( - triton.cdiv(N, config["BLOCK_SIZE_N"]), - triton.cdiv(M, config["BLOCK_SIZE_M"]), - ) - - activation_and_mul_kernel[grid]( - out_2d, - input_2d, - M, - N, - input_2d.stride(0), - input_2d.stride(1), - out_2d.stride(0), - out_2d.stride(1), - ACT=0, - **config, - ) - - -def triton_gelu_and_mul(out: torch.Tensor, input: torch.Tensor) -> None: - assert input.shape[-1] % 2 == 0 - assert input.is_contiguous() - assert out.is_contiguous() - M = input.numel() // input.shape[-1] - N = input.shape[-1] // 2 - input_2d = input.view(M, input.shape[-1]) - out_2d = out.view(M, N) - - # grid = lambda META: ( - # triton.cdiv(M, META["BLOCK_SIZE_M"]), - # triton.cdiv(N, META["BLOCK_SIZE_N"]), - # ) - config = get_triton_activation_and_mul_config(M, N) - grid = (triton.cdiv(M, config['BLOCK_SIZE_M']) * triton.cdiv(N, config['BLOCK_SIZE_N']),) - activation_and_mul_kernel[grid]( - out_2d, - input_2d, - 
M, - N, - input_2d.stride(0), - input_2d.stride(1), - out_2d.stride(0), - out_2d.stride(1), - ACT=1, - **config, - ) - - def ceil_div(a, b): return (a + b - 1) // b @@ -517,7 +311,7 @@ def moe_align_block_size( size for matrix multiplication. Parameters: - - topk_ids: A tensor of shape [total_tokens, top_k] representing the + - topk_ids: A tensor of shape [total_tokens, topk] representing the top-k expert indices for each token. - block_size: The block size used in block matrix multiplication. - num_experts: The total number of experts. @@ -541,7 +335,7 @@ def moe_align_block_size( Example: Given topk_ids = [[2, 3, 4], [1, 2, 4], [1, 3, 4], [1, 2, 3]], block_size = 4, and num_experts = 4: - - We initially have 12 tokens (after repeating 'top_k' times) and 4 experts, + - We initially have 12 tokens (after repeating 'topk' times) and 4 experts, with each expert needing to process 3 tokens. - As block_size is 4, we pad 1 token for each expert. - First, flatten topk_ids to [2, 3, 4, 1, 2, 4, 1, 3, 4, 1, 2, 3]. 
@@ -670,10 +464,16 @@ def inplace_fused_experts(hidden_states: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, activation: Optional[str] = None, + b1: Optional[torch.Tensor] = None, + b2: Optional[torch.Tensor] = None, + is_gated: Optional[bool] = None, + apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w8a16: bool = False, use_int4_w4a16: bool = False, + use_int4_w4a8: bool = False, + per_channel_quant: bool = False, global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, w1_scale: Optional[torch.Tensor] = None, @@ -682,14 +482,47 @@ def inplace_fused_experts(hidden_states: torch.Tensor, w2_zp: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[List[int]] = None) -> None: + block_shape: Optional[List[int]] = None, + output_dtype: Optional[torch.dtype] = None, + no_combine: bool = False, + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None) -> None: if activation is None: activation = "silu" - fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True, - activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, - use_int4_w4a16, global_num_experts, expert_map, - w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, - block_shape) + fused_experts_impl( + hidden_states=hidden_states, + w1=w1, + w2=w2, + topk_weights=topk_weights, + topk_ids=topk_ids, + output_dtype=output_dtype, + inplace=True, + activation=activation, + is_gated=is_gated, + b1=b1, + b2=b2, + apply_router_weight_on_input=apply_router_weight_on_input, + use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a8=use_int8_w8a8, + use_int8_w8a16=use_int8_w8a16, + use_int4_w4a16=use_int4_w4a16, + use_int4_w4a8=use_int4_w4a8, + per_channel_quant=per_channel_quant, + global_num_experts=global_num_experts, + expert_map=expert_map, + w1_scale=w1_scale, + 
w2_scale=w2_scale, + w1_zp=w1_zp, + w2_zp=w2_zp, + a1_scale=a1_scale, + a2_scale=a2_scale, + block_shape=block_shape, + no_combine=no_combine, + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) def outplace_fused_experts( hidden_states: torch.Tensor, @@ -698,10 +531,16 @@ def outplace_fused_experts( topk_weights: torch.Tensor, topk_ids: torch.Tensor, activation: Optional[str] = None, + b1: Optional[torch.Tensor] = None, + b2: Optional[torch.Tensor] = None, + is_gated: Optional[bool] = None, + apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w8a16: bool = False, use_int4_w4a16: bool = False, + use_int4_w4a8: bool = False, + per_channel_quant: bool = False, global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, w1_scale: Optional[torch.Tensor] = None, @@ -710,14 +549,47 @@ def outplace_fused_experts( w2_zp: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[List[int]] = None) -> torch.Tensor: + block_shape: Optional[List[int]] = None, + output_dtype: Optional[torch.dtype] = None, + no_combine: bool = False, + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None) -> torch.Tensor: if activation is None: activation = "silu" - return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, - False, activation, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, - use_int4_w4a16, global_num_experts, expert_map, - w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, - a2_scale, block_shape) + return fused_experts_impl( + hidden_states=hidden_states, + w1=w1, + w2=w2, + topk_weights=topk_weights, + topk_ids=topk_ids, + output_dtype=output_dtype, + inplace=False, + activation=activation, + is_gated=is_gated, + b1=b1, + b2=b2, + apply_router_weight_on_input=apply_router_weight_on_input, + 
use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a8=use_int8_w8a8, + use_int8_w8a16=use_int8_w8a16, + use_int4_w4a16=use_int4_w4a16, + use_int4_w4a8=use_int4_w4a8, + per_channel_quant=per_channel_quant, + global_num_experts=global_num_experts, + expert_map=expert_map, + w1_scale=w1_scale, + w2_scale=w2_scale, + w1_zp=w1_zp, + w2_zp=w2_zp, + a1_scale=a1_scale, + a2_scale=a2_scale, + block_shape=block_shape, + no_combine=no_combine, + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) def fused_experts(hidden_states: torch.Tensor, w1: torch.Tensor, @@ -726,10 +598,16 @@ def fused_experts(hidden_states: torch.Tensor, topk_ids: torch.Tensor, inplace: bool = False, activation: Optional[str] = None, + b1: Optional[torch.Tensor] = None, + b2: Optional[torch.Tensor] = None, + is_gated: Optional[bool] = None, + apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w8a16: bool = False, use_int4_w4a16: bool = False, + use_int4_w4a8: bool = False, + per_channel_quant: bool = False, global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, w1_scale: Optional[torch.Tensor] = None, @@ -738,22 +616,47 @@ def fused_experts(hidden_states: torch.Tensor, w2_zp: Optional[torch.Tensor] = None, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[List[int]] = None) -> torch.Tensor: + block_shape: Optional[List[int]] = None, + output_dtype: Optional[torch.dtype] = None, + no_combine: bool = False, + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None) -> torch.Tensor: if activation is None: activation = 'silu' - if inplace: - torch.ops.vllm.inplace_fused_experts( - hidden_states, w1, w2, topk_weights, topk_ids, activation, - use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts, - expert_map, w1_scale, w2_scale, w1_zp, w2_zp, 
a1_scale, a2_scale, - block_shape) - return hidden_states - else: - return torch.ops.vllm.outplace_fused_experts( - hidden_states, w1, w2, topk_weights, topk_ids, activation, - use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, global_num_experts, - expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, - block_shape) + return fused_experts_impl( + hidden_states=hidden_states, + w1=w1, + w2=w2, + topk_weights=topk_weights, + topk_ids=topk_ids, + output_dtype=output_dtype, + inplace=inplace, + activation=activation, + is_gated=is_gated, + b1=b1, + b2=b2, + apply_router_weight_on_input=apply_router_weight_on_input, + use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a8=use_int8_w8a8, + use_int8_w8a16=use_int8_w8a16, + use_int4_w4a16=use_int4_w4a16, + use_int4_w4a8=use_int4_w4a8, + per_channel_quant=per_channel_quant, + global_num_experts=global_num_experts, + expert_map=expert_map, + w1_scale=w1_scale, + w2_scale=w2_scale, + w1_zp=w1_zp, + w2_zp=w2_zp, + a1_scale=a1_scale, + a2_scale=a2_scale, + block_shape=block_shape, + no_combine=no_combine, + routed_scaling_factor=routed_scaling_factor, + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) def fused_moe_fake( hidden_states: torch.Tensor, @@ -761,9 +664,13 @@ def fused_moe_fake( w2: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, - odtype:torch.dtype, #compute or output type for i8& f8 + output_dtype: Optional[torch.dtype] = None, inplace: bool = False, activation: str = "silu", + is_gated: Optional[bool] = None, + b1: Optional[torch.Tensor] = None, + b2: Optional[torch.Tensor] = None, + apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w8a16: bool = False, @@ -779,15 +686,19 @@ def fused_moe_fake( a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, + no_combine: bool = False, routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] 
= None, + gemm1_limit: Optional[float] = None, fn_key: Optional[str] = None, ) -> torch.Tensor: device = topk_ids.device M, topk = topk_ids.shape - dtype = odtype + dtype = (torch.bfloat16 if hidden_states.dtype == torch.bfloat16 else torch.float16) if output_dtype is None else output_dtype # E, model_dim, inter_dim = get_inter_dim(w1.shape, w2.shape) # FIXME: W2.size must be same as hidden_dim - moe_buf = torch.empty(hidden_states.shape, dtype=dtype, device=device) + output_shape = (M, topk, w2.shape[1]) if no_combine else hidden_states.shape + moe_buf = torch.empty(output_shape, dtype=dtype, device=device) return moe_buf @functools.lru_cache() def _bottom_moe_use_mls(): @@ -800,9 +711,13 @@ def fused_experts_impl(hidden_states: torch.Tensor, w2: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, - odtype:torch.dtype, #compute or output type for i8& f8 + output_dtype: Optional[torch.dtype] = None, inplace: bool = False, activation: str = "silu", + is_gated: Optional[bool] = None, + b1: Optional[torch.Tensor] = None, + b2: Optional[torch.Tensor] = None, + apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, use_int8_w8a16: bool = False, @@ -818,14 +733,25 @@ def fused_experts_impl(hidden_states: torch.Tensor, a1_scale: Optional[torch.Tensor] = None, a2_scale: Optional[torch.Tensor] = None, block_shape: Optional[List[int]] = None, - routed_scaling_factor: Optional[float] = 1.0)-> torch.Tensor: + no_combine: bool = False, + routed_scaling_factor: Optional[float] = 1.0, + gemm1_alpha: Optional[float] = None, + gemm1_limit: Optional[float] = None)-> torch.Tensor: if routed_scaling_factor is None: routed_scaling_factor = 1.0 + activation, is_gated = _normalize_activation_and_gate(activation, is_gated) + activation_out_dim = adjust_N_for_activation(w1.shape[1], is_gated) + + if output_dtype is None: + output_dtype = hidden_states.dtype + # Check constraints. 
if use_int4_w4a16 or use_int4_w4a8: - assert hidden_states.shape[1] // 2 == w1.shape[ - 2], "Hidden size mismatch" + if is_gated: + assert hidden_states.shape[1] // 2 == w1.shape[2], "Hidden size mismatch" + else: + assert hidden_states.shape[1] == w1.shape[2], "Hidden size mismatch" else: assert hidden_states.shape[1] == w1.shape[2], "Hidden size mismatch" @@ -836,12 +762,21 @@ def fused_experts_impl(hidden_states: torch.Tensor, assert hidden_states.dtype in [ torch.float32, torch.float16, torch.bfloat16, torch.int8, torch.float8_e4m3fn ] + assert output_dtype in [torch.float16, torch.bfloat16, torch.float32], "Unsupported output_dtype" num_tokens, _ = hidden_states.shape E, N, _ = w1.shape if global_num_experts == -1: global_num_experts = E - top_k_num = topk_ids.shape[1] + topk = topk_ids.shape[1] + + if output_dtype == torch.bfloat16: + compute_type = tl.bfloat16 + elif output_dtype == torch.float16: + compute_type = tl.float16 + else: + compute_type = tl.float32 + # We execute the fused_moe kernel in chunks to circumvent this issue: # https://github.com/vllm-project/vllm/issues/5938 CHUNK_SIZE = int(os.environ.get("TRITON_FUSED_MOE_CHUNK_SIZE", "16384")) @@ -856,7 +791,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, use_int4_w4a8=use_int4_w4a8, use_mxfp4_w4a4=False, #always false in wna16 block_shape=block_shape, - is_bottom=False) + is_bottom=False, + is_gated=is_gated) moe_config_func2 = get_optimal_moe_config_func( hidden_states, w2, topk_ids, use_int8_w8a16=use_int8_w8a16, @@ -869,20 +805,18 @@ def fused_experts_impl(hidden_states: torch.Tensor, is_bottom=True) config = moe_config_func(M) config2, max_block_m = moe_config_func2(M) - - # config["COMBINE_SCALE_LOAD"] = False - # config2["COMBINE_SCALE_LOAD"] = False - # config["USE_MLS_LOAD"] = True - # config2["USE_MLS_LOAD"] = True + if config["BLOCK_SIZE_M"] != config2["BLOCK_SIZE_M"]: + raise ValueError( + "Top and bottom MoE configs must use the same BLOCK_SIZE_M: " + 
f"top={config['BLOCK_SIZE_M']}, bottom={config2['BLOCK_SIZE_M']}" + ) bottom_moe_a_use_mls = ( _bottom_moe_use_mls() and not use_int4_w4a8 and config2 is not None and config2.get("USE_MLS_LOAD", False)) - # bottom_moe_a_use_mls = False - topk = top_k_num max_padded_tokens = ( min(M * topk, E + 1) * (max_block_m - 1) if bottom_moe_a_use_mls else 0 ) @@ -893,32 +827,25 @@ def fused_experts_impl(hidden_states: torch.Tensor, if expert_map is not None: cache13 = torch.zeros(total_tokens * max(N, w2.shape[1]), device=hidden_states.device, - dtype=odtype) + dtype=output_dtype) else: cache13 = torch.empty(total_tokens * max(N, w2.shape[1]), device=hidden_states.device, - dtype=odtype) + dtype=output_dtype) intermediate_cache3 = cache13[:M * topk * w2.shape[1]].view( (M, topk, w2.shape[1])) - if hidden_states.dtype == torch.bfloat16: - compute_type = tl.bfloat16 - elif hidden_states.dtype == torch.float16: - compute_type = tl.float16 - elif hidden_states.dtype == torch.float32: - compute_type = tl.float32 - elif hidden_states.dtype == torch.int8 or hidden_states.dtype == torch.float8_e4m3fn: - if odtype == torch.bfloat16: - compute_type = tl.bfloat16 - else: - compute_type = tl.float16 - else: - raise ValueError(f"Unsupported compute_type: {hidden_states.dtype}") - - if inplace: + if no_combine: + assert not inplace, "no_combine + inplace is not supported" + out_hidden_states = torch.empty( + (num_tokens, topk, w2.shape[1]), + device=hidden_states.device, + dtype=output_dtype, + ) + elif inplace: out_hidden_states = hidden_states else: - out_hidden_states = torch.empty(hidden_states.shape, device=hidden_states.device, dtype=odtype) + out_hidden_states = torch.empty(hidden_states.shape, device=hidden_states.device, dtype=output_dtype) for chunk in range((num_tokens // CHUNK_SIZE) + 1): begin_chunk_idx, end_chunk_idx = (chunk * CHUNK_SIZE, @@ -937,10 +864,15 @@ def fused_experts_impl(hidden_states: torch.Tensor, # do not need to be adjusted. 
config = moe_config_func(tokens_in_chunk) config2, max_block_m = moe_config_func2(tokens_in_chunk) + if config["BLOCK_SIZE_M"] != config2["BLOCK_SIZE_M"]: + raise ValueError( + "Top and bottom MoE configs must use the same BLOCK_SIZE_M: " + f"top={config['BLOCK_SIZE_M']}, bottom={config2['BLOCK_SIZE_M']}" + ) bottom_moe_a_use_mls = ( _bottom_moe_use_mls() and config2 is not None - and config2.pop("USE_MLS_LOAD", False) + and config2.get("USE_MLS_LOAD", False) and (block_shape is not None and (use_int8_w8a8 or use_fp8_w8a8))) intermediate_cache3 = intermediate_cache3[:tokens_in_chunk] @@ -955,29 +887,32 @@ def fused_experts_impl(hidden_states: torch.Tensor, ) if expert_map is not None: intermediate_cache2 = torch.zeros( - (total_tokens, N // 2 if activation != "relu2" else N), + (total_tokens, activation_out_dim), device=hidden_states.device, - dtype=odtype) + dtype=output_dtype) else: intermediate_cache2 = torch.empty( - (total_tokens, N // 2 if activation != "relu2" else N), + (total_tokens, activation_out_dim), device=hidden_states.device, - dtype=odtype) + dtype=output_dtype) curr_topk_ids = topk_ids[begin_chunk_idx:end_chunk_idx] curr_topk_weights = topk_weights[begin_chunk_idx:end_chunk_idx] - ck_sorting = expert_map is None # TODO: check why expert_map failed issue. + ck_sorting = True sorted_weights = None if not ck_sorting: sorted_token_ids, expert_ids, num_tokens_post_padded = ( moe_align_block_size(curr_topk_ids, config['BLOCK_SIZE_M'], global_num_experts, expert_map)) else: + # Convert expert_map (global->local mapping, -1 for inactive) to + # expert_mask (binary 0/1 mask) expected by moe_sorting_ck's C kernel. 
+ expert_mask = (expert_map >= 0).to(torch.int32) if expert_map is not None else None sorted_token_ids, sorted_weights, expert_ids, num_tokens_post_padded, \ _tokens_positions_per_expert, _moe_buf = ( moe_sorting_ck(curr_topk_ids, curr_topk_weights, global_num_experts, - w2.shape[1], odtype, config["BLOCK_SIZE_M"], expert_map) + w2.shape[1], output_dtype, config["BLOCK_SIZE_M"], expert_mask) ) if (use_int8_w8a8 or use_fp8_w8a8 or use_int4_w4a8) and per_channel_quant: @@ -998,8 +933,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - False, - top_k_num, + apply_router_weight_on_input, + topk, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, use_int8_w8a8=use_int8_w8a8, @@ -1010,7 +945,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, block_shape=block_shape, c_sorted=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + B_bias=b1, config=config) elif block_shape is not None and (use_int8_w8a8 or use_int4_w4a8 or use_fp8_w8a8): quant_dtype = torch.float8_e4m3fn if use_fp8_w8a8 else torch.int8 @@ -1030,8 +966,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - False, - top_k_num, + apply_router_weight_on_input, + topk, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, use_int8_w8a8=use_int8_w8a8, @@ -1042,7 +978,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, block_shape=block_shape, c_sorted=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + B_bias=b1, config=config) else: invoke_fused_moe_kernel(curr_hidden_states, @@ -1057,8 +994,8 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - False, - top_k_num, + apply_router_weight_on_input, + topk, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, use_int8_w8a8=use_int8_w8a8, @@ -1069,23 +1006,17 @@ def fused_experts_impl(hidden_states: torch.Tensor, 
block_shape=block_shape, c_sorted=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + B_bias=b1, config=config) - if activation == "silu": - triton_silu_and_mul(intermediate_cache2, - intermediate_cache1.view(-1, N)) - # torch.ops._C.silu_and_mul(intermediate_cache2, - # intermediate_cache1.view(-1, N)) - elif activation == "gelu": - triton_gelu_and_mul(intermediate_cache2, - intermediate_cache1.view(-1, N)) - elif activation == "relu2": - triton_relu2( - intermediate_cache2, - intermediate_cache1.view(-1, N), - ) - else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}") + _apply_activation( + activation=activation, + is_gated=is_gated, + activated_out=intermediate_cache2, + ffn1_out_2d=intermediate_cache1.view(-1, N), + gemm1_alpha=gemm1_alpha, + gemm1_limit=gemm1_limit, + ) if expert_map != None: # for EP mode, intermediate_cache1 and intermediate_cache3 need be zeros inited # since intermediate_cache1 and intermediate_cache3 shared same buffer, @@ -1108,7 +1039,7 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - True, + (not apply_router_weight_on_input) and (not no_combine), 1, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, @@ -1120,7 +1051,9 @@ def fused_experts_impl(hidden_states: torch.Tensor, block_shape=block_shape, bottom_a_use_mls_load=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + scale_bias_with_routed_weight=(not apply_router_weight_on_input) and (not no_combine), + B_bias=b2, config=config2) elif block_shape is not None and (use_int8_w8a8 or use_int4_w4a8 or use_fp8_w8a8): quant_dtype = torch.float8_e4m3fn if use_fp8_w8a8 else torch.int8 @@ -1137,7 +1070,7 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - True, + (not apply_router_weight_on_input) and (not no_combine), 1, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, @@ -1149,7 
+1082,9 @@ def fused_experts_impl(hidden_states: torch.Tensor, block_shape=block_shape, bottom_a_use_mls_load=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + scale_bias_with_routed_weight=(not apply_router_weight_on_input) and (not no_combine), + B_bias=b2, config=config2) else: invoke_fused_moe_kernel(intermediate_cache2, @@ -1164,7 +1099,7 @@ def fused_experts_impl(hidden_states: torch.Tensor, sorted_weights, expert_ids, num_tokens_post_padded, - True, + (not apply_router_weight_on_input) and (not no_combine), 1, compute_type=compute_type, use_fp8_w8a8=use_fp8_w8a8, @@ -1176,22 +1111,18 @@ def fused_experts_impl(hidden_states: torch.Tensor, block_shape=block_shape, bottom_a_use_mls_load=bottom_moe_a_use_mls, ck_sorting=ck_sorting, - ck_topk=top_k_num, + ck_topk=topk, + scale_bias_with_routed_weight=(not apply_router_weight_on_input) and (not no_combine), + B_bias=b2, config=config2) - - mode_use_triton_moe_sum = out_hidden_states.dtype == torch.float16 or \ - out_hidden_states.dtype == torch.bfloat16 or \ - out_hidden_states.dtype == torch.float32 - if mode_use_triton_moe_sum: - triton_moe_sum(intermediate_cache3.view(*intermediate_cache3.shape), - out_hidden_states[begin_chunk_idx:end_chunk_idx], - routed_scaling_factor=routed_scaling_factor) + if no_combine: + out_hidden_states[begin_chunk_idx:end_chunk_idx].copy_(intermediate_cache3) else: - moe_sum(intermediate_cache3.view(*intermediate_cache3.shape), - out_hidden_states[begin_chunk_idx:end_chunk_idx]) - if routed_scaling_factor != 1.0: - out_hidden_states[begin_chunk_idx:end_chunk_idx].mul_( - routed_scaling_factor) + triton_moe_sum( + intermediate_cache3.view(*intermediate_cache3.shape), + out_hidden_states[begin_chunk_idx:end_chunk_idx], + routed_scaling_factor=routed_scaling_factor, + ) if end_chunk_idx < num_tokens and expert_map != None: # if has next chunk, intermediate_cache3 need init to zeros intermediate_cache3.fill_(0) diff --git 
a/aiter/ops/triton/moe_activation.py b/aiter/ops/triton/moe_activation.py new file mode 100644 index 0000000000000000000000000000000000000000..1f53bbbe83ad91eb23c959c847213397c19b3a5f --- /dev/null +++ b/aiter/ops/triton/moe_activation.py @@ -0,0 +1,626 @@ +# SPDX-License-Identifier: Apache-2.0 +"""Shared MoE activation helpers and Triton kernels. + +Terminology: +- "gated" activation means gate-up fusion: input is interpreted as [gate, up] + (typically shape [..., 2D]) and output is activation(gate) * up (shape [..., D]). +- "non-gated" activation means no gate multiplication: input/output keep the + same last-dimension size (typically [..., D] -> [..., D]). +""" + +from typing import Optional, Tuple + +import torch +import triton +import triton.language as tl + + +@triton.heuristics( + { + "N_DIV": lambda args: (args["N"] % args["BLOCK_SIZE_N"]) == 0, + } +) +@triton.jit +def activation_and_mul_kernel( + out_ptr, # [M, N] + in_ptr, # [M, 2N], chunked layout [gate(0:N), up(N:2N)] + M, + N: tl.constexpr, + stride_in0, + stride_in1, + stride_out0, + stride_out1, + ACT: tl.constexpr, + N_DIV: tl.constexpr, + BLOCK_SIZE_N: tl.constexpr, +): + # 1D launch over [M, N] tiles: + # pid -> (pid_m, pid_n), each program computes one [1, BLOCK_SIZE_N] slice. 
+ tl.assume(M > 0) + tl.assume(N > 0) + tl.assume(stride_in0 > 0) + tl.assume(stride_in1 > 0) + tl.assume(stride_out0 > 0) + tl.assume(stride_out1 > 0) + tl.assume(in_ptr.to(tl.int64) >= 0) + tl.assume(out_ptr.to(tl.int64) >= 0) + + pid = tl.program_id(axis=0) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + pid_m = pid // num_pid_n + pid_n = pid % num_pid_n + + offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + + mask_n = offs_n < N + row_in = in_ptr + pid_m.to(tl.int64) * stride_in0 + gate_ptrs = row_in + offs_n * stride_in1 + up_ptrs = row_in + (offs_n + N) * stride_in1 + if N_DIV: + gate = tl.load(gate_ptrs) + up = tl.load(up_ptrs) + else: + gate = tl.load(gate_ptrs, mask=mask_n, other=0.0) + up = tl.load(up_ptrs, mask=mask_n, other=0.0) + + gate_f = gate.to(tl.float32) + up_f = up.to(tl.float32) + # ACT=0: SiLU(x) * y, where SiLU(x)=x*sigmoid(x) + # ACT=1: GELU(x) * y, exact erf form: + # GELU(x)=0.5*x*(1+erf(x/sqrt(2))) + if ACT == 0: + act = gate_f * (1.0 / (1.0 + tl.exp(-gate_f))) + else: + act = gate_f * 0.5 * (1.0 + tl.erf(gate_f * 0.7071067811865476)) + y = act * up_f + + out_ptrs = out_ptr + pid_m.to(tl.int64) * stride_out0 + offs_n * stride_out1 + if N_DIV: + tl.store(out_ptrs, y.to(gate.dtype)) + else: + tl.store(out_ptrs, y.to(gate.dtype), mask=mask_n) + + +def get_triton_activation_and_mul_config(M, _N): + # 2 configs tuned on gfx930 (BW1000B), bf16: + # small-M: BS128_W1 — stable, low overhead for decode + # large-M: BS1024_W1 — ~1.4-1.7x faster than old BS512_W4 at M>=4K + if M <= 16: + return {"BLOCK_SIZE_N": 128, "num_warps": 1} + return {"BLOCK_SIZE_N": 1024, "num_warps": 1} + + +def get_triton_activation_no_mul_config(M, N): + # 2 configs tuned on gfx930 (BW1000B), bf16: + # small-M: BS128_W1 — stable, low overhead for decode + # large-M: BS1024_W2 — ~1.4-2.6x faster than old BS256_W2 at M>=4K + if M <= 16: + return {"BLOCK_SIZE_N": 128, "num_warps": 1} + return {"BLOCK_SIZE_N": 1024, "num_warps": 2} + + +@triton.heuristics( + { + "N_DIV": 
lambda args: (args["N"] % args["BLOCK_SIZE_N"]) == 0, + } +) +@triton.jit +def activation_no_mul_1d_kernel( + out_ptr, # [M, N] + in_ptr, # [M, N] + M, + N: tl.constexpr, + stride_in0, + stride_in1, + stride_out0, + stride_out1, + ACT: tl.constexpr, # 0:silu 1:gelu 2:relu2 + BLOCK_SIZE_N: tl.constexpr, + N_DIV: tl.constexpr, +): + tl.assume(M > 0) + tl.assume(N > 0) + tl.assume(stride_in0 > 0) + tl.assume(stride_in1 > 0) + tl.assume(stride_out0 > 0) + tl.assume(stride_out1 > 0) + tl.assume(in_ptr.to(tl.int64) >= 0) + tl.assume(out_ptr.to(tl.int64) >= 0) + + pid = tl.program_id(axis=0) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + pid_m = pid // num_pid_n + pid_n = pid % num_pid_n + + offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + mask_n = offs_n < N + + in_ptrs = in_ptr + pid_m.to(tl.int64) * stride_in0 + offs_n * stride_in1 + if N_DIV: + x = tl.load(in_ptrs) + else: + x = tl.load(in_ptrs, mask=mask_n, other=0.0) + + xf = x.to(tl.float32) + # ACT=0: SiLU(x)=x*sigmoid(x) + # ACT=1: GELU(x)=0.5*x*(1+erf(x/sqrt(2))) + # ACT=2: ReLU^2(x)=max(x,0)^2 + if ACT == 0: + y = xf * (1.0 / (1.0 + tl.exp(-xf))) + elif ACT == 1: + y = xf * 0.5 * (1.0 + tl.erf(xf * 0.7071067811865476)) + else: + y = tl.where(xf > 0.0, xf * xf, 0.0) + + out_ptrs = out_ptr + pid_m.to(tl.int64) * stride_out0 + offs_n * stride_out1 + if N_DIV: + tl.store(out_ptrs, y.to(x.dtype)) + else: + tl.store(out_ptrs, y.to(x.dtype), mask=mask_n) + + +def triton_relu2(out: torch.Tensor, inp: torch.Tensor) -> None: + # ReLU^2(x) = max(x, 0)^2 + assert inp.shape == out.shape + assert inp.is_contiguous() and out.is_contiguous() + M, N = inp.shape + config = get_triton_activation_no_mul_config(M, N) + grid = (M * triton.cdiv(N, config["BLOCK_SIZE_N"]),) + activation_no_mul_1d_kernel[grid]( + out, + inp, + M, + N, + inp.stride(0), + inp.stride(1), + out.stride(0), + out.stride(1), + ACT=2, + **config, + ) + + +def triton_silu_no_mul(out: torch.Tensor, inp: torch.Tensor) -> None: + # SiLU(x) = x * sigmoid(x) + 
assert inp.shape == out.shape + assert inp.is_contiguous() and out.is_contiguous() + M, N = inp.shape + config = get_triton_activation_no_mul_config(M, N) + grid = (M * triton.cdiv(N, config["BLOCK_SIZE_N"]),) + activation_no_mul_1d_kernel[grid]( + out, + inp, + M, + N, + inp.stride(0), + inp.stride(1), + out.stride(0), + out.stride(1), + ACT=0, + **config, + ) + + +def triton_gelu_no_mul(out: torch.Tensor, inp: torch.Tensor) -> None: + # GELU(x) = 0.5 * x * (1 + erf(x / sqrt(2))) + assert inp.shape == out.shape + assert inp.is_contiguous() and out.is_contiguous() + M, N = inp.shape + config = get_triton_activation_no_mul_config(M, N) + grid = (M * triton.cdiv(N, config["BLOCK_SIZE_N"]),) + activation_no_mul_1d_kernel[grid]( + out, + inp, + M, + N, + inp.stride(0), + inp.stride(1), + out.stride(0), + out.stride(1), + ACT=1, + **config, + ) + + +def triton_silu_and_mul(out: torch.Tensor, input: torch.Tensor) -> None: + # Split input into [x, y] along last dim and compute: + # out = SiLU(x) * y + # Shapes: input=[M, 2N], out=[M, N] + assert input.shape[-1] % 2 == 0 + assert input.is_contiguous() + assert out.is_contiguous() + M = input.numel() // input.shape[-1] + N = input.shape[-1] // 2 + input_2d = input.view(M, input.shape[-1]) + out_2d = out.view(M, N) + config = get_triton_activation_and_mul_config(M, N) + grid = (M * triton.cdiv(N, config["BLOCK_SIZE_N"]),) + activation_and_mul_kernel[grid]( + out_2d, + input_2d, + M, + N, + input_2d.stride(0), + input_2d.stride(1), + out_2d.stride(0), + out_2d.stride(1), + ACT=0, + **config, + ) + + +def triton_gelu_and_mul(out: torch.Tensor, input: torch.Tensor) -> None: + # Split input into [x, y] along last dim and compute: + # out = GELU(x) * y + # Shapes: input=[M, 2N], out=[M, N] + assert input.shape[-1] % 2 == 0 + assert input.is_contiguous() + assert out.is_contiguous() + M = input.numel() // input.shape[-1] + N = input.shape[-1] // 2 + input_2d = input.view(M, input.shape[-1]) + out_2d = out.view(M, N) + config = 
get_triton_activation_and_mul_config(M, N) + grid = (M * triton.cdiv(N, config["BLOCK_SIZE_N"]),) + activation_and_mul_kernel[grid]( + out_2d, + input_2d, + M, + N, + input_2d.stride(0), + input_2d.stride(1), + out_2d.stride(0), + out_2d.stride(1), + ACT=1, + **config, + ) + + +def get_triton_swiglu_variant_config(M, D): + # 2 configs tuned on gfx930 (BW1000B), bf16: + # small-M: BS128_W1 — stable, low overhead for decode + # large-M: BS1024_W1 — ~1.3-2.1x faster than old BS256_W2 at M>=4K + if M <= 16: + return {"BLOCK_SIZE_D": 128, "num_warps": 1} + return {"BLOCK_SIZE_D": 1024, "num_warps": 1} + + +def get_triton_swiglu_oai_interleaved_config(M, D): + # BLOCK_SIZE_D >= 128 produces incorrect results due to interleaved + # address pattern (offs_d*2) failing to generate correct buffer_load. + # Must keep BS64. small-M: W1; large-M: W4 for better occupancy. + if M <= 16: + return {"BLOCK_SIZE_D": 64, "num_warps": 1} + return {"BLOCK_SIZE_D": 64, "num_warps": 4} + + +@triton.heuristics( + { + "D_DIV": lambda args: (args["D"] % args["BLOCK_SIZE_D"]) == 0, + } +) +@triton.jit +def swiglu_variant_1d_kernel( + out_ptr, # [M, D] + in_ptr, # [M, 2*D] + M, + D: tl.constexpr, + stride_in0, + stride_in1, + stride_out0, + stride_out1, + ALPHA, + LIMIT, + MODE: tl.constexpr, # 0:gpt_oss 1:silu_clamp_mul 2:step + BLOCK_SIZE_D: tl.constexpr, + D_DIV: tl.constexpr, +): + tl.assume(M > 0) + tl.assume(D > 0) + tl.assume(stride_in0 > 0) + tl.assume(stride_in1 > 0) + tl.assume(stride_out0 > 0) + tl.assume(stride_out1 > 0) + tl.assume(in_ptr.to(tl.int64) >= 0) + tl.assume(out_ptr.to(tl.int64) >= 0) + + pid = tl.program_id(axis=0) + num_pid_d = tl.cdiv(D, BLOCK_SIZE_D) + pid_m = pid // num_pid_d + pid_d = pid % num_pid_d + + offs_d = pid_d * BLOCK_SIZE_D + tl.arange(0, BLOCK_SIZE_D) + mask_d = offs_d < D + + row_in = in_ptr + pid_m.to(tl.int64) * stride_in0 + gate_ptrs = row_in + offs_d * stride_in1 + up_ptrs = row_in + (offs_d + D) * stride_in1 + + if D_DIV: + gate = 
tl.load(gate_ptrs) + up = tl.load(up_ptrs) + else: + gate = tl.load(gate_ptrs, mask=mask_d, other=0.0) + up = tl.load(up_ptrs, mask=mask_d, other=0.0) + + gate_f = gate.to(tl.float32) + up_f = up.to(tl.float32) + gate_clamp = tl.minimum(gate_f, LIMIT) + up_clamp = tl.minimum(tl.maximum(up_f, -LIMIT), LIMIT) + + # MODE=0 (gpt_oss/oai): + # out = gate' * sigmoid(alpha*gate') * (up' + 1) + # MODE=1 (silu_clamp_mul): + # out = SiLU(gate') * up' + # MODE=2 (step): + # out = clamp(SiLU(gate), max=LIMIT) * up' + if MODE == 0: + y = gate_clamp * (1.0 / (1.0 + tl.exp(-(gate_clamp * ALPHA)))) * (up_clamp + 1.0) + elif MODE == 1: + y = gate_clamp * (1.0 / (1.0 + tl.exp(-gate_clamp))) * up_clamp + else: + silu_gate = gate_f * (1.0 / (1.0 + tl.exp(-gate_f))) + silu_clamp = tl.minimum(silu_gate, LIMIT) + y = silu_clamp * up_clamp + + out_ptrs = out_ptr + pid_m.to(tl.int64) * stride_out0 + offs_d * stride_out1 + if D_DIV: + tl.store(out_ptrs, y.to(gate.dtype)) + else: + tl.store(out_ptrs, y.to(gate.dtype), mask=mask_d) + + +def _triton_chunked_swiglu_variant( + out: torch.Tensor, + inp: torch.Tensor, + *, + alpha: float, + limit: float, + mode: int, +) -> None: + assert inp.shape[-1] % 2 == 0 + assert inp.is_contiguous() and out.is_contiguous() + M = inp.numel() // inp.shape[-1] + D = inp.shape[-1] // 2 + out_rows = out.view(M, D) + inp_rows = inp.view(M, inp.shape[-1]) + config = get_triton_swiglu_variant_config(M, D) + # 1D launch: a single pid is mapped to (row_id, d_tile_id) inside kernel. 
+ grid = (M * triton.cdiv(D, config["BLOCK_SIZE_D"]),) + swiglu_variant_1d_kernel[grid]( + out_rows, + inp_rows, + M, + D, + inp_rows.stride(0), + inp_rows.stride(1), + out_rows.stride(0), + out_rows.stride(1), + alpha, + limit, + MODE=mode, + **config, + ) + + +def triton_swiglu_silu_clamp_mul(out: torch.Tensor, inp: torch.Tensor, limit: float) -> None: + # out = SiLU(clamp(gate,max=limit)) * clamp(up,-limit,limit) + _triton_chunked_swiglu_variant(out, inp, alpha=1.0, limit=limit, mode=1) + + +def triton_swiglu_gpt_oss_sigmoid_alpha( + out: torch.Tensor, inp: torch.Tensor, alpha: float, limit: float +) -> None: + # out = gate' * sigmoid(alpha * gate') * (up' + 1), chunked layout + _triton_chunked_swiglu_variant(out, inp, alpha=alpha, limit=limit, mode=0) + + +def triton_swiglu_step_and_mul(out: torch.Tensor, inp: torch.Tensor, limit: float) -> None: + # out = clamp(SiLU(gate),max=limit) * clamp(up,-limit,limit) + _triton_chunked_swiglu_variant(out, inp, alpha=1.0, limit=limit, mode=2) + + +@triton.heuristics( + { + "D_DIV": lambda args: (args["D"] % args["BLOCK_SIZE_D"]) == 0, + } +) +@triton.jit +def swiglu_oai_interleaved_1d_kernel( + out_ptr, # [M, D] + in_ptr, # [M, 2*D], interleaved layout [g0, u0, g1, u1, ...] 
+ M, + D, + stride_in0, + stride_in1, + stride_out0, + stride_out1, + ALPHA, + LIMIT, + BLOCK_SIZE_D: tl.constexpr, + D_DIV: tl.constexpr, +): + tl.assume(M > 0) + tl.assume(D > 0) + tl.assume(stride_in0 > 0) + tl.assume(stride_in1 > 0) + tl.assume(stride_out0 > 0) + tl.assume(stride_out1 > 0) + tl.assume(in_ptr.to(tl.int64) >= 0) + tl.assume(out_ptr.to(tl.int64) >= 0) + + pid = tl.program_id(axis=0) + num_pid_d = tl.cdiv(D, BLOCK_SIZE_D) + pid_m = pid // num_pid_d + pid_d = pid % num_pid_d + + offs_d = pid_d * BLOCK_SIZE_D + tl.arange(0, BLOCK_SIZE_D) + mask_d = offs_d < D + + row_in = in_ptr + pid_m.to(tl.int64) * stride_in0 + gate_ptrs = row_in + (offs_d * 2) * stride_in1 + up_ptrs = row_in + (offs_d * 2 + 1) * stride_in1 + + if D_DIV: + gate = tl.load(gate_ptrs) + up = tl.load(up_ptrs) + else: + gate = tl.load(gate_ptrs, mask=mask_d, other=0.0) + up = tl.load(up_ptrs, mask=mask_d, other=0.0) + + gate_f = gate.to(tl.float32) + up_f = up.to(tl.float32) + gate_clamp = tl.minimum(gate_f, LIMIT) + up_clamp = tl.minimum(tl.maximum(up_f, -LIMIT), LIMIT) + + # OpenAI interleaved SwiGLU variant: + # out = gate' * sigmoid(alpha * gate') * (up' + 1) + y = gate_clamp * (1.0 / (1.0 + tl.exp(-(gate_clamp * ALPHA)))) * (up_clamp + 1.0) + + out_ptrs = out_ptr + pid_m.to(tl.int64) * stride_out0 + offs_d * stride_out1 + if D_DIV: + tl.store(out_ptrs, y.to(gate.dtype)) + else: + tl.store(out_ptrs, y.to(gate.dtype), mask=mask_d) + + +def triton_swiglu_oai_and_mul(out: torch.Tensor, inp: torch.Tensor, alpha: float, limit: float) -> None: + # out = gate' * sigmoid(alpha * gate') * (up' + 1), interleaved layout + assert inp.shape[-1] % 2 == 0 + assert inp.is_contiguous() and out.is_contiguous() + M = inp.numel() // inp.shape[-1] + D = inp.shape[-1] // 2 + out_2d = out.view(M, D) + inp_2d = inp.view(M, inp.shape[-1]) + config = get_triton_swiglu_oai_interleaved_config(M, D) + grid = (M * triton.cdiv(D, config["BLOCK_SIZE_D"]),) + swiglu_oai_interleaved_1d_kernel[grid]( + out_2d, + 
inp_2d, + M, + D, + inp_2d.stride(0), + inp_2d.stride(1), + out_2d.stride(0), + out_2d.stride(1), + alpha, + limit, + **config, + ) + + +_NO_MUL_ACTIVATIONS = {"silu_no_mul", "gelu_no_mul", "relu2_no_mul", "relu2"} +_GATED_ONLY_ACTIVATIONS = {"swigluoai", "swiglustep"} +_SUPPORTED_ACTIVATIONS = { + "silu", + "gelu", + "swigluoai", + "swiglustep", + "silu_no_mul", + "gelu_no_mul", + "relu2_no_mul", + "relu2", +} + + +def _normalize_activation_and_gate( + activation: str, + is_gated: Optional[bool], +) -> Tuple[str, bool]: + # Match upstream style: keep (activation, is_gated) as a direct pair. + # 1) validate string, 2) infer is_gated when not explicitly given, + # 3) validate gated/non-gated constraints. + activation_name = activation.lower() + if activation_name not in _SUPPORTED_ACTIVATIONS: + raise ValueError(f"Unsupported activation: {activation}") + + if is_gated is None: + is_gated = activation_name not in _NO_MUL_ACTIVATIONS + + if is_gated and activation_name in _NO_MUL_ACTIVATIONS: + raise ValueError(f"Activation '{activation}' is non-gated but is_gated=True") + if (not is_gated) and activation_name in _GATED_ONLY_ACTIVATIONS: + raise ValueError(f"Activation '{activation}' requires gated mode") + + return activation_name, is_gated + + +def adjust_N_for_activation(n: int, is_gated: bool) -> int: + """Align with vLLM naming: output dim after expert activation.""" + return n // 2 if is_gated else n + + +def _apply_activation( + activation: str, + is_gated: bool, + activated_out: torch.Tensor, + ffn1_out_2d: torch.Tensor, + gemm1_alpha: Optional[float], + gemm1_limit: Optional[float], +) -> None: + """Apply MoE expert activation. + + Shapes: + - gated path: `ffn1_out_2d` is [rows, 2D], `activated_out` is [rows, D] + - non-gated path: `ffn1_out_2d` and `activated_out` are both [rows, D] + + Meaning of `is_gated`: + - True: run gate-up fusion (activation(gate) * up). + - False: run plain activation without gate multiplication. 
+ """ + if is_gated: + if activation == "silu": + # Gated SiLU family: + # base: out = SiLU(gate) * up + # gpt-oss(alpha,limit): out = gate'*sigmoid(alpha*gate')*(up'+1) + # limit-only: out = SiLU(gate')*up' + if gemm1_alpha is not None: + if gemm1_limit is None: + raise ValueError("gemm1_limit must be set when gemm1_alpha is set") + triton_swiglu_gpt_oss_sigmoid_alpha( + activated_out, + ffn1_out_2d, + gemm1_alpha, + gemm1_limit, + ) + elif gemm1_limit is not None: + triton_swiglu_silu_clamp_mul( + activated_out, + ffn1_out_2d, + gemm1_limit, + ) + else: + triton_silu_and_mul(activated_out, ffn1_out_2d) + elif activation == "gelu": + # Gated GELU: out = GELU(gate) * up + if gemm1_alpha is not None or gemm1_limit is not None: + raise ValueError("gemm1_alpha/gemm1_limit are not supported for gated GELU") + triton_gelu_and_mul(activated_out, ffn1_out_2d) + elif activation == "swigluoai": + # OpenAI interleaved SwiGLU variant: + # out = gate'*sigmoid(alpha*gate')*(up'+1) + alpha = 1.702 if gemm1_alpha is None else gemm1_alpha + limit = 7.0 if gemm1_limit is None else gemm1_limit + triton_swiglu_oai_and_mul(activated_out, ffn1_out_2d, alpha, limit) + elif activation == "swiglustep": + # Step variant: + # out = clamp(SiLU(gate),max=limit) * clamp(up,-limit,limit) + limit = 7.0 if gemm1_limit is None else gemm1_limit + if gemm1_alpha is not None: + raise ValueError("gemm1_alpha is not supported for swiglustep") + triton_swiglu_step_and_mul(activated_out, ffn1_out_2d, limit) + else: + raise ValueError(f"Unsupported gated activation: {activation}") + return + + if activation in {"silu", "silu_no_mul"}: + # Non-gated: out = SiLU(x) + triton_silu_no_mul(activated_out, ffn1_out_2d) + elif activation in {"gelu", "gelu_no_mul"}: + # Non-gated: out = GELU(x) + triton_gelu_no_mul(activated_out, ffn1_out_2d) + elif activation in {"relu2", "relu2_no_mul"}: + # Non-gated: out = ReLU(x)^2 + triton_relu2(activated_out, ffn1_out_2d) + else: + raise ValueError(f"Unsupported non-gated 
activation: {activation}") diff --git a/aiter/ops/triton/moe_op.py b/aiter/ops/triton/moe_op.py index c334e1112cb5d05ea18769ea64d899b3e50731bf..d94214c5c2469c2852e9c4de601af9f0afec0aef 100644 --- a/aiter/ops/triton/moe_op.py +++ b/aiter/ops/triton/moe_op.py @@ -198,6 +198,7 @@ def write_zeros_to_output(c_ptr, stride_cm, stride_cn, pid_n, N, offs_token, c_mask = token_mask[:, None] & (offs_cn[None, :] < N) tl.store(c_ptrs, accumulator, mask=c_mask) + @triton.jit def pid_grid(pid: int, num_pid_m: int, num_pid_n: int, GROUP_SIZE_M: tl.constexpr = 1): """ @@ -499,7 +500,7 @@ def fused_moe_kernel_gptq_awq( k_mask = offs_k[:, None] < K - k * BLOCK_SIZE_K k_other = 0.0 - masks_szk = offs_szk < K // group_size + masks_szk = offs_szk[:, None] < K // group_size masks_a = token_mask[:, None] & (offs_k[None, :] < K - k * BLOCK_SIZE_K) else: k_mask = None @@ -521,13 +522,19 @@ def fused_moe_kernel_gptq_awq( b_scale_ptrs = b_scale_ptr + (off_experts * stride_bse + \ offs_bn[None, :] * stride_bsn + \ offs_szk[:, None] * stride_bsk).to(tl.int32) - b_scale = tl.load(b_scale_ptrs, mask=masks_szk, other=k_other) + if not block_k_diviable: + b_scale = tl.load(b_scale_ptrs, mask=masks_szk, other=k_other) + else: + b_scale = tl.load(b_scale_ptrs) b_scale = b_scale.to(tl.float32) b_zp_ptrs = b_zp_ptr + (off_experts * stride_bze + \ (offs_bn[None, :]//2) * stride_bzn + \ offs_szk[:, None] * stride_bzk).to(tl.int32) - b_zp = tl.load(b_zp_ptrs, mask=masks_szk, other=k_other) + if not block_k_diviable: + b_zp = tl.load(b_zp_ptrs, mask=masks_szk, other=k_other) + else: + b_zp = tl.load(b_zp_ptrs) b_zp = ((b_zp >> b_zp_shifter) & 0xF) b_zp = b_zp.to(tl.float32) @@ -569,6 +576,7 @@ def fused_moe_kernel_gptq_awq( # K dimension. if not block_k_diviable: + # Keep the same partial-K masking semantics as SGLang gptq_awq kernel. 
k_mask = offs_k[:, None] < K - k * BLOCK_SIZE_K k_other = 0.0 else: @@ -587,7 +595,10 @@ def fused_moe_kernel_gptq_awq( offs_bn[None, :] * stride_bsn + \ ((offs_k[:, None] + BLOCK_SIZE_K * k) // group_size) * \ stride_bsk).to(tl.int32) - b_scale = tl.load(b_scale_ptrs, mask=k_mask, other=k_other) + if not block_k_diviable: + b_scale = tl.load(b_scale_ptrs, mask=k_mask, other=k_other) + else: + b_scale = tl.load(b_scale_ptrs) b_scale = b_scale.to(tl.float32) if has_zp and use_int4_w4a16: @@ -595,7 +606,10 @@ def fused_moe_kernel_gptq_awq( b_zp_ptrs = b_zp_ptr + (off_experts * stride_bze + \ (offs_bn[None, :] // 2) * stride_bzn + \ offs_k_true * stride_bzk).to(tl.int32) - b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other) + if not block_k_diviable: + b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other) + else: + b_zp = tl.load(b_zp_ptrs) b_zp = ((b_zp >> b_zp_shifter) & 0xF) b_zp = b_zp.to(tl.float32) elif has_zp and use_int8_w8a16: @@ -603,7 +617,10 @@ def fused_moe_kernel_gptq_awq( b_zp_ptrs = b_zp_ptr + (off_experts * stride_bze + \ offs_bn[None, :] * stride_bzn + \ offs_k_true * stride_bzk).to(tl.int32) - b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other) + if not block_k_diviable: + b_zp = tl.load(b_zp_ptrs, mask=k_mask, other=k_other) + else: + b_zp = tl.load(b_zp_ptrs) b_zp = b_zp.to(tl.float32) # We accumulate along the K dimension. 
@@ -1217,6 +1234,7 @@ def fused_moe_kernel( sorted_weights_ptr, a_scale_ptr, b_scale_ptr, + b_bias_ptr, # Matrix dimensions N, K, @@ -1238,6 +1256,8 @@ def fused_moe_kernel( stride_bse, stride_bsk, stride_bsn, + stride_bbe, + stride_bbn, total_tokens, # Block size for block-wise quantization group_n: tl.constexpr, @@ -1253,6 +1273,7 @@ def fused_moe_kernel( USE_MLS_LOAD: tl.constexpr, MUL_ROUTED_WEIGHT: tl.constexpr, USE_ADDR_OFFSET_INT64_A: tl.constexpr, + USE_ADDR_OFFSET_INT64_B: tl.constexpr, USE_ADDR_OFFSET_INT64_C: tl.constexpr, top_k: tl.constexpr, compute_type: tl.constexpr, @@ -1265,6 +1286,8 @@ def fused_moe_kernel( ck_sorting: tl.constexpr, ck_topk: tl.constexpr, NUM_XCDS: tl.constexpr, + SCALE_BIAS_WITH_ROUTED_WEIGHT: tl.constexpr, + ADD_BIAS: tl.constexpr, ): """ Implements the fused computation for a Mixture of Experts (MOE) using @@ -1377,8 +1400,21 @@ def fused_moe_kernel( a_ptrs = a_ptr + (offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak).to(tl.int32) - b_ptrs = b_ptr + off_experts * stride_be + (offs_k[:, None] * stride_bk + - offs_bn[None, :] * stride_bn).to(tl.int32) + if USE_ADDR_OFFSET_INT64_B: + b_expert_offset = off_experts.to(tl.int64) * stride_be + b_ptrs = b_ptr + ( + b_expert_offset + + offs_k[:, None].to(tl.int64) * stride_bk + + offs_bn[None, :].to(tl.int64) * stride_bn + ) + else: + b_expert_offset = off_experts * stride_be + b_ptrs = b_ptr + ( + b_expert_offset + + offs_k[:, None] * stride_bk + + offs_bn[None, :] * stride_bn + ).to(tl.int32) + b_base_ptr = b_ptr + b_expert_offset if use_int8_w8a16: b_scale_ptrs = b_scale_ptr + off_experts * stride_bse + offs_bn[ @@ -1441,7 +1477,7 @@ def fused_moe_kernel( b0 = tl.load(b_ptrs) else: b0 = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -1479,7 +1515,7 @@ def fused_moe_kernel( b1 = tl.load(b_ptrs + BLOCK_SIZE_K * stride_bk) else: b1 = tl.matrix_load( - b_ptr + 
off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -1533,7 +1569,7 @@ def fused_moe_kernel( b = tl.load(b_ptrs) else: b = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -1589,6 +1625,24 @@ def fused_moe_kernel( accumulator = (accumulator * a_scale * b_scale).to(compute_type) else: accumulator = accumulator.to(compute_type) + + if ADD_BIAS: + offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + bias_ptrs = b_bias_ptr + ( + off_experts.to(tl.int64) * stride_bbe + + offs_cn[None, :].to(tl.int64) * stride_bbn + ) + if block_n_diviable: + bias = tl.load(bias_ptrs) + else: + bias = tl.load(bias_ptrs, mask=offs_cn[None, :] < N, other=0.0) + if SCALE_BIAS_WITH_ROUTED_WEIGHT: + if ck_sorting: + moe_weight = tl.load(sorted_weights_ptr + offs_token_id, mask=token_mask, other=0) + else: + moe_weight = tl.load(topk_weights_ptr + offs_token, mask=token_mask, other=0) + bias = bias * moe_weight[:, None] + accumulator = accumulator + bias.to(accumulator.dtype) # ----------------------------------------------------------- # Write back the block of the output offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) @@ -1681,6 +1735,7 @@ def fused_moe_splitk_kernel( SPLIT_K: tl.constexpr, k_per_split: tl.constexpr, USE_ADDR_OFFSET_INT64_A: tl.constexpr, + USE_ADDR_OFFSET_INT64_B: tl.constexpr, USE_ADDR_OFFSET_INT64_C: tl.constexpr, top_k: tl.constexpr, compute_type: tl.constexpr, @@ -1817,8 +1872,21 @@ def fused_moe_splitk_kernel( a_ptrs = a_ptr + (offs_token[:, None] // top_k * stride_am + (offs_k[None, :] + k_start) * stride_ak).to(tl.int32) - b_ptrs = b_ptr + off_experts * stride_be + ((offs_k[:, None] + k_start) * stride_bk + - offs_bn[None, :] * stride_bn).to(tl.int32) + if USE_ADDR_OFFSET_INT64_B: + b_expert_offset = off_experts.to(tl.int64) * stride_be + b_ptrs = b_ptr + ( + 
b_expert_offset + + (offs_k[:, None] + k_start).to(tl.int64) * stride_bk + + offs_bn[None, :].to(tl.int64) * stride_bn + ) + else: + b_expert_offset = off_experts * stride_be + b_ptrs = b_ptr + ( + b_expert_offset + + (offs_k[:, None] + k_start) * stride_bk + + offs_bn[None, :] * stride_bn + ).to(tl.int32) + b_base_ptr = b_ptr + b_expert_offset if use_int8_w8a16: b_scale_ptrs = b_scale_ptr + off_experts * stride_bse + offs_bn[ @@ -1883,7 +1951,7 @@ def fused_moe_splitk_kernel( b0 = tl.load(b_ptrs) else: b0 = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -1921,7 +1989,7 @@ def fused_moe_splitk_kernel( b1 = tl.load(b_ptrs + BLOCK_SIZE_K * stride_bk) else: b1 = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -1972,7 +2040,7 @@ def fused_moe_splitk_kernel( b = tl.load(b_ptrs) else: b = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -2116,6 +2184,7 @@ def fused_moe_persistent_kernel( USE_MLS_LOAD: tl.constexpr, MUL_ROUTED_WEIGHT: tl.constexpr, USE_ADDR_OFFSET_INT64_A: tl.constexpr, + USE_ADDR_OFFSET_INT64_B: tl.constexpr, USE_ADDR_OFFSET_INT64_C: tl.constexpr, top_k: tl.constexpr, compute_type: tl.constexpr, @@ -2247,8 +2316,21 @@ def fused_moe_persistent_kernel( a_ptrs = a_ptr + (offs_token[:, None] // top_k * stride_am + offs_k[None, :] * stride_ak).to(tl.int32) - b_ptrs = b_ptr + off_experts * stride_be + (offs_k[:, None] * stride_bk + - offs_bn[None, :] * stride_bn).to(tl.int32) + if USE_ADDR_OFFSET_INT64_B: + b_expert_offset = off_experts.to(tl.int64) * stride_be + b_ptrs = b_ptr + ( + b_expert_offset + + offs_k[:, None].to(tl.int64) * stride_bk + + offs_bn[None, :].to(tl.int64) * stride_bn + ) + else: + b_expert_offset = off_experts * 
stride_be + b_ptrs = b_ptr + ( + b_expert_offset + + offs_k[:, None] * stride_bk + + offs_bn[None, :] * stride_bn + ).to(tl.int32) + b_base_ptr = b_ptr + b_expert_offset if use_int8_w8a16: b_scale_ptrs = b_scale_ptr + off_experts * stride_bse + offs_bn[ @@ -2311,7 +2393,7 @@ def fused_moe_persistent_kernel( b0 = tl.load(b_ptrs) else: b0 = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -2349,7 +2431,7 @@ def fused_moe_persistent_kernel( b1 = tl.load(b_ptrs + BLOCK_SIZE_K * stride_bk) else: b1 = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -2403,7 +2485,7 @@ def fused_moe_persistent_kernel( b = tl.load(b_ptrs) else: b = tl.matrix_load( - b_ptr + off_experts * stride_be, + b_base_ptr, shape=[K, N], strides=[stride_bk, stride_bn], block_shape=[BLOCK_SIZE_K, BLOCK_SIZE_N], @@ -2520,6 +2602,8 @@ def fused_moe( bottom_a_use_mls_load: bool = False, ck_sorting: bool = False, ck_topk: int = 8, + scale_bias_with_routed_weight: bool = False, + B_bias: Optional[torch.Tensor] = None, config: Optional[Dict[str, Any]] = None, ) -> None: assert topk_weights is not None or not mul_routed_weight @@ -2577,6 +2661,8 @@ def fused_moe( if (use_int8_w8a16 or use_int4_w4a16 or use_int4_w4a8) and \ block_shape is not None and block_shape[1] > 0: + if B_bias is not None and use_int4_w4a8: + raise ValueError("B_bias is not supported in fused_moe_kernel_gptq_awq_w4a8 yet") assert B_scale is not None and B_scale.ndim == 3 assert B_zp is None or B_zp.ndim == 3 offset_max = 2**31 - 1 @@ -2622,7 +2708,6 @@ def fused_moe( group_size=block_shape[1], MUL_ROUTED_WEIGHT=mul_routed_weight, USE_ADDR_OFFSET_INT64_A=use_addr_offset_int64_a, - USE_ADDR_OFFSET_INT64_B=use_addr_offset_int64_b, USE_ADDR_OFFSET_INT64_C=use_addr_offset_int64_c, top_k=top_k, compute_type=compute_type, @@ -2692,6 
+2777,8 @@ def fused_moe( block_shape[1])) if use_int4_w4a8 and per_channel_quant: + if B_bias is not None: + raise ValueError("B_bias is not supported in fused_moe_kernel_gptq_awq_w4a8_channelwise yet") assert B_scale is not None and B_scale.ndim in (2, 3) assert B_zp is None channelwise_config = config.copy() @@ -2744,9 +2831,6 @@ def fused_moe( **channelwise_config, ) - if use_addr_offset_int64_b: - raise Exception("use_addr_offset_int64_b is not written for ep some cases!") - SPLIT_K = config.pop("SPLIT_K", splitk_size) if mul_routed_weight: SPLIT_K = 0 @@ -2800,6 +2884,7 @@ def fused_moe( 0 if block_shape is None else block_shape[1], MUL_ROUTED_WEIGHT=mul_routed_weight, USE_ADDR_OFFSET_INT64_A=use_addr_offset_int64_a, + USE_ADDR_OFFSET_INT64_B=use_addr_offset_int64_b, USE_ADDR_OFFSET_INT64_C=use_addr_offset_int64_c, top_k=top_k, compute_type=compute_type, @@ -2818,6 +2903,8 @@ def fused_moe( **config, ) elif SPLIT_K > 1: + if B_bias is not None: + raise ValueError("B_bias is not supported in fused_moe_splitk_kernel yet") grid = lambda META: (triton.cdiv(EM, META['BLOCK_SIZE_M']) * triton.cdiv( B.shape[1], META['BLOCK_SIZE_N']), SPLIT_K) @@ -2866,6 +2953,7 @@ def fused_moe( MUL_ROUTED_WEIGHT=mul_routed_weight, SPLIT_K=SPLIT_K, USE_ADDR_OFFSET_INT64_A=use_addr_offset_int64_a, + USE_ADDR_OFFSET_INT64_B=use_addr_offset_int64_b, USE_ADDR_OFFSET_INT64_C=use_addr_offset_int64_c, top_k=top_k, compute_type=compute_type, @@ -2897,6 +2985,7 @@ def fused_moe( sorted_weights, A_scale, B_scale, + (B_bias if B_bias is not None else B), B.size(1), B.size(2), EM, @@ -2918,11 +3007,14 @@ def fused_moe( if B_scale is not None and B_scale.ndim == 3 else 0, B_scale.stride(1) if B_scale is not None and B_scale.ndim >= 2 else 0, + B_bias.stride(0) if B_bias is not None else B.stride(0), + B_bias.stride(1) if B_bias is not None else B.stride(1), A.size(0), 0 if block_shape is None else block_shape[0], 0 if block_shape is None else block_shape[1], MUL_ROUTED_WEIGHT=mul_routed_weight, 
USE_ADDR_OFFSET_INT64_A=use_addr_offset_int64_a, + USE_ADDR_OFFSET_INT64_B=use_addr_offset_int64_b, USE_ADDR_OFFSET_INT64_C=use_addr_offset_int64_c, top_k=top_k, compute_type=compute_type, @@ -2935,6 +3027,8 @@ def fused_moe( ck_sorting=ck_sorting, ck_topk=ck_topk, NUM_XCDS=1, + SCALE_BIAS_WITH_ROUTED_WEIGHT=scale_bias_with_routed_weight, + ADD_BIAS=B_bias is not None, BLOCK_SIZE_K=BLOCK_SIZE_K, COMBINE_SCALE_LOAD=config.pop("COMBINE_SCALE_LOAD", None), **config, diff --git a/aiter/ops/triton/utils/arch_info.py b/aiter/ops/triton/utils/arch_info.py index 158875a891962b50108e95c9d5910494ebc05bef..683fd221c508e1dab62c5ae7b4803d4c27c7a650 100644 --- a/aiter/ops/triton/utils/arch_info.py +++ b/aiter/ops/triton/utils/arch_info.py @@ -6,6 +6,7 @@ _ARCH_TO_DEVICE = { "gfx928": "K100_AI", "gfx936": "BW200", "gfx938": "BW200B", + "gfx92a": "K200_AI", } @@ -22,7 +23,11 @@ def is_fp4_avail(): def is_fp8_avail(): - return get_arch() in ("gfx938") + return get_arch() in ("gfx938", "gfx92a") + + +def is_mls_avail(): + return get_arch() in ("gfx938", "gfx92a") def get_fp8_dtypes(): diff --git a/aiter/ops/triton/utils/moe_config_utils.py b/aiter/ops/triton/utils/moe_config_utils.py index 64db129890be880f5edf958069b2828f04fd951d..601f1354112146746717c78e8be658a5668191ec 100644 --- a/aiter/ops/triton/utils/moe_config_utils.py +++ b/aiter/ops/triton/utils/moe_config_utils.py @@ -157,12 +157,33 @@ def get_moe_configs( config_file_path = os.path.join( f"{AITER_TRITON_CONFIGS_PATH}", "moe", json_file_name) + def _load_config(config_path: str) -> Optional[dict[int, Any]]: + try: + with open(config_path) as f: + config_data = json.load(f) + except json.JSONDecodeError as e: + logger.warning( + "Invalid MoE config JSON at %s (%s). Fallback to other configs/default.", + config_path, + e, + ) + return None + + if not isinstance(config_data, dict): + logger.warning( + "Invalid MoE config format at %s (expect dict). 
Fallback to other configs/default.", + config_path, + ) + return None + return {int(key): val for key, val in config_data.items()} + if os.path.exists(config_file_path): - with open(config_file_path) as f: - logger.info("Using configuration from %s for MoE layer.", - config_file_path) + logger.info("Using configuration from %s for MoE layer.", + config_file_path) + loaded_cfg = _load_config(config_file_path) + if loaded_cfg is not None: # If a configuration has been found, return it - return {int(key): val for key, val in json.load(f).items()} + return loaded_cfg elif is_bottom: # if config with is_bottom json file not found, try to fallback use config without bottom json. fallback_json_file_name = get_config_file_name(E, N, dtype, block_shape) @@ -170,10 +191,11 @@ def get_moe_configs( f"{AITER_TRITON_CONFIGS_PATH}", "moe", fallback_json_file_name) if os.path.exists(fallback_config_file_path): - with open(fallback_config_file_path) as f: - logger.info("Using fallback configuration from %s for MoE layer.", - fallback_config_file_path) - return {int(key): val for key, val in json.load(f).items()} + logger.info("Using fallback configuration from %s for MoE layer.", + fallback_config_file_path) + loaded_cfg = _load_config(fallback_config_file_path) + if loaded_cfg is not None: + return loaded_cfg # for EP mode, local experts num may not match any config file, try to find nearest E which is power of two nearestE = closest_power_of_two(E, dtype) @@ -182,10 +204,11 @@ def get_moe_configs( f"{AITER_TRITON_CONFIGS_PATH}", "moe", fallback_json_file_name) if os.path.exists(fallback_config_file_path): - with open(fallback_config_file_path) as f: - logger.info("Using fallback configuration from %s for MoE layer.", - fallback_config_file_path) - return {int(key): val for key, val in json.load(f).items()} + logger.info("Using fallback configuration from %s for MoE layer.", + fallback_config_file_path) + loaded_cfg = _load_config(fallback_config_file_path) + if loaded_cfg is not 
None: + return loaded_cfg # If no optimized configuration is available, we will use the default # configuration logger.warning( @@ -201,6 +224,7 @@ def try_get_optimal_moe_config( is_marlin: bool = False, block_shape: Optional[list[int]] = None, is_bottom: bool = False, + is_gated: bool = True, ): # First try to load optimal config from the file @@ -214,8 +238,10 @@ def try_get_optimal_moe_config( else: # w1_weight E, N, K = w_shape - # for w1_weight intermediate_size is merged by two weights, so N//2 for real intermediate_size. - N = N // 2 + # Gated MoE stores gate/up together as [E, 2N, K]. Non-gated + # activations such as relu2 store [E, N, K], so keep N unchanged. + if is_gated: + N = N // 2 block_n = block_shape[0] if block_shape else 0 block_k = block_shape[1] if block_shape else 0 @@ -257,6 +283,7 @@ def get_optimal_moe_config_func( use_mxfp4_w4a4: Optional[bool] = False, block_shape: Optional[List[int]] = None, is_bottom: bool = False, + is_gated: bool = True, ): config_dtype = get_config_dtype_str(use_fp8_w8a8=use_fp8_w8a8, use_int8_w8a8=use_int8_w8a8, @@ -273,4 +300,5 @@ def get_optimal_moe_config_func( config_dtype, block_shape=block_shape, is_bottom=is_bottom, + is_gated=is_gated, ) diff --git a/aiter/test_common.py b/aiter/test_common.py index 84d092f264824e34d317c353f4ddf3562456934d..2b1b93fd5ac7d6abcdbb73828c427f402f37dff5 100644 --- a/aiter/test_common.py +++ b/aiter/test_common.py @@ -28,8 +28,12 @@ def perftest( func, *args, **kwargs ) - properties = torch.cuda.get_device_properties(gpu_id) - free_memory = torch.cuda.mem_get_info(gpu_id)[0] + try: + properties = torch.cuda.get_device_properties(gpu_id) + free_memory = torch.cuda.mem_get_info(gpu_id)[0] + except (AssertionError, RuntimeError): + properties = None + free_memory = 0 cache_size = min( getattr(properties, "L2_cache_size", 4096 * 1024) * 64 * 128, (free_memory - iter_used_memory + inputSize) * 0.9, diff --git a/aiter/tune_moe_cli_entry.py b/aiter/tune_moe_cli_entry.py new file mode 
100644 index 0000000000000000000000000000000000000000..d9100d5b8c6ae2bd3a8f6512f5e275570d10d8a6 --- /dev/null +++ b/aiter/tune_moe_cli_entry.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: MIT +"""tune-moe-cli console entry shim. + +Supports two layouts: +1) wheel install: import from ``aiter.fused_moe_autotune``. +2) setup.py develop source tree: fallback to ``op_tests...`` module path. +""" + +from __future__ import annotations + +import importlib +import sys + + +def _try_import_main(module_name: str): + module = importlib.import_module(module_name) + return getattr(module, "main") + + +def main() -> int: + try: + target = _try_import_main("aiter.fused_moe_autotune.tune_moe_cli") + except ModuleNotFoundError as exc: + # develop mode fallback: keep source tree simple without creating + # a physical aiter/fused_moe_autotune directory. + missing = exc.name or "" + if not missing.startswith("aiter.fused_moe_autotune"): + raise + try: + target = _try_import_main("op_tests.triton_autotune.fused_moe.tune_moe_cli") + except ModuleNotFoundError: + print( + "错误: 无法导入 tune_moe_cli 模块。\n" + "请确认当前环境为以下之一:\n" + " 1) 已正确安装 aiter wheel;\n" + " 2) 在 aiter 源码目录执行过 `python setup.py develop`。", + file=sys.stderr, + ) + return 1 + return int(target()) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/aiter/utility/aiter_types.py b/aiter/utility/aiter_types.py new file mode 100644 index 0000000000000000000000000000000000000000..d496430b770595f932749e36e68af61abf791f37 --- /dev/null +++ b/aiter/utility/aiter_types.py @@ -0,0 +1,90 @@ +# SPDX-License-Identifier: MIT +# Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. 
+import ctypes +import re +from pathlib import Path + + +# support develop and install mode +def _find_aiter_enum_h() -> Path: + root = Path(__file__).resolve().parents[2] + candidates = [ + root / "csrc" / "include" / "aiter_enum.h", + root / "aiter_meta" / "csrc" / "include" / "aiter_enum.h", + ] + for p in candidates: + if p.exists(): + return p + raise FileNotFoundError(f"aiter_enum.h not found in {[str(c) for c in candidates]}") + + +_AITER_ENUM_H = _find_aiter_enum_h() +_PREFIX = "AITER_DTYPE_" + + +# get aiter_dtypes(python) from aiter_enum.h +def _parse_aiter_dtypes(header: Path) -> dict: + """Parse AiterDtype enum from aiter_enum.h, returns {short_name: int_id}.""" + assert header.exists(), f"Header not found: {header}" + text = header.read_text() + text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL) + m = re.search(r"typedef\s+enum\s*\{([^}]+)\}\s*AiterDtype\s*;", text) + assert m, f"AiterDtype enum not found in {header}" + result = {} + next_val = 0 + for line in m.group(1).split("\n"): + line = re.sub(r"//.*", "", line).strip().rstrip(",") + if not line: + continue + if "=" in line: + name, val = line.split("=", 1) + name = name.strip() + next_val = int(val.strip()) + else: + name = line + result[name.removeprefix(_PREFIX)] = next_val + next_val += 1 + return result + + +aiter_dtypes = _parse_aiter_dtypes(_AITER_ENUM_H) +""" +expected format of aiter_dtypes: { + "fp8": 0, + "fp8_e8m0": 1, + "fp16": 2, + "bf16": 3, + "fp32": 4, + "i4x2": 5, + "fp4x2": 6, + "u32": 7, + "i32": 8, + "i16": 9, + "i8": 10, + "u8": 11, + "i64": 12, + "u64": 13, + } +""" + + +class aiter_tensor_t(ctypes.Structure): + _fields_ = [ + ("ptr", ctypes.c_void_p), + ("numel_", ctypes.c_size_t), + ("ndim", ctypes.c_int), + ("shape", ctypes.c_int64 * 8), + ("strides", ctypes.c_int64 * 8), + ("dtype_", ctypes.c_int), + ("device_id", ctypes.c_int), + ] + + +_EXPECTED_SIZEOF_AITER_TENSOR = ( + 160 # must match sizeof(aiter_tensor_t) in csrc/include/aiter_tensor.h +) +assert 
ctypes.sizeof(aiter_tensor_t) == _EXPECTED_SIZEOF_AITER_TENSOR, ( + f"aiter_tensor_t layout mismatch: Python sizeof={ctypes.sizeof(aiter_tensor_t)}, " + f"expected C sizeof={_EXPECTED_SIZEOF_AITER_TENSOR}. " + f"Check struct field order and alignment against csrc/include/aiter_tensor.h" +) diff --git a/aiter/utility/dtypes.py b/aiter/utility/dtypes.py index 5e094fd604f4c0a20f41b3a1798d60fb979dfef1..0a2afeb0c4883163419655779c1cdab090cfbc07 100644 --- a/aiter/utility/dtypes.py +++ b/aiter/utility/dtypes.py @@ -2,6 +2,7 @@ import torch from ..ops.enum import QuantType, ActivationType from ..jit.utils.chip_info import get_gfx +from .aiter_types import aiter_dtypes, aiter_tensor_t import argparse defaultDtypes = { @@ -28,6 +29,9 @@ u32 = torch.uint32 i32 = torch.int32 i16 = torch.int16 i8 = torch.int8 +u8 = torch.uint8 +i64 = torch.int64 +u64 = torch.uint64 d_dtypes = { "fp8": fp8, @@ -41,8 +45,55 @@ d_dtypes = { "i32": i32, "i16": i16, "i8": i8, + "u8": u8, + "i64": i64, + "u64": u64, } +# Map torch dtype → AiterDtype integer id (built from aiter_enum.h at import time) +_torch_to_aiter_dtype = {d_dtypes[name]: idx for name, idx in aiter_dtypes.items() if name in d_dtypes} + + +def torch_to_aiter_pybind(tensor: torch.Tensor): + """Convert torch.Tensor to pybind aiter_tensor_t for passing to C++ ops. + + Unlike torch_to_aiter() which returns a ctypes aiter_tensor_t struct, + this function constructs a *pybind11* aiter_tensor_t via + module_aiter_core. The two types are not interchangeable. 
+ """ + assert tensor.ndim <= 8, f"aiter_tensor_t supports at most 8 dims, got {tensor.ndim}" + assert tensor.dtype in _torch_to_aiter_dtype, f"Unsupported dtype: {tensor.dtype}" + + from ..jit.core import get_module + + aiter_tensor_cls = get_module("module_custom_all_reduce").aiter_tensor_t + return aiter_tensor_cls( + tensor.data_ptr(), + tensor.numel(), + tensor.ndim, + list(tensor.shape), + list(tensor.stride()), + _torch_to_aiter_dtype[tensor.dtype], + tensor.device.index if tensor.is_cuda else -1, + ) + + +def torch_to_aiter(tensor: torch.Tensor) -> aiter_tensor_t: + """ctypes binding: torch.Tensor -> aiter_tensor_t, zero-copy, same GPU memory.""" + assert tensor.ndim <= 8, f"aiter_tensor_t supports at most 8 dims, got {tensor.ndim}" + assert tensor.dtype in _torch_to_aiter_dtype, f"Unsupported dtype: {tensor.dtype}" + + at = aiter_tensor_t() + at.ptr = tensor.data_ptr() + at.numel_ = tensor.numel() + at.ndim = tensor.ndim + for i in range(tensor.ndim): + at.shape[i] = tensor.shape[i] + at.strides[i] = tensor.stride(i) + at.dtype_ = _torch_to_aiter_dtype[tensor.dtype] + at.device_id = tensor.device.index if tensor.is_cuda else -1 + return at + def str2bool(v): if isinstance(v, bool): diff --git a/csrc/include/aiter_enum.h b/csrc/include/aiter_enum.h index 5b4d877c817c4737c18644f935d0ed3cf4834205..c36a725f957541e9b5a64da5aca0c7c7399f80cd 100644 --- a/csrc/include/aiter_enum.h +++ b/csrc/include/aiter_enum.h @@ -1,12 +1,15 @@ #pragma once // SPDX-License-Identifier: MIT - +// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. 
+// Single source of truth: aiter/ops/enum.py parses enums from this file +#include enum class ActivationType : int { - No = -1, - Gelu = 0, - Silu = 1, + No = -1, + Silu = 0, + Gelu = 1, + Swiglu = 2, }; enum class QuantType : int { @@ -16,4 +19,67 @@ enum class QuantType : int per_1x32, per_1x128, per_128x128, + per_256x128, + per_1024x128, }; +typedef enum +{ + AITER_DTYPE_fp8, + AITER_DTYPE_fp8_e8m0, + AITER_DTYPE_fp16, + AITER_DTYPE_bf16, + AITER_DTYPE_fp32, + AITER_DTYPE_i4x2, + AITER_DTYPE_fp4x2, + AITER_DTYPE_u32, + AITER_DTYPE_i32, + AITER_DTYPE_i16, + AITER_DTYPE_i8, + AITER_DTYPE_u8, + AITER_DTYPE_i64, + AITER_DTYPE_u64, +} AiterDtype; + +static inline size_t AiterDtype_element_size(AiterDtype dtype) +{ + switch(dtype) + { + case AITER_DTYPE_fp8: + case AITER_DTYPE_fp8_e8m0: + case AITER_DTYPE_i4x2: + case AITER_DTYPE_fp4x2: + case AITER_DTYPE_i8: + case AITER_DTYPE_u8: return 1; + case AITER_DTYPE_fp16: + case AITER_DTYPE_bf16: + case AITER_DTYPE_i16: return 2; + case AITER_DTYPE_fp32: + case AITER_DTYPE_u32: + case AITER_DTYPE_i32: return 4; + case AITER_DTYPE_i64: + case AITER_DTYPE_u64: return 8; + default: return 0; + } +} + +static inline std::string AiterDtype_to_str(int dtype) +{ + switch(dtype) + { + case AITER_DTYPE_fp8: return "fp8"; + case AITER_DTYPE_fp8_e8m0: return "fp8_e8m0"; + case AITER_DTYPE_fp16: return "fp16"; + case AITER_DTYPE_bf16: return "bf16"; + case AITER_DTYPE_fp32: return "fp32"; + case AITER_DTYPE_i4x2: return "i4x2"; + case AITER_DTYPE_fp4x2: return "fp4x2"; + case AITER_DTYPE_u32: return "u32"; + case AITER_DTYPE_i32: return "i32"; + case AITER_DTYPE_i16: return "i16"; + case AITER_DTYPE_i8: return "i8"; + case AITER_DTYPE_u8: return "u8"; + case AITER_DTYPE_i64: return "i64"; + case AITER_DTYPE_u64: return "u64"; + default: return "unknown"; + } +} diff --git a/csrc/include/aiter_hip_common.h b/csrc/include/aiter_hip_common.h index a6d2d3543d65b6de72d576b452b83f176a839e44..2b71552af65741071249495fb404df0c7b5b9076 100644 
--- a/csrc/include/aiter_hip_common.h +++ b/csrc/include/aiter_hip_common.h @@ -1,9 +1,13 @@ // SPDX-License-Identifier: MIT #pragma once +#include "aiter_enum.h" #include "ck_tile/core.hpp" #include #include +#include +#include +#include enum class GPUArch { @@ -12,19 +16,65 @@ enum class GPUArch gfx946, }; -#define HIP_CALL(call) \ - do \ - { \ - hipError_t err = call; \ - if(err != hipSuccess) \ - { \ - printf("\n[AITER] %s:%d fail to call %s ---> [HIP error](%s)\n", \ - __FILE__, \ - __LINE__, \ - #call, \ - hipGetErrorString(err)); \ - exit(0); \ - } \ +namespace aiter_detail { + +inline thread_local bool g_aiter_can_throw = false; + +// Fatal (non-recoverable) error handler — used by HIP_CALL. +// Always aborts; does not consult g_aiter_can_throw. +template +[[noreturn, gnu::noinline]] inline void aiter_check_fatal(const char* file, size_t line, Args&&... args) +{ + std::cerr << "[AITER] " << file << ":" << line << " "; + (std::cerr << ... << std::forward(args)) << std::endl; + std::abort(); +} + +template +[[noreturn]] inline void check_fail(const char* file, int line, Args&&... args) +{ + std::ostringstream oss; + oss << "[AITER] " << file << ":" << line; + if constexpr(sizeof...(Args) > 0) + { + oss << " "; + (oss << ... << std::forward(args)); + } + else + { + oss << " check failed"; + } + std::string msg = oss.str(); + std::cerr << msg << std::endl; + if(g_aiter_can_throw) + { + throw std::runtime_error(std::move(msg)); + } + std::abort(); +} +} // namespace aiter_detail + +#define AITER_CHECK(x, ...) 
\ + do \ + { \ + if(!(x)) [[unlikely]] \ + { \ + aiter_detail::check_fail(__FILE__, __LINE__ __VA_OPT__(, ) __VA_ARGS__); \ + } \ + } while(0) + +#define HIP_CALL(call) \ + do \ + { \ + hipError_t err = call; \ + if(err != hipSuccess) [[unlikely]] \ + { \ + aiter_detail::aiter_check_fatal(__FILE__, \ + __LINE__, \ + "fail to call " #call " ---> [HIP error](", \ + hipGetErrorString(err), \ + ')'); \ + } \ } while(0) struct p3 @@ -126,3 +176,21 @@ static const uint32_t get_num_cu_func() static const uint32_t num_cu = get_num_cu_local(); return num_cu; } + +/// RAII guard that saves the current HIP device and restores it on destruction. +/// Required by AiterTensor factory methods and any code that temporarily switches devices. +class HipDeviceGuard +{ +public: + explicit HipDeviceGuard(int device_id) + { + HIP_CALL(hipGetDevice(&prev_device_)); + HIP_CALL(hipSetDevice(device_id)); + } + ~HipDeviceGuard() noexcept { HIP_CALL(hipSetDevice(prev_device_)); } + HipDeviceGuard(const HipDeviceGuard&) = delete; + HipDeviceGuard& operator=(const HipDeviceGuard&) = delete; + +private: + int prev_device_{}; +}; diff --git a/csrc/include/aiter_opus_plus.h b/csrc/include/aiter_opus_plus.h new file mode 100644 index 0000000000000000000000000000000000000000..9d176c90d69cc6afcc2330c1b75e8712ff223f3b --- /dev/null +++ b/csrc/include/aiter_opus_plus.h @@ -0,0 +1,875 @@ +// SPDX-License-Identifier: MIT + +#pragma once + +#include "hip_reduce.h" +#include "opus/opus.hpp" +// todo: remove this to use aiterTensor dtype +#include +#include +#include + +namespace aiter { +using namespace opus; +#define RT 0 +#define GROUP_NT 3 + +using index_t = int; + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// scaled type conversion: v_pk_mul_f32 + v_med3_f32 + v_cvt_pk_{fp8,bf8}_f32 +// Identical ISA to ck_tile::vec_convert for performance parity + +OPUS_D fp32x2_t pk_mul_f32(fp32x2_t a, fp32x2_t b) +{ + fp32x2_t c; + asm 
volatile("v_pk_mul_f32 %0, %1, %2" : "=v"(c) : "v"(a), "v"(b)); + return c; +} + +// fp32x2 -> fp8x2 with scale + saturation clamp (E4M3) +// ISA: v_pk_mul_f32 + v_med3_f32 x2 + v_cvt_pk_fp8_f32 +template , bool> = true> +OPUS_D decltype(auto) fp32_to_fp8_scaled_x2(const S& s, float inverted_scale) +{ + fp32x2_t tmp = pk_mul_f32(s, fp32x2_t{inverted_scale, inverted_scale}); +#if defined(__gfx942__) + constexpr float hi = 240.0f, lo = -240.0f; +#else + constexpr float hi = 448.0f, lo = -448.0f; +#endif + float a = tmp[0], b = tmp[1]; + int w; + asm volatile("v_med3_f32 %1, %1, %3, %4\n" + "v_med3_f32 %2, %2, %3, %4\n" + "v_cvt_pk_fp8_f32 %0, %1, %2" + : "=v"(w), "+v"(a), "+v"(b) + : "v"(lo), "v"(hi)); + return __builtin_bit_cast(fp8x2_t, static_cast(w)); +} + +template , bool> = true> +OPUS_D decltype(auto) fp32_to_fp8_scaled_x4(const S& s, float inverted_scale) +{ + auto lo = fp32_to_fp8_scaled_x2(fp32x2_t{s[0], s[1]}, inverted_scale); + auto hi = fp32_to_fp8_scaled_x2(fp32x2_t{s[2], s[3]}, inverted_scale); + return fp8x4_t{lo[0], lo[1], hi[0], hi[1]}; +} + +// fp32x2 -> bf8x2 with scale + saturation clamp (E5M2) +// ISA: v_pk_mul_f32 + v_med3_f32 x2 + v_cvt_pk_bf8_f32 +template , bool> = true> +OPUS_D decltype(auto) fp32_to_bf8_scaled_x2(const S& s, float inverted_scale) +{ + fp32x2_t tmp = pk_mul_f32(s, fp32x2_t{inverted_scale, inverted_scale}); + constexpr float hi = 57344.0f, lo = -57344.0f; + float a = tmp[0], b = tmp[1]; + int w; + asm volatile("v_med3_f32 %1, %1, %3, %4\n" + "v_med3_f32 %2, %2, %3, %4\n" + "v_cvt_pk_bf8_f32 %0, %1, %2" + : "=v"(w), "+v"(a), "+v"(b) + : "v"(lo), "v"(hi)); + return __builtin_bit_cast(bf8x2_t, static_cast(w)); +} + +template , bool> = true> +OPUS_D decltype(auto) fp32_to_bf8_scaled_x4(const S& s, float inverted_scale) +{ + auto lo = fp32_to_bf8_scaled_x2(fp32x2_t{s[0], s[1]}, inverted_scale); + auto hi = fp32_to_bf8_scaled_x2(fp32x2_t{s[2], s[3]}, inverted_scale); + return bf8x4_t{lo[0], lo[1], hi[0], hi[1]}; +} + +// fp32x2 -> 
i8x2 with scale +// ISA: v_pk_mul_f32 + v_cvt_i32_f32 x2 +template , bool> = true> +OPUS_D decltype(auto) fp32_to_i8_scaled_x2(const S& s, float inverted_scale) +{ + fp32x2_t tmp = pk_mul_f32(s, fp32x2_t{inverted_scale, inverted_scale}); + return i8x2_t{static_cast(tmp[0]), static_cast(tmp[1])}; +} + +template , bool> = true> +OPUS_D decltype(auto) fp32_to_i8_scaled_x4(const S& s, float inverted_scale) +{ + fp32x2_t tmp0 = pk_mul_f32(fp32x2_t{s[0], s[1]}, fp32x2_t{inverted_scale, inverted_scale}); + fp32x2_t tmp1 = pk_mul_f32(fp32x2_t{s[2], s[3]}, fp32x2_t{inverted_scale, inverted_scale}); + return i8x4_t{static_cast(tmp0[0]), + static_cast(tmp0[1]), + static_cast(tmp1[0]), + static_cast(tmp1[1])}; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// fp16x2 -> fp4 with scale (v_cvt_scalef32_pk_fp4_f16, gfx950 only) +// opus.hpp has fp32->fp4 and bf16->fp4 but NOT fp16->fp4 +#if defined(__gfx950__) +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x2(const S& s, float scale, number = {}) +{ + u32_t w; + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, s, scale, sel); + return __builtin_bit_cast(array, static_cast(w)); +} +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x4(const S& s, float scale) +{ + u32_t w; + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, fp16x2_t{s[0], s[1]}, scale, 0); + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, fp16x2_t{s[2], s[3]}, scale, 1); + return __builtin_bit_cast(array, static_cast(w)); +} +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x8(const S& s, float scale) +{ + u32_t w; + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, fp16x2_t{s[0], s[1]}, scale, 0); + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, fp16x2_t{s[2], s[3]}, scale, 1); + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, fp16x2_t{s[4], s[5]}, scale, 2); + w = __builtin_amdgcn_cvt_scalef32_pk_fp4_f16(w, 
fp16x2_t{s[6], s[7]}, scale, 3); + return __builtin_bit_cast(array, w); +} +#else +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x2(const S&, float) +{ + return array{}; +} +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x4(const S&, float) +{ + return array{}; +} +template , bool> = true> +OPUS_D constexpr decltype(auto) fp16_to_fp4_scaled_x8(const S&, float) +{ + return array{}; +} +#endif + +// bf16 -> fp4 larger vectors (bf16x4/x8) using opus bf16_to_fp4_packed_x2 +template , bool> = true> +OPUS_D constexpr decltype(auto) bf16_to_fp4_scaled_x4(const S& s, float scale) +{ + auto lo = bf16_to_fp4_packed_x2(bf16x2_t{s[0], s[1]}, scale); + auto hi = bf16_to_fp4_packed_x2(bf16x2_t{s[2], s[3]}, scale); + return array{lo, hi}; +} +template , bool> = true> +OPUS_D constexpr decltype(auto) bf16_to_fp4_scaled_x8(const S& s, float scale) +{ + auto a = bf16_to_fp4_packed_x2(bf16x2_t{s[0], s[1]}, scale); + auto b = bf16_to_fp4_packed_x2(bf16x2_t{s[2], s[3]}, scale); + auto c = bf16_to_fp4_packed_x2(bf16x2_t{s[4], s[5]}, scale); + auto d = bf16_to_fp4_packed_x2(bf16x2_t{s[6], s[7]}, scale); + return array{a, b, c, d}; +} + +// fp4 -> fp32/bf16/fp16 dequant helpers. Input fp4_t stores two packed fp4 values. 
+template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp32_scaled_x2(const S& s, float scale) +{ + return fp4_to_fp32_packed_x2(s, scale); +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp32_scaled_x4(const S& s, float scale) +{ + return fp4_to_fp32_packed_x4(s, scale); +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp32_scaled_x8(const S& s, float scale) +{ + return fp4_to_fp32_packed_x8(s, scale); +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_bf16_scaled_x2(const S& s, float scale) +{ +#if defined(__gfx950__) + u32_t packed; + if constexpr(std::is_same_v) + { + packed = static_cast(__builtin_bit_cast(u8_t, s)); + } + else + { + packed = static_cast(__builtin_bit_cast(u8_t, s[0])); + } + return __builtin_amdgcn_cvt_scalef32_pk_bf16_fp4(packed, scale, 0); +#else + auto x = fp4_to_fp32_scaled_x2(s, scale); + return bf16x2_t{static_cast(x[0]), static_cast(x[1])}; +#endif +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_bf16_scaled_x4(const S& s, float scale) +{ + auto lo = fp4_to_bf16_scaled_x2(s[0], scale); + auto hi = fp4_to_bf16_scaled_x2(s[1], scale); + return bf16x4_t{lo[0], lo[1], hi[0], hi[1]}; +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_bf16_scaled_x8(const S& s, float scale) +{ + auto a = fp4_to_bf16_scaled_x2(s[0], scale); + auto b = fp4_to_bf16_scaled_x2(s[1], scale); + auto c = fp4_to_bf16_scaled_x2(s[2], scale); + auto d = fp4_to_bf16_scaled_x2(s[3], scale); + return bf16x8_t{a[0], a[1], b[0], b[1], c[0], c[1], d[0], d[1]}; +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp16_scaled_x2(const S& s, float scale) +{ + auto x = fp4_to_fp32_scaled_x2(s, scale); + return fp16x2_t{static_cast(x[0]), static_cast(x[1])}; +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp16_scaled_x4(const S& s, float scale) +{ + auto x = fp4_to_fp32_scaled_x4(s, scale); + return 
fp16x4_t{static_cast(x[0]), + static_cast(x[1]), + static_cast(x[2]), + static_cast(x[3])}; +} + +template >, bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp16_scaled_x8(const S& s, float scale) +{ + auto x = fp4_to_fp32_scaled_x8(s, scale); + return fp16x8_t{static_cast(x[0]), + static_cast(x[1]), + static_cast(x[2]), + static_cast(x[3]), + static_cast(x[4]), + static_cast(x[5]), + static_cast(x[6]), + static_cast(x[7])}; +} + +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array>, + bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp32_scaled(const S& s, float scale) +{ + constexpr index_t N = size(); + vector_t out; + static_for([&](auto i) { + auto x = fp4_to_fp32_scaled_x2(s[i.value], scale); + out[i.value * 2] = x[0]; + out[i.value * 2 + 1] = x[1]; + }); + return out; +} + +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array>, + bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_bf16_scaled(const S& s, float scale) +{ + constexpr index_t N = size(); + vector_t out; + static_for([&](auto i) { + auto x = fp4_to_bf16_scaled_x2(s[i.value], scale); + out[i.value * 2] = x[0]; + out[i.value * 2 + 1] = x[1]; + }); + return out; +} + +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array>, + bool> = true> +OPUS_D constexpr decltype(auto) fp4_to_fp16_scaled(const S& s, float scale) +{ + constexpr index_t N = size(); + vector_t out; + static_for([&](auto i) { + auto x = fp4_to_fp16_scaled_x2(s[i.value], scale); + out[i.value * 2] = x[0]; + out[i.value * 2 + 1] = x[1]; + }); + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// scaled_cast: type conversion with scale multiplication (ck_tile::vec_convert equivalent) +// Usage: aiter::scaled_cast(fp32_vec, inverted_scale) + +// --- 8-bit targets (fp8, bf8, i8): fp32 source x2/x4 --- +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float 
inverted_scale) +{ + return fp32_to_fp8_scaled_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_bf8_scaled_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_i8_scaled_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_fp8_scaled_x4(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_bf8_scaled_x4(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_i8_scaled_x4(s, inverted_scale); +} + +// --- fp4 target: fp32 source (delegates to opus cast) --- +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_fp4_packed_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_fp4_packed_x4(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp32_to_fp4_packed_x8(s, inverted_scale); +} + +// --- fp4 target: bf16 source --- +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return bf16_to_fp4_packed_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return bf16_to_fp4_scaled_x4(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return 
bf16_to_fp4_scaled_x8(s, inverted_scale); +} + +// --- fp4 target: fp16 source --- +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp16_to_fp4_scaled_x2(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp16_to_fp4_scaled_x4(s, inverted_scale); +} +template && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + return fp16_to_fp4_scaled_x8(s, inverted_scale); +} + +// --- fp4 source: dequant to fp32 --- +template > && std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp32_scaled_x2(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp32_scaled_x4(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp32_scaled_x8(s, scale); +} +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array> && + std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp32_scaled(s, scale); +} + +// --- fp4 source: dequant to bf16 --- +template > && std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_bf16_scaled_x2(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_bf16_scaled_x4(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_bf16_scaled_x8(s, scale); +} +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array> && + std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return 
fp4_to_bf16_scaled(s, scale); +} + +// --- fp4 source: dequant to fp16 --- +template > && std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp16_scaled_x2(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp16_scaled_x4(s, scale); +} +template > && std::is_same_v, bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp16_scaled_x8(s, scale); +} +template && std::is_same_v, fp4_t> && + !is_any_of_v, array, array> && + std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float scale) +{ + return fp4_to_fp16_scaled(s, scale); +} + +///////////////////////////////////////////////////////////////////////////////////////////////////////// +// auto-fold: build flat output vector using x2 primitives in a loop + +// 8-bit targets (fp8, bf8, i8): any fp32 vector size via x2 loop +template && std::is_same_v, fp32_t> && + !is_any_of_v && + (std::is_same_v || std::is_same_v || + std::is_same_v), + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + static_assert(N % 2 == 0); + vector_t out; + static_for([&](auto i) { + auto pair = scaled_cast(fp32x2_t{s[i.value * 2], s[i.value * 2 + 1]}, inverted_scale); + out[i.value * 2] = pair[0]; + out[i.value * 2 + 1] = pair[1]; + }); + return out; +} + +// two-hop: non-fp32 source -> convert to fp32 via static_cast -> scaled_cast to 8-bit target +// Uses static_cast instead of opus::cast to handle _Float16/__fp16 mismatch +template && !std::is_same_v, fp32_t> && + (std::is_same_v || std::is_same_v || + std::is_same_v), + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + vector_t fp32_vec; + static_for([&](auto i) { fp32_vec[i.value] = static_cast(s[i.value]); }); + return 
scaled_cast(fp32_vec, inverted_scale); +} + +// fp4 target: any fp32 vector size via x2 loop +template < + typename D, + typename S, + std::enable_if_t && std::is_same_v, fp32_t> && + !is_any_of_v && std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + static_assert(N % 2 == 0); + array out; + static_for([&](auto i) { + auto packed = scaled_cast(fp32x2_t{s[i.value * 2], s[i.value * 2 + 1]}, inverted_scale); + out[i.value] = packed[0]; + }); + return out; +} + +// fp4 target: non-fp32 source -> convert to fp32 via static_cast -> scaled_cast to fp4 +template && !std::is_same_v, fp32_t> && + !is_any_of_v && + std::is_same_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + vector_t fp32_vec; + static_for([&](auto i) { fp32_vec[i.value] = static_cast(s[i.value]); }); + return scaled_cast(fp32_vec, inverted_scale); +} + +// general fallback: fp32 source -> any non-quantized target with scale +template && std::is_same_v, fp32_t> && + !is_any_of_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + S tmp; + static_for([&](auto i) { tmp[i.value] = s[i.value] * inverted_scale; }); + if constexpr(std::is_same_v) + { + return tmp; + } + else + { + return cast(tmp); + } +} + +// general fallback: non-fp32 source -> any non-quantized target with scale (two-hop via fp32) +template && !std::is_same_v, fp32_t> && + !is_any_of_v, + bool> = true> +OPUS_D decltype(auto) scaled_cast(const S& s, float inverted_scale) +{ + constexpr index_t N = size(); + vector_t fp32_vec; + static_for([&](auto i) { fp32_vec[i.value] = static_cast(s[i.value]); }); + return scaled_cast(fp32_vec, inverted_scale); +} + +// Load a large vector (vec_size elements of type T) from gmem buffer in chunks. 
+// Each chunk issues one buffer_load instruction of chunk_bytes bytes (4/8/16 -> +// dword/dwordx2/dwordx4). Total loads = vec_size * sizeof(T) / chunk_bytes. +// +// interleave=false: chunks are contiguous in GMEM. +// GMEM layout (per thread): +// base + row_offset +// |<-- chunk_bytes -->|<-- chunk_bytes -->|<-- chunk_bytes -->|<-- chunk_bytes -->| +// [ chunk 0 ][ chunk 1 ][ chunk 2 ][ chunk 3 ] +// +// interleave=true: chunks are strided by interleave_thread_size * chunk_bytes in GMEM. +// GMEM layout (thread 0 loads marked with *, other threads fill the gaps): +// base + row_offset +// |<- chunk_bytes ->|<- (interleave_thread_size-1)*chunk_bytes gap ->|<- chunk_bytes ->|... +// [ *chunk 0 (t0)* ][ chunk 0 (t1) ]...[ chunk 0 (tN-1) ] [ *chunk 1 (t0)* ]... +// +// Each thread's chunks are interleaved with other threads' data, +// stride = interleave_thread_size * chunk_bytes bytes between chunks. +// +// Example: T=bf16(2B), vec_size=32, chunk_bytes=16, interleave_thread_size=256 +// total = 64B -> 4x buffer_load_dwordx4, each loading 8 bf16 elements. +// interleave stride = 256 * 16 = 4096 bytes between chunks. +template +__device__ opus::vector_t load_vector_nbytes(opus::gmem& buffer, int row_offset) +{ + static_assert(vec_size * sizeof(T) % chunk_bytes == 0, + "vec_size * sizeof(T) must be a multiple of chunk_bytes"); + static constexpr index_t num_chunks = vec_size * sizeof(T) / chunk_bytes; + constexpr index_t chunk_size_elements = chunk_bytes / sizeof(T); + constexpr index_t interleave_bytes = interleave_thread_size * chunk_bytes; + + opus::vector_t result; + T* result_ptr = reinterpret_cast(&result); + + opus::static_for([&](auto i) { + constexpr index_t chunk_offset_bytes = + interleave ? 
i.value * interleave_bytes : i.value * chunk_bytes; + constexpr index_t chunk_offset_elements = chunk_offset_bytes / sizeof(T); + + opus::vector_t* chunk_ptr = + reinterpret_cast*>( + result_ptr + i.value * chunk_size_elements); + *chunk_ptr = + buffer.template load(row_offset, chunk_offset_elements); + }); + + return result; +} + +// Store a vector (vec_size elements of DTYPE_I) to gmem buffer in chunks, with optional type +// conversion. Mirror of load_vector_nbytes but for writing. Each chunk issues one buffer_store of +// chunk_bytes bytes. +// +// Template params: +// T : buffer element type (storage type in GMEM) +// DTYPE_I : input element type in registers (e.g. float) +// vec_size : number of input elements +// chunk_bytes: bytes per buffer_store instruction (4/8/16 -> dword/dwordx2/dwordx4) +// T_R : target conversion type before storing (default = T) +// if T_R != DTYPE_I, data is converted per-chunk before store. +// interleave : same strided layout as load_vector_nbytes +// (stride = interleave_thread_size * chunk_bytes) +// +// interleave=false: chunks are contiguous in GMEM. +// GMEM layout (per thread): +// base + row_offset +// |<-- chunk_bytes -->|<-- chunk_bytes -->|<-- chunk_bytes -->|<-- chunk_bytes -->| +// [ chunk 0 ][ chunk 1 ][ chunk 2 ][ chunk 3 ] +// +// interleave=true: chunks are strided by interleave_thread_size * chunk_bytes in GMEM. +// GMEM layout (thread 0 stores marked with *, other threads fill the gaps): +// base + row_offset +// |<- chunk_bytes ->|<- (interleave_thread_size-1)*chunk_bytes gap ->|<- chunk_bytes ->|... +// [ *chunk 0 (t0)* ][ chunk 0 (t1) ]...[ chunk 0 (tN-1) ] [ *chunk 1 (t0)* ]... +// +// Each thread's chunks are interleaved with other threads' data, +// stride = interleave_thread_size * chunk_bytes bytes between chunks. +// +// Conversion paths (when T_R != DTYPE_I): +// - T_R is bf16/fp16: per-element type_convert (scalar loop) +// - otherwise: vec_convert with inverted_scale (e.g. 
float -> fp8/fp4) +// When T_R == DTYPE_I: direct store, no conversion. +template +__device__ void store_vector_nbytes(opus::gmem& buffer, + const opus::vector_t& vec, + int row_offset, + float inverted_scale = 1.0f) +{ + static constexpr int32_t store_vec_size = + std::is_same_v ? vec_size / 2 : vec_size; + static_assert(store_vec_size * sizeof(T) % chunk_bytes == 0, + "store_vec_size * sizeof(T) must be a multiple of chunk_bytes"); + static constexpr index_t num_chunks = store_vec_size * sizeof(T) / chunk_bytes; + static constexpr index_t chunk_size_elements = vec_size / num_chunks; + static constexpr index_t store_chunk_size_elements = store_vec_size / num_chunks; + static constexpr index_t interleave_bytes = interleave_thread_size * chunk_bytes; + const DTYPE_I* vec_ptr = reinterpret_cast(&vec); + using chunk_type = opus::vector_t; + using store_type = opus::vector_t; + + opus::static_for([&](auto i) { + constexpr index_t chunk_offset_bytes = + interleave ? i.value * interleave_bytes : i.value * chunk_bytes; + constexpr index_t chunk_offset_elements = chunk_offset_bytes / sizeof(T); + + const chunk_type* chunk_ptr = + reinterpret_cast(vec_ptr + i.value * chunk_size_elements); + if constexpr(!std::is_same_v) + { + if constexpr(std::is_same_v || std::is_same_v) + { + opus::vector_t chunk_convert; + for(int j = 0; j < chunk_size_elements; j++) + { + chunk_convert[j] = opus::cast((*chunk_ptr)[j]); + } + store_type& chunk_store = reinterpret_cast(chunk_convert); + buffer.template store( + chunk_store, row_offset, chunk_offset_elements); + } + else if constexpr(std::is_same_v) + { + auto chunk_convert = scaled_cast(*chunk_ptr, inverted_scale); + store_type& chunk_store = reinterpret_cast(chunk_convert); + buffer.template store( + chunk_store, row_offset, chunk_offset_elements); + } + else + { + opus::vector_t chunk_convert; + chunk_convert = scaled_cast(*chunk_ptr, inverted_scale); + store_type& chunk_store = reinterpret_cast(chunk_convert); + buffer.template store( 
+ chunk_store, row_offset, chunk_offset_elements); + } + // Workaround: compiler may not insert s_nop after the last buffer_store, causing a + // WAR hazard where vdata VGPRs are overwritten before buffer_store finishes reading + // them. + asm volatile("s_nop 0"); + } + else + { + const store_type* chunk_store_ptr = reinterpret_cast(chunk_ptr); + buffer.template store( + *chunk_store_ptr, row_offset, chunk_offset_elements); + } + }); +} + +// High-level store API: automatically selects the best chunk_bytes (16/8/4) for +// store_vector_nbytes. Picks the largest chunk size that evenly divides the total store bytes. +// +// When interleave=true, num_repeat controls how many interleaved repeats per thread, +// which affects the effective store size used to choose chunk_bytes. +template +__device__ void store_vector(opus::gmem& buffer, + const opus::vector_t& vec, + int row_offset, + float inverted_scale = 1.0f) +{ + static constexpr int32_t num_store_repeat = interleave ? num_repeat : 1; + static constexpr int32_t store_vec_size = + std::is_same_v ? 
vec_size / 2 : vec_size; + if constexpr((store_vec_size * sizeof(T) / num_store_repeat) % 16 == 0) + { + store_vector_nbytes( + buffer, vec, row_offset, inverted_scale); + } + else if constexpr((store_vec_size * sizeof(T) / num_store_repeat) % 8 == 0) + { + store_vector_nbytes( + buffer, vec, row_offset, inverted_scale); + } + else if constexpr((store_vec_size * sizeof(T) / num_store_repeat) % 4 == 0) + { + store_vector_nbytes( + buffer, vec, row_offset, inverted_scale); + } + else + { + static_assert(false, "vec_size * sizeof(T) must be a multiple of 16, 8, or 4"); + } +} + +// todo: edit this to use aiterTensor dtype +template +struct t2opus; +template <> +struct t2opus +{ + using type = float; +}; +template <> +struct t2opus +{ + using type = opus::fp16_t; +}; +template <> +struct t2opus +{ + using type = opus::bf16_t; +}; +template <> +struct t2opus +{ + using type = int32_t; +}; +template <> +struct t2opus +{ + using type = opus::i8_t; +}; + +// HIP native type -> opus type mapping +template struct hip2opus; +template <> struct hip2opus { using type = opus::fp32_t; }; +template <> struct hip2opus<__half> { using type = opus::fp16_t; }; +template <> struct hip2opus { using type = opus::bf16_t; }; +template <> struct hip2opus { using type = opus::fp8_t; }; +template <> struct hip2opus { using type = opus::i8_t; }; +template <> struct hip2opus { using type = int32_t; }; + +} // namespace aiter diff --git a/csrc/include/aiter_stream.h b/csrc/include/aiter_stream.h new file mode 100644 index 0000000000000000000000000000000000000000..682c61b1bee484193ab129e6a664abb1e8b5ef6a --- /dev/null +++ b/csrc/include/aiter_stream.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +#pragma once + +#include + +/// Lightweight thread-local stream manager (pure HIP, no torch dependency). 
+/// +/// Usage: +/// hipStream_t s = aiter::getCurrentHIPStream(); +/// aiter::setCurrentHIPStream(stream); + +namespace aiter { + +namespace detail { + +inline hipStream_t& threadLocalStream() +{ + thread_local hipStream_t stream = nullptr; + return stream; +} + +} // namespace detail + +inline hipStream_t getCurrentHIPStream() +{ + return detail::threadLocalStream(); +} + +inline void setCurrentHIPStream(hipStream_t stream) +{ + detail::threadLocalStream() = stream; +} + +} // namespace aiter diff --git a/csrc/include/aiter_tensor.h b/csrc/include/aiter_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..d87a2ac10922b5266ccc7137be93988ba5fec8bb --- /dev/null +++ b/csrc/include/aiter_tensor.h @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: MIT +// Copyright (C) 2024-2026, Advanced Micro Devices, Inc. All rights reserved. +#pragma once +#include "aiter_hip_common.h" +#include +#include + +struct aiter_tensor_t +{ + void* ptr; // data_ptr, pointer to GPU memory + size_t numel_; // total number of elements + int ndim; // number of dimensions + int64_t shape[8]; // size of each dimension, up to 8 dims (PyTorch limit) + int64_t strides[8]; // stride of each dimension + AiterDtype dtype_; // data type + int device_id; // GPU device index: 0, 1, 2, ... + + // torch::Tensor-compatible accessors + int64_t size(int i) const { return (i < 0) ? shape[ndim + i] : shape[i]; } + int64_t stride(int i) const { return (i < 0) ? 
strides[ndim + i] : strides[i]; } + void* data_ptr() const { return ptr; } + size_t numel() const { return numel_; } + int dim() const { return ndim; } + AiterDtype dtype() const { return dtype_; } + size_t element_size() const { return AiterDtype_element_size(dtype_); } + bool is_gpu() const { return device_id >= 0; } + bool is_cpu() const { return device_id == -1; } + + bool is_contiguous() const + { + int64_t expected = 1; + for(int d = ndim - 1; d >= 0; --d) + { + if(shape[d] != 1 && strides[d] != expected) + return false; + expected *= shape[d]; + } + return true; + } +}; + +/// RAII C++ class for GPU tensor, inherits aiter_tensor_t (POD). +/// - Factory methods: AiterTensor::empty(), AiterTensor::zeros() +/// - Auto hipFree on destruction +/// - Move-only (no copy) +/// - AiterTensor* is implicitly convertible to aiter_tensor_t* +class AiterTensor : public aiter_tensor_t +{ +public: + /// Allocate uninitialized GPU memory. + static AiterTensor empty(std::initializer_list dims, + AiterDtype dtype, + int device_id, + hipStream_t stream = nullptr) + { + AiterTensor t; + t.init_shape(dims, dtype, device_id); + t.stream_ = stream; + + size_t nbytes = t.numel_ * AiterDtype_element_size(dtype); + if(nbytes > 0) + { + HipDeviceGuard guard(device_id); + if(stream) + HIP_CALL(hipMallocAsync(&t.ptr, nbytes, stream)); + else + HIP_CALL(hipMalloc(&t.ptr, nbytes)); + } + t.owns_memory_ = true; + return t; + } + + /// Allocate uninitialized GPU memory with same shape/strides/dtype/device as `other`. + /// Preserves the original strides of `other`. + /// Allocates enough storage span to cover the full positive-stride layout. 
+ static AiterTensor empty_like(const aiter_tensor_t* other, + hipStream_t stream = nullptr) + { + AITER_CHECK(other != nullptr, __func__, ": other must not be null"); + AITER_CHECK(other->ndim <= 8, __func__, ": ndim ", other->ndim, " exceeds max 8"); + AiterTensor t; + t.ndim = other->ndim; + t.numel_ = other->numel_; + t.dtype_ = other->dtype_; + t.device_id = other->device_id; + + size_t storage_nelem = (t.numel_ == 0) ? 0 : 1; + for(int i = 0; i < other->ndim; ++i) + { + t.shape[i] = other->shape[i]; + t.strides[i] = other->strides[i]; + + AITER_CHECK(other->strides[i] >= 0, + __func__, + ": negative strides are not supported"); + if(storage_nelem > 0 && other->shape[i] > 1) + storage_nelem += static_cast(other->shape[i] - 1) * + static_cast(other->strides[i]); + } + + t.stream_ = stream; + + size_t nbytes = storage_nelem * AiterDtype_element_size(t.dtype_); + if(nbytes > 0) + { + HipDeviceGuard guard(t.device_id); + if(stream) + HIP_CALL(hipMallocAsync(&t.ptr, nbytes, stream)); + else + HIP_CALL(hipMalloc(&t.ptr, nbytes)); + } + t.owns_memory_ = true; + return t; + } + + /// Allocate zero-initialized GPU memory. 
+ static AiterTensor zeros(std::initializer_list dims, + AiterDtype dtype, + int device_id, + hipStream_t stream = nullptr) + { + AiterTensor t = empty(dims, dtype, device_id, stream); + size_t nbytes = t.numel_ * AiterDtype_element_size(dtype); + if(nbytes > 0) + { + HipDeviceGuard guard(device_id); + if(stream) + HIP_CALL(hipMemsetAsync(t.ptr, 0, nbytes, stream)); + else + HIP_CALL(hipMemset(t.ptr, 0, nbytes)); + } + return t; + } + + ~AiterTensor() + { + if(owns_memory_ && ptr) + { + HipDeviceGuard guard(device_id); + if(stream_) + hipFreeAsync(ptr, stream_); + else + hipFree(ptr); + ptr = nullptr; + } + } + + // Move constructor + AiterTensor(AiterTensor&& other) noexcept + : aiter_tensor_t(static_cast(other)), + owns_memory_(other.owns_memory_), + stream_(other.stream_) + { + other.owns_memory_ = false; + other.ptr = nullptr; + } + + // Move assignment + AiterTensor& operator=(AiterTensor&& other) noexcept + { + if(this != &other) + { + if(owns_memory_ && ptr) + { + HipDeviceGuard guard(device_id); + if(stream_) + hipFreeAsync(ptr, stream_); + else + hipFree(ptr); + } + static_cast(*this) = static_cast(other); + owns_memory_ = other.owns_memory_; + stream_ = other.stream_; + other.owns_memory_ = false; + other.ptr = nullptr; + } + return *this; + } + + // No copy + AiterTensor(const AiterTensor&) = delete; + AiterTensor& operator=(const AiterTensor&) = delete; + +private: + bool owns_memory_ = false; + hipStream_t stream_ = nullptr; + + AiterTensor() + { + // Zero-init the POD base + std::memset(static_cast(this), 0, sizeof(aiter_tensor_t)); + } + + void init_shape(std::initializer_list dims, AiterDtype dt, int dev) + { + AITER_CHECK(dims.size() <= 8, "AiterTensor supports at most 8 dims, got ", dims.size()); + ndim = static_cast(dims.size()); + int i = 0; + for(auto d : dims) + shape[i++] = d; + + // Row-major contiguous strides + if(ndim > 0) + { + strides[ndim - 1] = 1; + for(int d = ndim - 2; d >= 0; --d) + strides[d] = strides[d + 1] * shape[d + 1]; + } + 
+ numel_ = 1; + for(int d = 0; d < ndim; ++d) + numel_ *= shape[d]; + + dtype_ = dt; + device_id = dev; + } +}; diff --git a/csrc/include/ck_grouped_gemm_abi.h b/csrc/include/ck_grouped_gemm_abi.h new file mode 100644 index 0000000000000000000000000000000000000000..af70e841d182e570ef1d9d67d68b6aded6d1f1b4 --- /dev/null +++ b/csrc/include/ck_grouped_gemm_abi.h @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: MIT +// Pure-C public header for the CK grouped-GEMM C ABI. +// +// This header is intentionally free of C++, CK template, and torch/extension.h +// dependencies so that external projects can include it without pulling in the +// full CK or PyTorch headers. +// +// The same struct and enumerators are also defined inside +// 3rdparty/composable_kernel/example_hcu/ck_tile/19_grouped_gemm/grouped_gemm.hpp +// (inside an extern "C" block). When both headers are visible in a single +// translation unit the guard macro CK_GROUPED_GEMM_ABI_DEFINED prevents +// duplicate definitions. + +#pragma once + +#ifndef CK_GROUPED_GEMM_ABI_DEFINED +#define CK_GROUPED_GEMM_ABI_DEFINED + +#include + +#ifdef __cplusplus +#include +extern "C" { +#else +#include +#endif + +enum ck_tile_dcu_grouped_gemm_dtype +{ + CK_TILE_DCU_GROUPED_GEMM_FP16 = 0, + CK_TILE_DCU_GROUPED_GEMM_FP8 = 1, + CK_TILE_DCU_GROUPED_GEMM_INT8 = 2, + CK_TILE_DCU_GROUPED_GEMM_BF8 = 3, + CK_TILE_DCU_GROUPED_GEMM_BF16 = 4, + CK_TILE_DCU_GROUPED_GEMM_INT4 = 5 +}; + +// Per-group descriptor passed to ck_tile_dcu_grouped_gemm_run. +// +// Memory layout convention: +// A: [M, K] row-major (stride_A = K) +// B: [N, K] row-major stored, but interpreted as column-major by the kernel +// when b_layout='C' (stride_B = K), yielding C = A @ B^T +// C: [M, N] row-major (stride_C = N) +// +// d_ptrs / stride_Ds are optional bias tensors (set num_d_tensors=0 and both +// pointers to NULL for a plain GEMM). 
+struct ck_tile_dcu_grouped_gemm_desc +{ + const void* a_ptr; + const void* b_ptr; + void* c_ptr; + int k_batch; // SplitK factor; use 1 for standard GEMM + int M; + int N; + int K; + int stride_A; + int stride_B; + int stride_C; + int num_d_tensors; + const void* const* d_ptrs; + const int* stride_Ds; +}; + +// Returns the number of bytes of device workspace required for group_count groups. +size_t ck_tile_dcu_grouped_gemm_workspace_size(int group_count, int num_d_tensors); + +// Launches the grouped GEMM kernel. +// +// descs - array of group_count descriptors (device-visible pointers inside) +// group_count - number of GEMM groups +// dtype - element dtype (ck_tile_dcu_grouped_gemm_dtype) +// a_layout - 'R' (row-major) or 'C' (column-major) for A +// b_layout - 'R' or 'C' for B; use 'C' when B is stored as [N,K] row-major +// workspace - device buffer of at least ck_tile_dcu_grouped_gemm_workspace_size bytes +// stream - HIP stream to submit the kernel on +// +// Returns 0 on success, negative on error. +int ck_tile_dcu_grouped_gemm_run(const struct ck_tile_dcu_grouped_gemm_desc* descs, + int group_count, + int dtype, + char a_layout, + char b_layout, + void* workspace, + hipStream_t stream); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // CK_GROUPED_GEMM_ABI_DEFINED diff --git a/csrc/include/custom_all_reduce.cuh b/csrc/include/custom_all_reduce.cuh index ec4de7b6b964f7beb55435165dea87133e644764..a4bd632e4ddfcc201596e93999d600a087adc34b 100644 --- a/csrc/include/custom_all_reduce.cuh +++ b/csrc/include/custom_all_reduce.cuh @@ -1,6 +1,7 @@ #pragma once /* - * Copyright (C) 2024-2025, The vLLM team. + * Copyright (C) Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2024-2026, The vLLM team. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,224 +16,192 @@ * limitations under the License. 
*/ #include "aiter_hip_common.h" -#include "ck_tile/core.hpp" -#include "communication_asm.h" #include "hip_float8.h" +#include "opus/opus.hpp" #include #include #include #include #include #include +#include #include #include +namespace aiter { -namespace aiter +constexpr int kMaxBlocks = 80; +// note: we don't want to use atomics for signals because peer atomics are no +// supported on PCIe links +struct Signal { - - constexpr int kMaxBlocks = 80; - // note: we don't want to use atomics for signals because peer atomics are no - // supported on PCIe links - struct Signal - { alignas(128) uint32_t start[kMaxBlocks][8]; alignas(128) uint32_t end[kMaxBlocks][8]; alignas(128) uint32_t _flag[kMaxBlocks]; // incremental flags for each rank - }; +}; #ifdef USE_ROCM - struct __align__(16) RankData { const void *ptrs[8]; }; +struct __align__(16) RankData +{ + const void* ptrs[8]; +}; #else - struct __align__(16) RankData { const void *__restrict__ ptrs[8]; }; +struct __align__(16) RankData +{ + const void* __restrict__ ptrs[8]; +}; #endif - struct __align__(16) RankSignals - { +struct __align__(16) RankSignals +{ #ifndef USE_ROCM volatile #endif - Signal *signals[8]; - }; - - // like std::array, but aligned - template - struct __align__(alignof(T) * sz) array_t - { - T data[sz]; - using type = T; - static constexpr int size = sz; - }; - - // use packed type to maximize memory efficiency - // goal: generate ld.128 and st.128 instructions - template - struct packed_t - { - // the (P)acked type for load/store - using P = array_t; - // the (A)ccumulator type for reduction - using A = array_t; - }; + Signal* signals[8]; +}; #define DINLINE __device__ __forceinline__ - // scalar cast functions - DINLINE float upcast_s(half val) { return __half2float(val); } - - template - DINLINE T downcast_s(float val); - template <> - DINLINE half downcast_s(float val) - { - return __float2half(val); - } - - // scalar add functions - // for some reason when compiling with Pytorch, the + 
operator for half and - // bfloat is disabled so we call the intrinsics directly - DINLINE half &assign_add(half &a, half b) - { - a = __hadd(a, b); - return a; - } - DINLINE float &assign_add(float &a, float b) { return a += b; } - -#if (__CUDA_ARCH__ >= 800 || !defined(__CUDA_ARCH__)) - DINLINE float upcast_s(__hip_bfloat16 val) { return __bfloat162float(val); } - template <> - DINLINE __hip_bfloat16 downcast_s(float val) - { - return __float2bfloat16(val); - } - DINLINE __hip_bfloat16 &assign_add(__hip_bfloat16 &a, __hip_bfloat16 b) - { - a = __hadd(a, b); - return a; - } -#endif +// scalar cast functions +template +DINLINE opus::fp32_t upcast_s(inp_dtype val) +{ return opus::cast(val); } - template - DINLINE array_t &packed_assign_add(array_t &a, array_t b) - { -#pragma unroll - for (int i = 0; i < N; i++) +template <> +DINLINE opus::fp32_t upcast_s(opus::fp32_t val) +{ return val; } + +template +DINLINE out_dtype downcast_s(opus::fp32_t val) +{ return opus::cast(val); } + +template <> +DINLINE opus::fp32_t downcast_s(opus::fp32_t val) +{ return val; } + +// scalar add functions +// for some reason when compiling with Pytorch, the + operator for half and +// bfloat is disabled so we call the intrinsics directly +template +DINLINE opus::vector_t& packed_assign_add(opus::vector_t& a, opus::vector_t b) +{ + if constexpr(std::is_same::value) { - assign_add(a.data[i], b.data[i]); + a += b; + } + else + { +#pragma unroll + for(int i = 0; i < N; i++) + { + a[i] = downcast_s(upcast_s(a[i]) + upcast_s(b[i])); + } } return a; - } +} - template - DINLINE array_t upcast(array_t val) - { - if constexpr (std::is_same::value) +// not support fp8 pack convert +template , bool> = true> +DINLINE auto upcast(V val) -> opus::vector_t::size()> +{ + using T = typename opus::vector_traits::dtype; + constexpr int N = opus::vector_traits::size(); + if constexpr(std::is_same::value) { - return val; + return val; } else { - array_t out; -#pragma unroll - for (int i = 0; i < N; i++) - { - 
out.data[i] = upcast_s(val.data[i]); - } - return out; - } - } - - template - DINLINE O downcast(array_t val) - { - if constexpr (std::is_same::value) - { - return val; - } - // else if constexpr (std::is_same::value) - // { - // O out; - // #pragma unroll - // for (int i = 0; i < O::size; i++) - // { - // union fcvt { - // uint32_t i32; - // float f32; - // } u; - // u.f32 = val.data[i]; - // out.data[i] = __builtin_bit_cast(__hip_bfloat16, uint16_t(u.i32 >> 16)); - // } - // return out; - // } + opus::vector_t out; +#pragma unroll + for(int i = 0; i < N; i++) + { + out[i] = upcast_s(val[i]); + } + return out; + } +} + +template , bool> = true> +DINLINE O downcast(V val) +{ + using T = typename opus::vector_traits::dtype; + constexpr int N = opus::vector_traits::size(); + if constexpr(std::is_same::value) + { + return val; + } else { - O out; + O out; #pragma unroll - for (int i = 0; i < O::size; i++) - { - out.data[i] = downcast_s(val.data[i]); - } - return out; + for(int i = 0; i < N; i++) + { + out[i] = downcast_s(val[i]); + } + return out; } - } +} - // This function is meant to be used as the first synchronization in the all - // reduce kernel. Thus, it doesn't need to make any visibility guarantees for - // prior memory accesses. Note: volatile writes will not be reordered against - // other volatile writes. - template - DINLINE void start_sync(const RankSignals &sg, +// This function is meant to be used as the first synchronization in the all +// reduce kernel. Thus, it doesn't need to make any visibility guarantees for +// prior memory accesses. Note: volatile writes will not be reordered against +// other volatile writes. +template +DINLINE void start_sync(const RankSignals& sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - int rank) - { + Signal* self_sg, + int rank) +{ #ifdef USE_ROCM uint32_t flag = self_sg->_flag[blockIdx.x] + 1; - if (threadIdx.x < ngpus) - { - // simultaneously write to the corresponding flag of all ranks. 
- // Latency = 1 p2p write - __scoped_atomic_store_n(&sg.signals[threadIdx.x]->start[blockIdx.x][rank], - flag, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); - // wait until we got true from all ranks - while (__scoped_atomic_load_n(&self_sg->start[blockIdx.x][threadIdx.x], - __ATOMIC_RELAXED, - __MEMORY_SCOPE_DEVICE) < flag) - ; + if(threadIdx.x < ngpus) + { + // simultaneously write to the corresponding flag of all ranks. + // Latency = 1 p2p write + __scoped_atomic_store_n(&sg.signals[threadIdx.x]->start[blockIdx.x][rank], + flag, + __ATOMIC_RELAXED, + __MEMORY_SCOPE_SYSTEM); + // wait until we got true from all ranks + while(__scoped_atomic_load_n(&self_sg->start[blockIdx.x][threadIdx.x], + __ATOMIC_RELAXED, + __MEMORY_SCOPE_DEVICE) < flag) + ; } __syncthreads(); // use one thread to update flag - if (threadIdx.x == 0) - self_sg->_flag[blockIdx.x] = flag; + if(threadIdx.x == 0) + self_sg->_flag[blockIdx.x] = flag; #else - if (threadIdx.x < ngpus) + if(threadIdx.x < ngpus) { - // reset flag for next time - self_sg->end[blockIdx.x][threadIdx.x] = 0; - // simultaneously write to the corresponding flag of all ranks. - // Latency = 1 p2p write - sg.signals[threadIdx.x]->start[blockIdx.x][rank] = 1; - // wait until we got true from all ranks - while (!self_sg->start[blockIdx.x][threadIdx.x]) - ; + // reset flag for next time + self_sg->end[blockIdx.x][threadIdx.x] = 0; + // simultaneously write to the corresponding flag of all ranks. + // Latency = 1 p2p write + sg.signals[threadIdx.x]->start[blockIdx.x][rank] = 1; + // wait until we got true from all ranks + while(!self_sg->start[blockIdx.x][threadIdx.x]) + ; } __syncthreads(); #endif - } +} - // This function is meant to be used as the second or the final synchronization - // barrier in the all reduce kernel. If it's the final synchronization barrier, - // we don't need to make any visibility guarantees for prior memory accesses. 
- template - DINLINE void end_sync(const RankSignals &sg, +// This function is meant to be used as the second or the final synchronization +// barrier in the all reduce kernel. If it's the final synchronization barrier, +// we don't need to make any visibility guarantees for prior memory accesses. +template +DINLINE void end_sync(const RankSignals& sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - int rank) - { + Signal* self_sg, + int rank) +{ #ifdef USE_ROCM __syncthreads(); // eliminate the case that prior writes are not visible after signals become @@ -240,131 +209,135 @@ namespace aiter // testing. Might be the case that hardware provides stronger guarantee than // the memory model. uint32_t flag = self_sg->_flag[blockIdx.x] + 1; - if (threadIdx.x < ngpus) - { - // simultaneously write to the corresponding flag of all ranks. - // Latency = 1 p2p write - __scoped_atomic_store_n(&sg.signals[threadIdx.x]->end[blockIdx.x][rank], - flag, - final_sync ? __ATOMIC_RELAXED : __ATOMIC_RELEASE, - __MEMORY_SCOPE_SYSTEM); - // wait until we got true from all ranks - while ( - __scoped_atomic_load_n(&self_sg->end[blockIdx.x][threadIdx.x], - final_sync ? __ATOMIC_RELAXED : __ATOMIC_ACQUIRE, - __MEMORY_SCOPE_DEVICE) < flag) - ; + if(threadIdx.x < ngpus) + { + // simultaneously write to the corresponding flag of all ranks. + // Latency = 1 p2p write + __scoped_atomic_store_n(&sg.signals[threadIdx.x]->end[blockIdx.x][rank], + flag, + final_sync ? __ATOMIC_RELAXED : __ATOMIC_RELEASE, + __MEMORY_SCOPE_SYSTEM); + // wait until we got true from all ranks + while(__scoped_atomic_load_n(&self_sg->end[blockIdx.x][threadIdx.x], + final_sync ? 
__ATOMIC_RELAXED : __ATOMIC_ACQUIRE, + __MEMORY_SCOPE_DEVICE) < flag) + ; } __syncthreads(); // use one thread to update flag - if (threadIdx.x == 0) - self_sg->_flag[blockIdx.x] = flag; + if(threadIdx.x == 0) + self_sg->_flag[blockIdx.x] = flag; #else __syncthreads(); // eliminate the case that prior writes are not visible after signals become // visible. Note that I did not managed to make this happen through a lot of // testing. Might be the case that hardware provides stronger guarantee than // the memory model. - if constexpr (!final_sync) - __threadfence_system(); - if (threadIdx.x < ngpus) - { - // reset flag for next time - self_sg->start[blockIdx.x][threadIdx.x] = 0; - // simultaneously write to the corresponding flag of all ranks. - // Latency = 1 p2p write - sg.signals[threadIdx.x]->end[blockIdx.x][rank] = 1; - // wait until we got true from all ranks - while (!self_sg->end[blockIdx.x][threadIdx.x]) - ; - } - if constexpr (!final_sync) - __syncthreads(); + if constexpr(!final_sync) + __threadfence_system(); + if(threadIdx.x < ngpus) + { + // reset flag for next time + self_sg->start[blockIdx.x][threadIdx.x] = 0; + // simultaneously write to the corresponding flag of all ranks. + // Latency = 1 p2p write + sg.signals[threadIdx.x]->end[blockIdx.x][rank] = 1; + // wait until we got true from all ranks + while(!self_sg->end[blockIdx.x][threadIdx.x]) + ; + } + if constexpr(!final_sync) + __syncthreads(); #endif - } +} - template - DINLINE P packed_reduce(const P *ptrs[], int idx) - { +template +DINLINE P packed_reduce(const P* ptrs[], int idx) +{ A tmp = upcast(ptrs[0][idx]); #pragma unroll - for (int i = 1; i < ngpus; i++) + for(int i = 1; i < ngpus; i++) { - packed_assign_add(tmp, upcast(ptrs[i][idx])); + packed_assign_add::dtype, opus::vector_traits::size()>( + tmp, upcast(ptrs[i][idx])); } return downcast

(tmp); - } +} - template - __global__ void __launch_bounds__(512, 1) - cross_device_reduce_1stage_naive(RankData *_dp, RankSignals sg, +template +__global__ void __launch_bounds__(512, 1) cross_device_reduce_1stage_naive(RankData* _input_dp, + RankData* _output_dp, + RankSignals sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - T *__restrict__ result, int rank, int size) - { - using P = typename packed_t::P; - using A = typename packed_t::A; + Signal* self_sg, + T* __restrict__ result, + int rank, + int size) +{ + constexpr int pack_size = 16 / sizeof(T); + using P = typename opus::vector_t; + using A = typename opus::vector_t; // note: we don't reorder the address so the accumulation order is the same // for all ranks, ensuring bitwise identical results - auto dp = *_dp; + auto dp = *_input_dp; start_sync(sg, self_sg, rank); // do the actual reduction - for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; - idx += gridDim.x * blockDim.x) + for(int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < size; idx += gridDim.x * blockDim.x) { - ((P *)result)[idx] = packed_reduce((const P **)&dp.ptrs[0], idx); + ((P*)result)[idx] = packed_reduce((const P**)&dp.ptrs[0], idx); } end_sync(sg, self_sg, rank); - // // Step-2 consumes data written by peers in step-1, so we need - // // visibility guarantees from this barrier. 
- // end_sync(sg, self_sg, rank); - } +} - template +template #ifdef USE_ROCM - DINLINE P *get_tmp_buf(Signal *sg) - { +DINLINE P* get_tmp_buf(Signal* sg) +{ #else - DINLINE P *get_tmp_buf(volatile Signal *sg) - { +DINLINE P* get_tmp_buf(volatile Signal* sg) +{ #endif - return (P *)(((Signal *)sg) + 1); - } + return (P*)(((Signal*)sg) + 1); +} - template - __global__ void __launch_bounds__(512, 1) - cross_device_reduce_2stage_naive(RankData *_dp, RankSignals sg, +template +__global__ void __launch_bounds__(512, 1) cross_device_reduce_2stage_naive(RankData* _input_dp, + RankData* _output_dp, + RankSignals sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - T *__restrict__ result, int rank, int size) - { - int tid = blockIdx.x * blockDim.x + threadIdx.x; - int stride = gridDim.x * blockDim.x; - using P = typename packed_t::P; - using A = typename packed_t::A; - int part = size / ngpus; - int start = rank * part; - int end = rank == ngpus - 1 ? size : start + part; - int largest_part = part + size % ngpus; - const P *ptrs[ngpus]; - P *tmps[ngpus]; -#pragma unroll - for (int i = 0; i < ngpus; i++) - { - int target = (rank + i) % ngpus; - ptrs[i] = (const P *)_dp->ptrs[target]; - tmps[i] = get_tmp_buf

(sg.signals[target]); + Signal* self_sg, + T* __restrict__ result, + int rank, + int size) +{ + constexpr int pack_size = 16 / sizeof(T); + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = gridDim.x * blockDim.x; + using P = typename opus::vector_t; + using A = typename opus::vector_t; + int part = size / ngpus; + int start = rank * part; + int end = rank == ngpus - 1 ? size : start + part; + int largest_part = part + size % ngpus; + const P* ptrs[ngpus]; + P* tmps[ngpus]; +#pragma unroll + for(int i = 0; i < ngpus; i++) + { + int target = (rank + i) % ngpus; + ptrs[i] = (const P*)_input_dp->ptrs[target]; + tmps[i] = get_tmp_buf

(sg.signals[target]); } auto tmp_out = tmps[0]; start_sync(sg, self_sg, rank); // stage 1: reduce scatter - for (int idx = start + tid; idx < end; idx += stride) + for(int idx = start + tid; idx < end; idx += stride) { - tmp_out[idx - start] = packed_reduce(ptrs, idx); + tmp_out[idx - start] = packed_reduce(ptrs, idx); } end_sync(sg, self_sg, rank); @@ -373,155 +346,199 @@ namespace aiter // between threads that have the same tid. If thread i computes the sum of // start + i in the first stage, then thread i also gathers start + i from all // ranks. - for (int idx = tid; idx < largest_part; idx += stride) + for(int idx = tid; idx < largest_part; idx += stride) { #pragma unroll - for (int i = 0; i < ngpus; i++) - { - int gather_from_rank = ((rank + i) % ngpus); - if (gather_from_rank == ngpus - 1 || idx < part) + for(int i = 0; i < ngpus; i++) { - int dst_idx = gather_from_rank * part + idx; - ((P *)result)[dst_idx] = tmps[i][idx]; + int gather_from_rank = ((rank + i) % ngpus); + if(gather_from_rank == ngpus - 1 || idx < part) + { + int dst_idx = gather_from_rank * part + idx; + ((P*)result)[dst_idx] = tmps[i][idx]; + } } - } } - } +} #define THREAD_NUM 512 -// Toggle whether fused allreduce+rmsnorm keeps per-element rms input in float -// before the final cast to output dtype. 
-#ifndef AITER_FUSED_AR_RMS_KEEP_RMS_INP_F32 -#define AITER_FUSED_AR_RMS_KEEP_RMS_INP_F32 1 -#endif - - template - __global__ void __launch_bounds__(512, 1) - cross_device_reduce_1stage(RankData *_dp, RankSignals sg, +template +__global__ void __launch_bounds__(512, 1) cross_device_reduce_1stage(RankData* _input_dp, + RankData* _output_dp, + RankSignals sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - T *__restrict__ result, int rank, int size) - { - using P = typename packed_t::P; - using A = typename packed_t::A; - constexpr int pack_size = packed_t::P::size; + Signal* self_sg, + T* __restrict__ result, + int rank, + int size) +{ + constexpr int pack_size = 16 / sizeof(T); + using P = typename opus::vector_t; + using A = typename opus::vector_t; + constexpr int tnum_gpu = THREAD_NUM / ngpus; - __shared__ T tmp_smem[tnum_gpu * ngpus * pack_size]; // note: we don't reorder the address so the accumulation order is the same // for all ranks, ensuring bitwise identical results - auto dp = *_dp; - - // load one gpu data each wave + auto dp = *_input_dp; int warp_id = threadIdx.x / tnum_gpu; int lane_id = threadIdx.x % tnum_gpu; + + // --- double buffer: tmp_smem[0] and tmp_smem[1] --- + __shared__ P tmp_smem[2][tnum_gpu * ngpus]; + + const int step = gridDim.x * tnum_gpu; + const int start = blockIdx.x * tnum_gpu + lane_id; + start_sync(sg, self_sg, rank); - // do the actual reduction - for (int idx = blockIdx.x * tnum_gpu + lane_id; idx < size; - idx += gridDim.x * tnum_gpu) + + // --- compute uniform iteration count (to keep barriers well-formed) --- + const int first = blockIdx.x * tnum_gpu; + int iters = 0; { - *(reinterpret_cast(&tmp_smem[0]) + threadIdx.x) = ((const P**)&dp.ptrs[0])[warp_id][idx]; - __syncthreads(); - if (warp_id == 0) - { - A add_reg; -#pragma unroll - for (int i = 0; i < pack_size; ++i) + int rem = size - first; + iters = rem > 0 ? 
(rem + step - 1) / step : 0; + } + + // ------------------------------- + // fill buffer 0 + // ------------------------------- + int buf = 0; + int idx0 = start; + + if(idx0 < size) + { + P val = ((const P**)&dp.ptrs[0])[warp_id][idx0]; + tmp_smem[buf][warp_id * tnum_gpu + lane_id] = val; + } + __syncthreads(); + + for(int it = 0; it < iters; ++it) + { + const int cur_idx = idx0 + it * step; + const int next_idx = cur_idx + step; + const int next_buf = buf ^ 1; + + // ======================================================= + // 1. Warp 0 REDUCES current buffer + // ======================================================= + if(warp_id == 0 && cur_idx < size) { - add_reg.data[i] = ck_tile::type_convert(tmp_smem[threadIdx.x * pack_size + i]); - } - constexpr int smem_gpu_loop_stride = tnum_gpu * pack_size; + // GPU 0 contribution + P v0 = tmp_smem[buf][0 * tnum_gpu + lane_id]; + + A acc; #pragma unroll - for (int i = 1; i < ngpus; ++i) - { + for(int j = 0; j < pack_size; ++j) + acc[j] = upcast_s(v0[j]); + + // GPUs 1..(ngpus-1) #pragma unroll - for (int j = 0; j < pack_size; ++j) - { - add_reg.data[j] += ck_tile::type_convert(tmp_smem[smem_gpu_loop_stride * i + threadIdx.x * pack_size + j]); - } - } - P write_reg; + for(int g = 1; g < ngpus; ++g) + { + P vg = tmp_smem[buf][g * tnum_gpu + lane_id]; +#pragma unroll + for(int j = 0; j < pack_size; ++j) + acc[j] += upcast_s(vg[j]); + } + + // store result + P out; #pragma unroll - for (int i = 0; i < pack_size; ++i) + for(int j = 0; j < pack_size; ++j) + out[j] = downcast_s(acc[j]); + + ((P*)result)[cur_idx] = out; + } + + // ======================================================= + // 2. 
ALL warps prefetch NEXT buffer + // (including warp 0; safe to issue after reduction) + // ======================================================= + if(next_idx < size) { - write_reg.data[i] = ck_tile::type_convert(add_reg.data[i]); + P nxt = ((const P**)&dp.ptrs[0])[warp_id][next_idx]; + tmp_smem[next_buf][warp_id * tnum_gpu + lane_id] = nxt; } - ((P *)result)[idx] = write_reg; - } - __syncthreads(); + + __syncthreads(); + + buf = next_buf; } - // maybe do not need device sync - // end_sync(sg, self_sg, rank); - } +} - template - __global__ void __launch_bounds__(512, 1) - cross_device_reduce_2stage(RankData *_dp, RankSignals sg, +template +__global__ void __launch_bounds__(512, 1) cross_device_reduce_2stage(RankData* _input_dp, + RankData* _output_dp, + RankSignals sg, #ifndef USE_ROCM - volatile + volatile #endif - Signal *self_sg, - T *__restrict__ result, int rank, int size) - { - constexpr int pack_size = packed_t::P::size; - constexpr int tnum_gpu = THREAD_NUM / ngpus; - using P = typename packed_t::P; - using A = typename packed_t::A; + Signal* self_sg, + T* __restrict__ result, + int rank, + int size) +{ + constexpr int pack_size = 16 / sizeof(T); + constexpr int tnum_gpu = THREAD_NUM / ngpus; + using P = typename opus::vector_t; + using A = typename opus::vector_t; + int warp_id = threadIdx.x / tnum_gpu; + int lane_id = threadIdx.x % tnum_gpu; + int tid = blockIdx.x * tnum_gpu + lane_id; + int stride = gridDim.x * tnum_gpu; + int part = size / ngpus; + int start = rank * part; + int end = rank == ngpus - 1 ? size : start + part; + int largest_part = part + size % ngpus; __shared__ T tmp_smem[tnum_gpu * ngpus * pack_size]; - int warp_id = threadIdx.x / tnum_gpu; - int lane_id = threadIdx.x % tnum_gpu; - int tid = blockIdx.x * tnum_gpu + lane_id; - int stride = gridDim.x * tnum_gpu; - int part = size / ngpus; - int start = rank * part; - int end = rank == ngpus - 1 ? 
size : start + part; - int largest_part = part + size % ngpus; - const P *ptrs[ngpus]; - P *tmps[ngpus]; + const P* ptrs[ngpus]; + P* tmps[ngpus]; #pragma unroll - for (int i = 0; i < ngpus; i++) + for(int i = 0; i < ngpus; i++) { - int target = (rank + i) % ngpus; - ptrs[i] = (const P *)_dp->ptrs[target]; - tmps[i] = get_tmp_buf

(sg.signals[target]); + int target = (rank + i) % ngpus; + ptrs[i] = (const P*)_input_dp->ptrs[target]; + tmps[i] = get_tmp_buf

(sg.signals[target]); } auto tmp_out = tmps[0]; start_sync(sg, self_sg, rank); // stage 1: reduce scatter - for (int idx = start + tid; idx < end; idx += stride) + for(int idx = start + tid; idx < end; idx += stride) { - *(reinterpret_cast(&tmp_smem[0]) + threadIdx.x) = ptrs[warp_id][idx]; - __syncthreads(); - // cal add in first 64 threads - if (warp_id == 0) - { - A add_reg; -#pragma unroll - for (int i = 0; i < pack_size; ++i) + *(reinterpret_cast(&tmp_smem[0]) + threadIdx.x) = ptrs[warp_id][idx]; + __syncthreads(); + // cal add in first 64 threads + if(warp_id == 0) { - add_reg.data[i] = ck_tile::type_convert(tmp_smem[pack_size * threadIdx.x + i]); - } - constexpr int smem_gpu_loop_stride = tnum_gpu * pack_size; + A add_reg; #pragma unroll - for (int i = 1; i < ngpus; ++i) - { + for(int i = 0; i < pack_size; ++i) + { + add_reg[i] = upcast_s(tmp_smem[pack_size * threadIdx.x + i]); + } + constexpr int smem_gpu_loop_stride = tnum_gpu * pack_size; #pragma unroll - for (int j = 0; j < pack_size; ++j) - { - add_reg.data[j] += ck_tile::type_convert(tmp_smem[i * smem_gpu_loop_stride + pack_size * threadIdx.x + j]); - } - } - P write_reg; + for(int i = 1; i < ngpus; ++i) + { #pragma unroll - for (int i = 0; i < pack_size; ++i) - { - write_reg.data[i] = ck_tile::type_convert(add_reg.data[i]); + for(int j = 0; j < pack_size; ++j) + { + add_reg[j] += + upcast_s(tmp_smem[i * smem_gpu_loop_stride + pack_size * threadIdx.x + j]); + } + } + P write_reg; +#pragma unroll + for(int i = 0; i < pack_size; ++i) + { + write_reg[i] = downcast_s(add_reg[i]); + } + tmp_out[idx - start] = write_reg; } - tmp_out[idx - start] = write_reg; - } - __syncthreads(); + __syncthreads(); } end_sync(sg, self_sg, rank); @@ -530,736 +547,1914 @@ namespace aiter // between threads that have the same tid. If thread i computes the sum of // start + i in the first stage, then thread i also gathers start + i from all // ranks. 
- for (int idx = tid; idx < largest_part; idx += stride) - { - int dst_idx = (warp_id + rank) % ngpus * part + idx; - ((P *)result)[dst_idx] = tmps[warp_id][idx]; - } - } - - /* - * naive allgather - * for case: input(1345,) - * */ - template - __global__ void __launch_bounds__(512, 1) allgather_naive( - RankData* _dp, - RankSignals sg, - Signal* self_sg, - T* __restrict__ result, - int rank, - int size - ) - { - constexpr int tnum_gpu = THREAD_NUM / ngpus; + for(int idx = tid; idx < largest_part; idx += stride) + { + int dst_idx = (warp_id + rank) % ngpus * part + idx; + ((P*)result)[dst_idx] = tmps[warp_id][idx]; + } +} + +template +__global__ void __launch_bounds__(512, 1) + cross_device_reduce_2stage_write_mode(RankData* _input_dp, + RankData* _output_dp, + RankSignals sg, +#ifndef USE_ROCM + volatile +#endif + Signal* self_sg, + T* __restrict__ result, + int rank, + int size) +{ + constexpr int pack_size = 16 / sizeof(T); + constexpr int tnum_gpu = THREAD_NUM / ngpus; + using P = typename opus::vector_t; + using A = typename opus::vector_t; + __shared__ T tmp_smem[tnum_gpu * ngpus * pack_size]; + __shared__ T res_smem[tnum_gpu * pack_size]; int warp_id = threadIdx.x / tnum_gpu; int lane_id = threadIdx.x % tnum_gpu; - int tid = blockIdx.x * tnum_gpu + lane_id; - int stride = gridDim.x * tnum_gpu; + int tid = blockIdx.x * tnum_gpu + lane_id; + int stride = gridDim.x * tnum_gpu; + int part = size / ngpus; + P* output_ptrs[ngpus]; + P* tmps[ngpus]; +#pragma unroll + for(int i = 0; i < ngpus; i++) + { + tmps[i] = get_tmp_buf

(sg.signals[i]); + } + if(is_broadcast_reg_outptr) + { +#pragma unroll + for(int i = 0; i < ngpus; i++) + { + output_ptrs[i] = (P*)_output_dp->ptrs[i]; + } + } + const P* input_ptr = (const P*)_input_dp->ptrs[rank]; + auto tmp_out = tmps[rank]; + int stage3_offset = size; + + // stage1: write local rank data to remote rank + int start = warp_id * part; + int end = warp_id == ngpus - 1 ? size : start + part; + for(int idx = start + tid; idx < end; idx += stride) + { + tmps[warp_id][rank * part + idx - start] = input_ptr[idx]; + } + end_sync(sg, self_sg, rank); + + // stage 2: reduce scatter & write result to remote rank + end = rank != ngpus - 1 ? part : size - part * (ngpus - 1); + for(int idx = tid; idx < end; idx += stride) + { + *(reinterpret_cast(&tmp_smem[0]) + threadIdx.x) = tmp_out[warp_id * part + idx]; + __syncthreads(); + // cal add in first 64 threads + if(warp_id == 0) + { + A add_reg; +#pragma unroll + for(int i = 0; i < pack_size; ++i) + { + add_reg[i] = upcast_s(tmp_smem[pack_size * threadIdx.x + i]); + } + constexpr int smem_gpu_loop_stride = tnum_gpu * pack_size; +#pragma unroll + for(int i = 1; i < ngpus; ++i) + { +#pragma unroll + for(int j = 0; j < pack_size; ++j) + { + add_reg[j] += + upcast_s(tmp_smem[i * smem_gpu_loop_stride + pack_size * threadIdx.x + j]); + } + } + P write_reg; +#pragma unroll + for(int i = 0; i < pack_size; ++i) + { + write_reg[i] = downcast_s(add_reg[i]); + } + *(reinterpret_cast(&res_smem[0]) + lane_id) = write_reg; + } + __syncthreads(); + // send data to remote rank + if(is_broadcast_reg_outptr) + { + P temp_val = *(reinterpret_cast(&res_smem[0]) + lane_id); + auto src_addr = (reinterpret_cast(&temp_val)); + auto dst_addr = (reinterpret_cast(&output_ptrs[warp_id][rank * part + idx])); + __builtin_nontemporal_store(*src_addr, dst_addr); + __builtin_nontemporal_store(*(src_addr + 1), dst_addr + 1); + __builtin_nontemporal_store(*(src_addr + 2), dst_addr + 2); + __builtin_nontemporal_store(*(src_addr + 3), dst_addr + 3); 
+ } + else + { + tmps[warp_id][rank * part + idx + stage3_offset] = + *(reinterpret_cast(&res_smem[0]) + lane_id); + } + } + end_sync(sg, self_sg, rank); + + if(!is_broadcast_reg_outptr) + { + // stage 3: get the output from tmp_buffer + end = warp_id == ngpus - 1 ? size : start + part; + for(int idx = start + tid; idx < end; idx += stride) + { + ((P*)result)[idx] = tmp_out[idx + stage3_offset]; + } + } +} + +/* + * naive allgather + * for case: input(1345,) + * */ +template +__global__ void __launch_bounds__(512, 1) allgather_naive( + RankData* _dp, RankSignals sg, Signal* self_sg, T* __restrict__ result, int rank, int size) +{ + constexpr int tnum_gpu = THREAD_NUM / ngpus; + int warp_id = threadIdx.x / tnum_gpu; + int lane_id = threadIdx.x % tnum_gpu; + int tid = blockIdx.x * tnum_gpu + lane_id; + int stride = gridDim.x * tnum_gpu; const T* ptrs[ngpus]; #pragma unroll - for (int i = 0; i < ngpus; ++i) + for(int i = 0; i < ngpus; ++i) { - ptrs[i] = (const T*)_dp->ptrs[i]; + ptrs[i] = (const T*)_dp->ptrs[i]; } start_sync(sg, self_sg, rank); - for (int idx = tid; idx < size; idx += stride) + for(int idx = tid; idx < size; idx += stride) { - int write_idx = warp_id * size + idx; - result[write_idx] = ptrs[warp_id][idx]; + int write_idx = warp_id * size + idx; + result[write_idx] = ptrs[warp_id][idx]; } - } +} - template - __global__ void __launch_bounds__(512, 1) allgather_vec( - RankData* _dp, - RankSignals sg, - Signal* self_sg, - T* __restrict__ result, - int rank, - int size - ) - { - constexpr int tnum_gpu = THREAD_NUM / ngpus; - using P = typename packed_t::P; - int warp_id = threadIdx.x / tnum_gpu; - int lane_id = threadIdx.x % tnum_gpu; - int tid = blockIdx.x * tnum_gpu + lane_id; - int stride = gridDim.x * tnum_gpu; +template +__global__ void __launch_bounds__(512, 1) allgather_vec( + RankData* _dp, RankSignals sg, Signal* self_sg, T* __restrict__ result, int rank, int size) +{ + constexpr int tnum_gpu = THREAD_NUM / ngpus; + constexpr int pack_size = 16 / 
sizeof(T); + using P = typename opus::vector_t; + int warp_id = threadIdx.x / tnum_gpu; + int lane_id = threadIdx.x % tnum_gpu; + int tid = blockIdx.x * tnum_gpu + lane_id; + int stride = gridDim.x * tnum_gpu; + const P* ptrs[ngpus]; + +#pragma unroll + for(int i = 0; i < ngpus; ++i) + { + ptrs[i] = (const P*)_dp->ptrs[i]; + } + start_sync(sg, self_sg, rank); + + for(int idx = tid; idx < size; idx += stride) + { + int write_idx = warp_id * size + idx; + *(reinterpret_cast(&result[0]) + write_idx) = ptrs[warp_id][idx]; + } +} + +template +__global__ void __launch_bounds__(512, 1) allgather_lastdim(RankData* _dp, + RankSignals sg, + Signal* self_sg, + T* __restrict__ result, + int rank, + int size, + int last_dim_size) +{ + constexpr int tnum_gpu = THREAD_NUM / ngpus; + constexpr int pack_size = 16 / sizeof(T); + using P = typename opus::vector_t; + int warp_id = threadIdx.x / tnum_gpu; + int lane_id = threadIdx.x % tnum_gpu; + int tid = blockIdx.x * tnum_gpu + lane_id; + int stride = gridDim.x * tnum_gpu; + + last_dim_size /= pack_size; const P* ptrs[ngpus]; #pragma unroll - for (int i = 0; i < ngpus; ++i) + for(int i = 0; i < ngpus; ++i) + { + ptrs[i] = (const P*)_dp->ptrs[i]; + } + start_sync(sg, self_sg, rank); + + for(int idx = tid; idx < size; idx += stride) + { + int y = idx / last_dim_size; + int x = idx % last_dim_size; + int write_idx = (ngpus * y + warp_id) * last_dim_size + x; + *(reinterpret_cast(&result[0]) + write_idx) = ptrs[warp_id][idx]; + } +} + +/* + * reduce_scatter, at first dim + * range = size / (pack_size * ngpu) + * for case: + * input:(ngpus * n) -> output:(n) + * input:(ngpus * m, n, ...) -> output(m, n, ...) 
+ * cond: size % (pack_size * ngpus) == 0 + * */ +template +__global__ void __launch_bounds__(512, 1) reduce_scatter_first_dim( + RankData* _dp, RankSignals sg, Signal* self_sg, T* __restrict__ result, int rank, int range) +{ + int tid = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + constexpr int pack_size = 16 / sizeof(T); + using P = typename opus::vector_t; + using A = typename opus::vector_t; + const P* ptrs[ngpus]; +#pragma unroll + for(int i = 0; i < ngpus; i++) { - ptrs[i] = (const P*)_dp->ptrs[i]; + int target = (rank + i) % ngpus; + ptrs[i] = (const P*)_dp->ptrs[target]; } start_sync(sg, self_sg, rank); - for (int idx = tid; idx < size; idx += stride) + for(int idx = tid; idx < range; idx += stride) { - int write_idx = warp_id * size + idx; - *(reinterpret_cast(&result[0]) + write_idx) = ptrs[warp_id][idx]; + int load_index = rank * range + idx; + int store_index = idx; + *(reinterpret_cast(result) + store_index) = + packed_reduce(ptrs, load_index); } - } +} - // fp8 quant all-reduce code start - template - struct Fp16Filter - { +// fp8 quant all-reduce code start +template +struct Fp16Filter +{ static const bool value = false; - }; +}; - template <> - struct Fp16Filter - { +template <> +struct Fp16Filter +{ static const bool value = true; - }; +}; - template - struct Bf16Filter - { +template +struct Bf16Filter +{ static const bool value = false; - }; +}; - template <> - struct Bf16Filter<__hip_bfloat16> - { +template <> +struct Bf16Filter +{ static const bool value = true; - }; +}; - // dtypes only support half and bf16 now -#define FP16_FILTER \ - typename std::enable_if::value, void>::type* = nullptr +// dtypes only support half and bf16 now +#define FP16_FILTER typename std::enable_if::value, void>::type* = nullptr -#define BF16_FILTER \ - typename std::enable_if::value, void>::type* = nullptr +#define BF16_FILTER typename std::enable_if::value, void>::type* = nullptr - template