Commit 083b80ea authored by zhuwenwen's avatar zhuwenwen
Browse files

增加w8a8相关修改

parent 09428eec
...@@ -661,7 +661,7 @@ if _build_custom_ops(): ...@@ -661,7 +661,7 @@ if _build_custom_ops():
ext_modules.append(CMakeExtension(name="vllm._C")) ext_modules.append(CMakeExtension(name="vllm._C"))
package_data = { package_data = {
"vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json", "benchmarks/*.py","model_executor/layers/quantization/configs/*.json"] "vllm": ["py.typed", "model_executor/layers/fused_moe/configs/*.json", "benchmarks/*.py","model_executor/layers/quantization/configs/w8a8/*.json"]
} }
if _no_device(): if _no_device():
......
...@@ -923,7 +923,7 @@ def triton_int8_gemm_helper(m: int, ...@@ -923,7 +923,7 @@ def triton_int8_gemm_helper(m: int,
out_dtype: Type[torch.dtype] = torch.float16, out_dtype: Type[torch.dtype] = torch.float16,
device: str = "cuda", device: str = "cuda",
best_config:Optional[list] = None): best_config:Optional[list] = None):
return quant_tools.triton_int8_gemm_helper(m,n,k,per_token_act_quant,per_out_channel_weight_quant,use_bias,out_dtype,device,est_config) return quant_tools.triton_int8_gemm_helper(m,n,k,per_token_act_quant,per_out_channel_weight_quant,use_bias,out_dtype,device,best_config)
def cutlass_scaled_mm_azp(a: torch.Tensor, def cutlass_scaled_mm_azp(a: torch.Tensor,
......
...@@ -226,7 +226,7 @@ class AWQLinearMethod(LinearMethodBase): ...@@ -226,7 +226,7 @@ class AWQLinearMethod(LinearMethodBase):
deqweight=ops.dequant_w4_gemm_colmajor( # shape[n, k/8] ---> [n,k] deqweight=ops.dequant_w4_gemm_colmajor( # shape[n, k/8] ---> [n,k]
qweight, qweight,
zeros_and_scales, zeros_and_scales,
k, k+padding_group*self.quant_config.group_size,
n, n,
self.quant_config.group_size) self.quant_config.group_size)
out=F.linear(reshaped_x, deqweight[:,0:k]) out=F.linear(reshaped_x, deqweight[:,0:k])
......
...@@ -24,7 +24,7 @@ class CompressedTensorsW8A8Int8(CompressedTensorsScheme): ...@@ -24,7 +24,7 @@ class CompressedTensorsW8A8Int8(CompressedTensorsScheme):
input_symmetric: bool): input_symmetric: bool):
self.strategy = strategy self.strategy = strategy
self.is_static_input_scheme = is_static_input_scheme self.is_static_input_scheme = is_static_input_scheme
self.w8a8_strategy=int(os.getenv('W8A8_SUPPORT_METHODS', '0')) self.w8a8_strategy=int(os.getenv('W8A8_SUPPORT_METHODS', '1'))
self.input_symmetric = input_symmetric self.input_symmetric = input_symmetric
@classmethod @classmethod
......
{ {
"12288_4096": { "12288_4096": {
"1": { "20": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"2": { "24": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"3": { "28": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"4": { "32": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -36,17 +36,17 @@ ...@@ -36,17 +36,17 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"5": { "36": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"6": { "40": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
...@@ -54,17 +54,17 @@ ...@@ -54,17 +54,17 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"7": { "44": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"8": { "48": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
...@@ -72,8 +72,8 @@ ...@@ -72,8 +72,8 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"9": { "52": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
...@@ -81,17 +81,17 @@ ...@@ -81,17 +81,17 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"10": { "56": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"11": { "60": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
...@@ -99,35 +99,53 @@ ...@@ -99,35 +99,53 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"12": { "64": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"13": { "72": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"14": { "80": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"15": { "88": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
...@@ -135,17 +153,17 @@ ...@@ -135,17 +153,17 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"16": { "112": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"17": { "120": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
...@@ -153,61 +171,79 @@ ...@@ -153,61 +171,79 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"18": { "128": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 4
}, },
"19": { "136": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 4
}, },
"20": { "144": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 4
}, },
"21": { "160": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 4
}, },
"22": { "1": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 8 "num_warps": 8
}, },
"23": { "2": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 8 "num_warps": 8
}, },
"24": { "3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -216,16 +252,16 @@ ...@@ -216,16 +252,16 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"25": { "5": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"26": { "6": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -234,52 +270,52 @@ ...@@ -234,52 +270,52 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"27": { "7": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 8 "num_warps": 8
}, },
"28": { "8": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 8 "num_warps": 8
}, },
"29": { "9": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"30": { "10": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 8 "num_warps": 8
}, },
"31": { "11": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"32": { "12": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -288,23 +324,41 @@ ...@@ -288,23 +324,41 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"64": { "13": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 2 "num_warps": 8
}, },
"128": { "14": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 4 "num_warps": 8
}, },
"256": { "256": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 128,
...@@ -330,7 +384,7 @@ ...@@ -330,7 +384,7 @@
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 4 "num_warps": 4
}, },
"2048": { "2048": {
......
{
"1280_8192": {
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"72": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 1,
"num_warps": 4
},
"88": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 1,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 2
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 8
},
"1024": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 1,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 1,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"13824_5120": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"14336_8192": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"11": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"12": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"13": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"14": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"17": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"40": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"15360_5120": {
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
}
}
}
\ No newline at end of file
{ {
"22016_4096": { "22016_4096": {
"1": { "20": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 2, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 0,
"num_warps": 2 "num_warps": 8
}, },
"2": { "24": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 4, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
}, },
"3": { "32": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 4, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
}, },
"4": { "36": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 4, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"5": { "40": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -45,34 +72,34 @@ ...@@ -45,34 +72,34 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"6": { "52": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"7": { "56": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"8": { "60": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"9": { "64": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
...@@ -81,245 +108,272 @@ ...@@ -81,245 +108,272 @@
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"10": { "72": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"11": { "80": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"12": { "88": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"13": { "96": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"14": { "104": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"15": { "112": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"16": { "120": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"17": { "128": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"18": { "136": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"19": { "144": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"20": { "152": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"21": { "160": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 8 "num_warps": 8
}, },
"22": { "1": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"23": { "2": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"24": { "3": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"25": { "4": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"26": { "6": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"27": { "7": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"28": { "8": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"29": { "9": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"30": { "10": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"31": { "11": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"32": { "12": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 4
}, },
"64": { "13": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 4
}, },
"128": { "14": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 0,
"num_warps": 8 "num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
}, },
"256": { "256": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 2,
"num_warps": 4 "num_warps": 4
}, },
"512": { "512": {
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 1, "num_stages": 1,
"num_warps": 4 "num_warps": 4
......
{
"2560_8192": {
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"17": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 8
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"80": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"96": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"144": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"160": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"4096": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"16384": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"27648_5120": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 4,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"40": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"28672_4096": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"11": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"28672_8192": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"11": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"12": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"13": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"14": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"48": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"60": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"32000_4096": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"11": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"15": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"44": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"48": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"256": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"3584_18944": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 2
},
"6": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 2
},
"20": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"56": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"60": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"1024": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
}
}
}
\ No newline at end of file
{
"3584_3584": {
"1": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"80": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"96": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"256": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"1024": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{
"37888_3584": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"28": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"32": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"48": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"52": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"56": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"1024": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
{ {
"4096_11008": { "4096_11008": {
"1": { "20": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"2": { "24": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"3": { "28": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"4": { "32": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"5": { "36": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 8
}, },
"6": { "40": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 2,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 16
}, },
"7": { "44": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 8
}, },
"8": { "48": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 8
}, },
"9": { "52": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 8
}, },
"10": { "56": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"11": { "60": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"12": { "64": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"13": { "72": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"14": { "80": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"15": { "88": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"16": { "96": {
"BLOCK_SIZE_M": 16, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 4
}, },
"17": { "104": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 4
}, },
"18": { "112": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 4
}, },
"19": { "120": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 4
}, },
"20": { "128": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 4
}, },
"21": { "136": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 4 "num_warps": 16
}, },
"22": { "144": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 16
}, },
"23": { "152": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 16
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 16
}, },
"24": { "1": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"25": { "2": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 2,
"num_warps": 2 "num_warps": 2
}, },
"26": { "4": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 2
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 2 "num_warps": 2
}, },
"27": { "6": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 2,
"num_warps": 2 "num_warps": 2
}, },
"28": { "7": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 8,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"29": { "8": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"30": { "9": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"31": { "11": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 2,
"num_warps": 2 "num_warps": 2
}, },
"32": { "13": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"64": { "14": {
"BLOCK_SIZE_M": 32, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 4,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 2 "num_warps": 2
}, },
"128": { "16": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64, "BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 1,
"num_warps": 4 "num_warps": 4
}, },
"256": { "256": {
"BLOCK_SIZE_M": 64, "BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256, "BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
...@@ -319,7 +373,7 @@ ...@@ -319,7 +373,7 @@
"BLOCK_SIZE_M": 128, "BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 8, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
...@@ -328,7 +382,7 @@ ...@@ -328,7 +382,7 @@
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 128, "BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128, "BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
...@@ -337,7 +391,7 @@ ...@@ -337,7 +391,7 @@
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64, "BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
...@@ -346,7 +400,7 @@ ...@@ -346,7 +400,7 @@
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64, "BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
...@@ -355,7 +409,7 @@ ...@@ -355,7 +409,7 @@
"BLOCK_SIZE_M": 256, "BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256, "BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64, "BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 4, "GROUP_SIZE_M": 2,
"SPLIT_K": 1, "SPLIT_K": 1,
"num_stages": 0, "num_stages": 0,
"num_warps": 8 "num_warps": 8
......
{
"4096_14336": {
"1": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"2": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"3": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"4": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"5": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"6": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"7": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"8": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"9": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"10": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"11": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"12": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"13": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"14": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"15": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 2
},
"16": {
"BLOCK_SIZE_M": 16,
"BLOCK_SIZE_N": 16,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 1,
"num_warps": 4
},
"20": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"24": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 8
},
"28": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 8
},
"32": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"36": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"40": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"44": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 16
},
"48": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 16
},
"52": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 16
},
"56": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"60": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"64": {
"BLOCK_SIZE_M": 32,
"BLOCK_SIZE_N": 32,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 4
},
"72": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"80": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 8
},
"88": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 2,
"num_warps": 16
},
"96": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"104": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"112": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"120": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 2,
"num_stages": 0,
"num_warps": 16
},
"128": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 512,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"136": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 16
},
"144": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 16
},
"152": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 16
},
"160": {
"BLOCK_SIZE_M": 64,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 16
},
"256": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 64,
"BLOCK_SIZE_K": 256,
"GROUP_SIZE_M": 4,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"512": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 128,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"1024": {
"BLOCK_SIZE_M": 128,
"BLOCK_SIZE_N": 128,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 4
},
"2048": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 8,
"SPLIT_K": 1,
"num_stages": 2,
"num_warps": 8
},
"4096": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
},
"8192": {
"BLOCK_SIZE_M": 256,
"BLOCK_SIZE_N": 256,
"BLOCK_SIZE_K": 64,
"GROUP_SIZE_M": 2,
"SPLIT_K": 1,
"num_stages": 0,
"num_warps": 8
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment