Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
083b80ea
Commit
083b80ea
authored
Jan 16, 2025
by
zhuwenwen
Browse files
增加w8a8相关修改
parent
09428eec
Changes
42
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
6716 additions
and
1093 deletions
+6716
-1093
setup.py
setup.py
+1
-1
vllm/_custom_ops.py
vllm/_custom_ops.py
+1
-1
vllm/model_executor/layers/quantization/awq.py
vllm/model_executor/layers/quantization/awq.py
+1
-1
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
...ompressed_tensors/schemes/compressed_tensors_w8a8_int8.py
+1
-1
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_12288_4096_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_12288_4096_K100_AI.json
+417
-363
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_1280_8192_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_1280_8192_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_13824_5120_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_13824_5120_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_14336_8192_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_14336_8192_K100_AI.json
+427
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_15360_5120_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_15360_5120_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_22016_4096_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_22016_4096_K100_AI.json
+417
-363
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_2560_8192_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_2560_8192_K100_AI.json
+436
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_27648_5120_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_27648_5120_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_4096_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_28672_4096_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_8192_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_28672_8192_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_32000_4096_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_32000_4096_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_18944_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_3584_18944_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_3584_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_3584_3584_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_37888_3584_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_37888_3584_K100_AI.json
+418
-0
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_4096_11008_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_4096_11008_K100_AI.json
+417
-363
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_4096_14336_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_4096_14336_K100_AI.json
+418
-0
No files found.
setup.py
View file @
083b80ea
...
...
@@ -661,7 +661,7 @@ if _build_custom_ops():
ext_modules
.
append
(
CMakeExtension
(
name
=
"vllm._C"
))
package_data
=
{
"vllm"
:
[
"py.typed"
,
"model_executor/layers/fused_moe/configs/*.json"
,
"benchmarks/*.py"
,
"model_executor/layers/quantization/configs/*.json"
]
"vllm"
:
[
"py.typed"
,
"model_executor/layers/fused_moe/configs/*.json"
,
"benchmarks/*.py"
,
"model_executor/layers/quantization/configs/
w8a8/
*.json"
]
}
if
_no_device
():
...
...
vllm/_custom_ops.py
View file @
083b80ea
...
...
@@ -923,7 +923,7 @@ def triton_int8_gemm_helper(m: int,
out_dtype
:
Type
[
torch
.
dtype
]
=
torch
.
float16
,
device
:
str
=
"cuda"
,
best_config
:
Optional
[
list
]
=
None
):
return
quant_tools
.
triton_int8_gemm_helper
(
m
,
n
,
k
,
per_token_act_quant
,
per_out_channel_weight_quant
,
use_bias
,
out_dtype
,
device
,
est_config
)
return
quant_tools
.
triton_int8_gemm_helper
(
m
,
n
,
k
,
per_token_act_quant
,
per_out_channel_weight_quant
,
use_bias
,
out_dtype
,
device
,
b
est_config
)
def
cutlass_scaled_mm_azp
(
a
:
torch
.
Tensor
,
...
...
vllm/model_executor/layers/quantization/awq.py
View file @
083b80ea
...
...
@@ -226,7 +226,7 @@ class AWQLinearMethod(LinearMethodBase):
deqweight
=
ops
.
dequant_w4_gemm_colmajor
(
# shape[n, k/8] ---> [n,k]
qweight
,
zeros_and_scales
,
k
,
k
+
padding_group
*
self
.
quant_config
.
group_size
,
n
,
self
.
quant_config
.
group_size
)
out
=
F
.
linear
(
reshaped_x
,
deqweight
[:,
0
:
k
])
...
...
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
View file @
083b80ea
...
...
@@ -24,7 +24,7 @@ class CompressedTensorsW8A8Int8(CompressedTensorsScheme):
input_symmetric
:
bool
):
self
.
strategy
=
strategy
self
.
is_static_input_scheme
=
is_static_input_scheme
self
.
w8a8_strategy
=
int
(
os
.
getenv
(
'W8A8_SUPPORT_METHODS'
,
'
0
'
))
self
.
w8a8_strategy
=
int
(
os
.
getenv
(
'W8A8_SUPPORT_METHODS'
,
'
1
'
))
self
.
input_symmetric
=
input_symmetric
@
classmethod
...
...
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_12288_4096_K100_AI.json
View file @
083b80ea
{
"12288_4096"
:
{
"
1
"
:
{
"
20
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"2"
:
{
"2
4
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
3
"
:
{
"
28
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
4
"
:
{
"
32
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -36,17 +36,17 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
5
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
36
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
6
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
40
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
...
...
@@ -54,17 +54,17 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
7
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
44
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"
4
8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
...
...
@@ -72,8 +72,8 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
9
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
52
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
...
...
@@ -81,17 +81,17 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
10
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
56
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
11
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
60
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
...
...
@@ -99,35 +99,53 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
12
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
64
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
13
"
:
{
"BLOCK_SIZE_M"
:
32
,
"
72
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
14
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
80
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
...
...
@@ -135,17 +153,17 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
6
"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
12
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
7
"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
20
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
...
...
@@ -153,61 +171,79 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"18"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
2
8"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_warps"
:
4
},
"1
9
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"1
36
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_warps"
:
4
},
"
2
1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"1
60
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_warps"
:
4
},
"
22
"
:
{
"
1
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"2
3
"
:
{
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"24"
:
{
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -216,16 +252,16 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
2
5"
:
{
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
2
6"
:
{
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -234,52 +270,52 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
2
7"
:
{
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"
2
8"
:
{
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"
2
9"
:
{
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
3
0"
:
{
"
1
0"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"
3
1"
:
{
"
1
1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
3
2"
:
{
"
1
2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -288,23 +324,41 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
64
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"
13
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
"num_stages"
:
2
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
...
...
@@ -330,7 +384,7 @@
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2048"
:
{
...
...
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_1280_8192_K100_AI.json
0 → 100644
View file @
083b80ea
{
"1280_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_13824_5120_K100_AI.json
0 → 100644
View file @
083b80ea
{
"13824_5120"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_14336_8192_K100_AI.json
0 → 100644
View file @
083b80ea
{
"14336_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"17"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_15360_5120_K100_AI.json
0 → 100644
View file @
083b80ea
{
"15360_5120"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_22016_4096_K100_AI.json
View file @
083b80ea
{
"22016_4096"
:
{
"
1
"
:
{
"
20
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
2
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"3
2
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"
4
"
:
{
"
36
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"5"
:
{
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -45,34 +72,34 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
6
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"
52
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_stages"
:
0
,
"num_warps"
:
4
},
"
7
"
:
{
"
56
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
8
"
:
{
"
60
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
9
"
:
{
"
64
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
256
,
...
...
@@ -81,245 +108,272 @@
"num_stages"
:
1
,
"num_warps"
:
8
},
"
10
"
:
{
"
72
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
11
"
:
{
"
80
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
12
"
:
{
"
88
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
13
"
:
{
"
96
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"14"
:
{
"1
0
4"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
5
"
:
{
"1
12
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
6
"
:
{
"1
20
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
7
"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
28
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
8
"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
36
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"1
9
"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
44
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"2
0
"
:
{
"BLOCK_SIZE_M"
:
16
,
"
15
2"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
2
1"
:
{
"BLOCK_SIZE_M"
:
16
,
"1
60
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"
22
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"
1
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"2
3
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
24
"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"
3
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"25"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
2
6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
2
7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
2
8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
2
9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
3
0"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"
1
0"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
3
1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"
1
1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
3
2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"
1
2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
"num_warps"
:
4
},
"
64
"
:
{
"
13
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"1
28
"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
1
28
,
"BLOCK_SIZE_K"
:
256
,
"1
4
"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
1
6
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
...
...
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_2560_8192_K100_AI.json
0 → 100644
View file @
083b80ea
{
"2560_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"17"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"16384"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_27648_5120_K100_AI.json
0 → 100644
View file @
083b80ea
{
"27648_5120"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
4
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_4096_K100_AI.json
0 → 100644
View file @
083b80ea
{
"28672_4096"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_8192_K100_AI.json
0 → 100644
View file @
083b80ea
{
"28672_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_32000_4096_K100_AI.json
0 → 100644
View file @
083b80ea
{
"32000_4096"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_18944_K100_AI.json
0 → 100644
View file @
083b80ea
This diff is collapsed.
Click to expand it.
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_3584_K100_AI.json
0 → 100644
View file @
083b80ea
This diff is collapsed.
Click to expand it.
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_37888_3584_K100_AI.json
0 → 100644
View file @
083b80ea
This diff is collapsed.
Click to expand it.
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_4096_11008_K100_AI.json
View file @
083b80ea
This diff is collapsed.
Click to expand it.
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_4096_14336_K100_AI.json
0 → 100644
View file @
083b80ea
This diff is collapsed.
Click to expand it.
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment