Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
baf7d06a
Commit
baf7d06a
authored
Feb 28, 2025
by
zhuwenwen
Browse files
1. 修复w8a8找不到对应config的bug
2. 删除vllm的w8a8 config
parent
c5888d31
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
13 additions
and
5053 deletions
+13
-5053
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_5120_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_5120_5120_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_6912_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_5120_6912_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_8192_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_5120_8192_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_6144_4096_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_6144_4096_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_7168_8192_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_7168_8192_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_7680_5120_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_7680_5120_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_1024_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_8192_1024_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_14336_K100_AI.json
...rs/quantization/configs/w8a8/W8A8_8192_14336_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_2048_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_8192_2048_K100_AI.json
+0
-436
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_3584_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_8192_3584_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_4096_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_8192_4096_K100_AI.json
+0
-418
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_7168_K100_AI.json
...ers/quantization/configs/w8a8/W8A8_8192_7168_K100_AI.json
+0
-427
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_neox.py
+4
-5
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama.py
+3
-1
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen.py
+3
-1
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2.py
+3
-1
vllm/utils.py
vllm/utils.py
+0
-2
No files found.
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_5120_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"5120_5120"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_6912_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"5120_6912"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
16
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_5120_8192_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"5120_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
2
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
16
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"104"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"112"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"120"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"128"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"136"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"144"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"152"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"160"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"256"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_6144_4096_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"6144_4096"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_7168_8192_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"7168_8192"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
2
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_7680_5120_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"7680_5120"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"14"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_1024_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_1024"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_14336_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_14336"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_2048_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_2048"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
16
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"17"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
16
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16384"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_3584_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_3584"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"5"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"6"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"7"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"8"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"9"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"10"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
2
},
"11"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"256"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"1024"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"2048"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"4096"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"8192"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_4096_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_4096"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"36"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
2
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"64"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"88"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"136"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
32
,
"BLOCK_SIZE_K"
:
512
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"512"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/layers/quantization/configs/w8a8/W8A8_8192_7168_K100_AI.json
deleted
100644 → 0
View file @
c5888d31
{
"8192_7168"
:
{
"1"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
2
},
"2"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"3"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
8
},
"4"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
8
},
"5"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"6"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"7"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"8"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"9"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"10"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"11"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"12"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"13"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"14"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"15"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"16"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
2
,
"num_warps"
:
4
},
"17"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"20"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"24"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"28"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"32"
:
{
"BLOCK_SIZE_M"
:
16
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"36"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"40"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"44"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"48"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"52"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"56"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"60"
:
{
"BLOCK_SIZE_M"
:
32
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"64"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
64
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
4
},
"72"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"80"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"88"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"96"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"104"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"112"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"120"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"128"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"136"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"144"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"152"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"160"
:
{
"BLOCK_SIZE_M"
:
64
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
1
,
"num_warps"
:
4
},
"256"
:
{
"BLOCK_SIZE_M"
:
128
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
256
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"512"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
128
,
"BLOCK_SIZE_K"
:
128
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"1024"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
2
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"2048"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
4
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"4096"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
},
"8192"
:
{
"BLOCK_SIZE_M"
:
256
,
"BLOCK_SIZE_N"
:
256
,
"BLOCK_SIZE_K"
:
64
,
"GROUP_SIZE_M"
:
8
,
"SPLIT_K"
:
1
,
"num_stages"
:
0
,
"num_warps"
:
8
}
}
}
\ No newline at end of file
vllm/model_executor/models/gpt_neox.py
View file @
baf7d06a
...
...
@@ -370,6 +370,7 @@ class GPTNeoXForCausalLM(nn.Module, SupportsPP):
combined_words
=
"|"
.
join
(
lay_key_words
)
weight_shapes
=
[]
all_json
=
{}
matched_key_words
=
set
()
for
layername
,
weight
in
params_dict
.
items
():
matches
=
re
.
findall
(
combined_words
,
layername
)
...
...
@@ -384,20 +385,18 @@ class GPTNeoXForCausalLM(nn.Module, SupportsPP):
weight_data
.
data
.
copy_
(
_weight
)
#下面是针对模型记录模型出现k和n值
elif
len
(
weight_shapes
)
<
4
:
#k=weight_data.shape[1]
#print("n:{},k:{}".format(n,k))
elif
len
(
matched_key_words
)
<
4
and
matches
[
0
]
not
in
matched_key_words
:
matched_key_words
.
add
(
matches
[
0
])
weight_shapes
.
append
({
n
,
k
})
json_file
=
self
.
tritonsingleton
.
get_w8a8json_name
(
n
,
k
)
configs_dict
=
self
.
tritonsingleton
.
get_triton_cache
(
json_file
,
n
,
k
)
if
configs_dict
:
all_json
.
update
(
configs_dict
)
#("weight_shapes:",weight_shapes)
if
self
.
w8a8_strategy
==
1
:
self
.
tritonsingleton
.
triton_json_dict
.
append
(
all_json
)
#print("self.tritonsingleton.triton_json_dict:",self.tritonsingleton.triton_json_dict)
#找到的所有config都进行一次warmup
for
key
,
value
in
all_json
.
items
():
m
=
int
(
key
.
split
(
'_'
)[
0
])
...
...
vllm/model_executor/models/llama.py
View file @
baf7d06a
...
...
@@ -565,6 +565,7 @@ class LlamaModel(nn.Module):
combined_words
=
"|"
.
join
(
lay_key_words
)
weight_shapes
=
[]
all_json
=
{}
matched_key_words
=
set
()
for
layername
,
weight
in
params_dict
.
items
():
matches
=
re
.
findall
(
combined_words
,
layername
)
...
...
@@ -582,7 +583,8 @@ class LlamaModel(nn.Module):
weight_data
.
data
.
copy_
(
_weight
)
#下面是针对模型记录模型出现k和n值
elif
len
(
weight_shapes
)
<
4
:
elif
len
(
matched_key_words
)
<
4
and
matches
[
0
]
not
in
matched_key_words
:
matched_key_words
.
add
(
matches
[
0
])
k
=
weight_data
.
shape
[
1
]
weight_shapes
.
append
({
n
,
k
})
...
...
vllm/model_executor/models/qwen.py
View file @
baf7d06a
...
...
@@ -460,6 +460,7 @@ class QWenBaseModel(nn.Module):
combined_words
=
"|"
.
join
(
lay_key_words
)
weight_shapes
=
[]
all_json
=
{}
matched_key_words
=
set
()
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
...
...
@@ -474,7 +475,8 @@ class QWenBaseModel(nn.Module):
weight_data
.
data
.
copy_
(
_weight
)
#下面是针对模型记录模型出现k和n值
elif
len
(
weight_shapes
)
<
4
:
elif
len
(
matched_key_words
)
<
4
and
matches
[
0
]
not
in
matched_key_words
:
matched_key_words
.
add
(
matches
[
0
])
k
=
weight_data
.
shape
[
1
]
weight_shapes
.
append
({
n
,
k
})
...
...
vllm/model_executor/models/qwen2.py
View file @
baf7d06a
...
...
@@ -539,6 +539,7 @@ class Qwen2Model(nn.Module):
combined_words
=
"|"
.
join
(
lay_key_words
)
weight_shapes
=
[]
all_json
=
{}
matched_key_words
=
set
()
for
layername
in
loaded_params
:
weight
=
params_dict
[
layername
]
...
...
@@ -553,7 +554,8 @@ class Qwen2Model(nn.Module):
weight_data
.
data
.
copy_
(
_weight
)
#下面是针对模型记录模型出现k和n值
elif
len
(
weight_shapes
)
<
4
:
elif
len
(
matched_key_words
)
<
4
and
matches
[
0
]
not
in
matched_key_words
:
matched_key_words
.
add
(
matches
[
0
])
k
=
weight_data
.
shape
[
1
]
weight_shapes
.
append
({
n
,
k
})
...
...
vllm/utils.py
View file @
baf7d06a
...
...
@@ -1505,8 +1505,6 @@ class W8a8GetCacheJSON:
def
_initialize
(
self
):
current_folder_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
json_folder_path
=
current_folder_path
+
'/../lmslim/configs/w8a8'
if
not
os
.
path
.
exists
(
json_folder_path
):
json_folder_path
=
current_folder_path
+
'/model_executor/layers/quantization/configs/w8a8'
self
.
triton_json_dir
=
(
os
.
getenv
(
'TRITON_JSON_DIR'
,
json_folder_path
))
self
.
triton_json_dict
=
[]
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment