Merge remote-tracking branch 'origin/v0.7.2-dev' into v0.7.2_zero_overhead

ca4ec0ce · lizhigong · 0be169ad · ae0ed592 · ca4ec0ce · ca4ec0ce
Commit ca4ec0ce authored Mar 25, 2025 by lizhigong
20 changed files
--- a/vllm/model_executor/layers/fused_moe/configs/E=64,N=256,device_name=K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=64,N=256,device_name=K100_AI_nn.json
+{
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "8": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5,
+        "num_ldmatrixes": 1
+    },
+    "24": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 5,
+        "num_ldmatrixes": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5,
+        "num_ldmatrixes": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 5,
+        "num_ldmatrixes": 1
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 5,
+        "num_ldmatrixes": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    }
+}
--- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=DCU_K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=DCU_K100_AI_nn.json
+{
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "24": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "48": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "96": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    }
+}
--- a/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=K100_AI_nn.json
+++ b/vllm/model_executor/layers/fused_moe/configs/E=8,N=2048,device_name=K100_AI_nn.json
+{
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "2": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 3,
+        "num_ldmatrixes": 1
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 4,
+        "num_ldmatrixes": 1
+    },
+    "24": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "32": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "48": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "64": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "96": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "128": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "256": {
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "512": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "1536": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "2048": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "3072": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    },
+    "4096": {
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 8,
+        "num_stages": 2,
+        "num_ldmatrixes": 1
+    }
+}
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -14,11 +14,170 @@ from vllm import _custom_ops as ops
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization.utils.fp8_utils import (
    per_token_group_quant_fp8)
+from vllm.model_executor.layers.quantization.utils.int8_utils import (
+    per_token_group_quant_int8)
+
 from vllm.platforms import current_platform
 from vllm.utils import direct_register_custom_op

 logger = init_logger(__name__)

+@triton.jit
+def fused_moe_kernel_awq(  # MoE GEMM whose weights are dequantized on the fly from packed scale/zero words
+        # Pointers to matrices (bracketed shapes are the author's example values)
+        a_ptr, # A operand of the dot product, e.g. [4, 7168]
+        b_ptr, # quantized expert weights, e.g. [256, 512, 3584]
+        c_ptr, # output buffer, e.g. (8, 8, 512)
+        b_scale_ptr, # packed scale/zero-point words, e.g. (256, 512, 56)
+        b_zp_ptr, # e.g. (256, 256, 56); never dereferenced in this kernel body
+        topk_weights_ptr, # routing weights, read only when MUL_ROUTED_WEIGHT is set
+        sorted_token_ids_ptr, # e.g. [0, 1, 2, 3, 4]
+        expert_ids_ptr,
+        num_tokens_post_padded_ptr,
+        # Matrix dimensions
+        N: tl.constexpr,
+        K: tl.constexpr,
+        EM, # total length of the token index array after padding
+        num_valid_tokens, # upper bound for valid token indices; ids at or above it are masked out
+        # The stride variables represent how much to increase the ptr by when
+        # moving by 1 element in a particular dimension. E.g. `stride_am` is
+        # how much to increase `a_ptr` by to get the element one row down
+        # (A has M rows).
+        stride_am,
+        stride_ak,
+        stride_be,
+        stride_bk, # author's note: 1
+        stride_bn,
+        stride_cm,
+        stride_cn,
+        stride_bse,
+        stride_bsk,# author's note: 1
+        stride_bsn,
+        stride_bze,
+        stride_bzk,
+        stride_bzn,
+        block_k_diviable: tl.constexpr,
+        group_size: tl.constexpr, # quantization group length along K, e.g. 128
+        # Meta-parameters
+        BLOCK_SIZE_M: tl.constexpr,
+        BLOCK_SIZE_N: tl.constexpr,
+        BLOCK_SIZE_K: tl.constexpr,
+        GROUP_SIZE_M: tl.constexpr,
+        MUL_ROUTED_WEIGHT: tl.constexpr,
+        top_k: tl.constexpr,
+        compute_type: tl.constexpr,
+        has_zp: tl.constexpr,
+        use_int4_w4a16: tl.constexpr,
+        use_int8_w8a16: tl.constexpr):
+    pid = tl.program_id(axis=0)  # flat program id, split below into (pid_m, pid_n)
+    num_pid_m = tl.cdiv(EM, BLOCK_SIZE_M)
+    num_pid_n = tl.cdiv(N, BLOCK_SIZE_N)
+    num_pid_in_group = GROUP_SIZE_M * num_pid_n  # programs per group of GROUP_SIZE_M row blocks
+    group_id = pid // num_pid_in_group
+    first_pid_m = group_id * GROUP_SIZE_M
+    group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M)  # the last group may hold fewer row blocks
+    pid_m = first_pid_m + ((pid % num_pid_in_group) % group_size_m)
+    pid_n = (pid % num_pid_in_group) // group_size_m
+
+    num_tokens_post_padded = tl.load(num_tokens_post_padded_ptr)
+    if pid_m * BLOCK_SIZE_M >= num_tokens_post_padded:
+        return  # this row block starts at or past the padded token count
+    offs_token_id = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M)
+    offs_token = tl.load(sorted_token_ids_ptr + offs_token_id) # [block_m]
+    token_mask = offs_token < num_valid_tokens  # False for padding entries
+
+    offs_bn = (pid_n * BLOCK_SIZE_N +
+               tl.arange(0, BLOCK_SIZE_N)) % N # [block_n], wrapped into [0, N)
+    offs_k = tl.arange(0, BLOCK_SIZE_K) # [block_k]: 0 .. BLOCK_SIZE_K - 1
+    offs_k2 = tl.arange(0, BLOCK_SIZE_K // 2) # [block_k // 2]: 0 .. BLOCK_SIZE_K // 2 - 1
+    a_ptrs = a_ptr + (offs_token[:, None] // top_k * stride_am +
+                      offs_k[None, :] * stride_ak) # [block_m, block_k]; A row is offs_token // top_k
+
+    off_experts = tl.load(expert_ids_ptr + pid_m)  # one expert id per row block
+
+    if use_int4_w4a16:
+        # Two 4-bit values share one packed element, so K index k maps to k // 2:
+        # [0, 1, 2, ..., 126, 127] --> [0, 0, 1, 1, ..., 63, 63]
+        # [128, 129, 130, ..., 254, 255] --> [64, 64, 65, 65, ..., 127, 127]
+        # Only BLOCK_SIZE_K // 2 packed columns are addressed here; the K loop
+        # expands them to [block_k, block_n] with tl.interleave followed by trans.
+        b_ptrs = b_ptr + off_experts * stride_be + \
+            offs_bn[:, None] * stride_bn + (offs_k2[None, :]) * stride_bk # [block_n, block_k // 2]
+        # Author's debug notes (from tl.device_print): the value printed under the
+        # label "stride_bn" was > 1 and stride_bk was 1.
+        b_shifter = (offs_k[:, None] % 2) * 4  # 0 for even k (low 4 bits), 4 for odd k (high 4 bits)
+    elif use_int8_w8a16:
+        b_ptrs = b_ptr + off_experts * stride_be + \
+            offs_k[:, None] * stride_bk + offs_bn[None, :] * stride_bn # [block_k, block_n]
+
+    if not has_zp and use_int4_w4a16:
+        b_zp_num = 8  # NOTE(review): b_zp_num is never read later in this kernel
+    if not has_zp and use_int8_w8a16:
+        b_zp_num = 128
+    elif has_zp and use_int4_w4a16:
+        b_zp_shifter = (offs_bn[None, :] % 2) * 4 # 0, 4; NOTE(review): b_zp_shifter is never read either
+
+    accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32)
+    for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)):
+        if not block_k_diviable:
+            k_mask = offs_k[:, None] < K - k * BLOCK_SIZE_K  # guards the tail when K % BLOCK_SIZE_K != 0
+            k_other = 0.0
+        else:
+            k_mask = None
+            k_other = None
+
+        a = tl.load(a_ptrs,
+                    mask=token_mask[:, None] &
+                    (offs_k[None, :] < K - k * BLOCK_SIZE_K),
+                    other=0.0)  # padding rows and out-of-range K columns read as 0
+        b = tl.load(b_ptrs)  # NOTE(review): loaded without a mask, unlike `a`
+        if use_int4_w4a16:
+            b = tl.interleave(b, b)  # duplicate each packed element -> [block_n, block_k]
+            b= b.trans()  # -> [block_k, block_n]
+            b = (b >> b_shifter) & 0xF  # keep the 4-bit value selected by the parity of k
+
+        b_scale_ptrs = b_scale_ptr + off_experts * stride_bse + \
+            offs_bn[None, :] * stride_bsk + \
+            ((offs_k[:, None] + BLOCK_SIZE_K * k) // group_size) * stride_bsn  # one word per K group
+        qzeros_scles = tl.load(b_scale_ptrs, mask=k_mask, other=k_other)
+        # NOTE(review): N offsets use stride_bsk and K-group offsets use stride_bsn; confirm vs. the tensor layout.
+        scales_int16 = tl.cast(qzeros_scles,tl.uint16)  # presumably keeps the low 16 bits of each word
+        b_scale = tl.cast(scales_int16,tl.float16,bitcast=True)  # reinterpret those bits as an fp16 scale
+        # (debug) tl.device_print("b_scale dequant", b_scale)
+
+        mid = qzeros_scles >> 16  # remaining upper bits carry the zero point
+        # The zero point is converted by value (no bitcast), unlike the scale above.
+        b_zp = tl.cast(mid,tl.float16)
+        # (debug) a bitcast=False cast from a separate 16-bit temporary was also tried here.
+
+        # (debug) tl.device_print("bzp", b_zp)
+
+        # Dequantize the weight tile, then accumulate along the K dimension.
+        b = ((b - b_zp) * b_scale).to(tl.float16)
+        accumulator = tl.dot(a, b, acc=accumulator)
+
+        # Advance the ptrs to the next K block.
+        a_ptrs += BLOCK_SIZE_K * stride_ak
+        if use_int4_w4a16:
+            b_ptrs += (BLOCK_SIZE_K // 2) * stride_bk  # packed axis advances half as many elements
+        else:
+            b_ptrs += BLOCK_SIZE_K * stride_bk
+
+    if MUL_ROUTED_WEIGHT:
+        moe_weight = tl.load(topk_weights_ptr + offs_token,
+                             mask=token_mask,
+                             other=0)
+        accumulator = accumulator * moe_weight[:, None]  # scale each row by its routing weight
+
+    accumulator = accumulator.to(compute_type)
+    # -----------------------------------------------------------
+    # Write back the block of the output
+    offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N)  # not wrapped, so the mask below can clip at N
+    c_ptrs = c_ptr + stride_cm * offs_token[:, None] + stride_cn * offs_cn[
+        None, :]
+    c_mask = token_mask[:, None] & (offs_cn[None, :] < N)  # skip padding rows and columns >= N
+    tl.store(c_ptrs, accumulator, mask=c_mask)
+

 @triton.jit
 def fused_moe_kernel_gptq_awq(
@@ -265,6 +424,7 @@ def fused_moe_kernel(
        top_k: tl.constexpr,
        compute_type: tl.constexpr,
        use_fp8_w8a8: tl.constexpr,
+        use_int8_w8a8: tl.constexpr,
        use_int8_w8a16: tl.constexpr):
    """
    Implements the fused computation for a Mixture of Experts (MOE) using
@@ -346,7 +506,7 @@ def fused_moe_kernel(
            None, :] * stride_bsn
        b_scale = tl.load(b_scale_ptrs)

-    if use_fp8_w8a8:
+    if use_fp8_w8a8 or use_int8_w8a8:
        if group_k > 0 and group_n > 0:
            a_scale_ptrs = a_scale_ptr + (offs_token // top_k) * stride_asm
            offs_bsn = offs_bn // group_n
@@ -376,7 +536,7 @@ def fused_moe_kernel(
        # We accumulate along the K dimension.
        if use_int8_w8a16:
            accumulator = tl.dot(a, b.to(compute_type), acc=accumulator)
-        elif use_fp8_w8a8:
+        elif use_fp8_w8a8 or use_int8_w8a8:
            if group_k > 0 and group_n > 0:
                k_start = k * BLOCK_SIZE_K
                offs_ks = k_start // group_k
@@ -402,7 +562,7 @@ def fused_moe_kernel(
        accumulator = accumulator * moe_weight[:, None]
    if use_int8_w8a16:
        accumulator = (accumulator * b_scale).to(compute_type)
-    elif use_fp8_w8a8:
+    elif use_fp8_w8a8 or use_int8_w8a8:
        if group_k > 0 and group_n > 0:
            accumulator = accumulator.to(compute_type)
        else:
@@ -558,7 +718,7 @@ def moe_align_block_size_triton(

 def moe_align_block_size(
        topk_ids: torch.Tensor, block_size: int,
-        num_experts: int) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        num_experts: int, num_token: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Aligns the token distribution across experts to be compatible with block
    size for matrix multiplication.
@@ -596,11 +756,18 @@ def moe_align_block_size(
    - The padding ensures that the total number of tokens is now divisible
        by block_size for proper block matrix operations.
    """
-    max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
-    sorted_ids = torch.empty((max_num_tokens_padded, ),
-                             dtype=torch.int32,
-                             device=topk_ids.device)
-    sorted_ids.fill_(topk_ids.numel())
+    if num_token:
+        if num_token < block_size:
+            max_num_tokens_padded = min(topk_ids.numel() * block_size, topk_ids.numel() + num_experts * (block_size - 1))
+        else:
+            max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
+        sorted_ids = torch.full((max_num_tokens_padded,), fill_value=topk_ids.numel(), dtype=torch.int32, device=topk_ids.device)
+    else:
+        max_num_tokens_padded = topk_ids.numel() + num_experts * (block_size - 1)
+        sorted_ids = torch.empty((max_num_tokens_padded, ),
+                                 dtype=torch.int32,
+                                 device=topk_ids.device)
+        sorted_ids.fill_(topk_ids.numel())
    max_num_m_blocks = triton.cdiv(max_num_tokens_padded, block_size)
    expert_ids = torch.empty((max_num_m_blocks, ),
                             dtype=torch.int32,
@@ -709,6 +876,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
                            config: Dict[str, Any],
                            compute_type: tl.dtype,
                            use_fp8_w8a8: bool,
+                            use_int8_w8a8: bool,
                            use_int8_w8a16: bool,
                            use_int4_w4a16: bool,
                            block_shape: Optional[List[int]] = None,
@@ -727,6 +895,19 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
            assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
            assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
            assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
+            
+    elif use_int8_w8a8:
+        assert B_scale is not None
+        if block_shape is None:
+            A, A_scale = ops.scaled_int8_quant(A, A_scale)
+        else:
+            assert len(block_shape) == 2
+            block_n, block_k = block_shape[0], block_shape[1]
+            A, A_scale = per_token_group_quant_int8(A, block_k)
+            assert triton.cdiv(A.shape[-1], block_k) == A_scale.shape[-1]
+            assert triton.cdiv(B.shape[-2], block_n) == B_scale.shape[-2]
+            assert triton.cdiv(B.shape[-1], block_k) == B_scale.shape[-1]
+            
    elif use_int8_w8a16 or use_int4_w4a16:
        assert B_scale is not None
        assert block_shape is None or block_shape[0] == 0
@@ -749,44 +930,82 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
            block_shape is not None and block_shape[1] > 0:
        assert B_scale is not None and B_scale.ndim == 3
        assert B_zp is None or B_zp.ndim == 3
-
-        fused_moe_kernel_gptq_awq[grid](
-            A,
-            B,
-            C,
-            B_scale,
-            B_zp,
-            topk_weights,
-            sorted_token_ids,
-            expert_ids,
-            num_tokens_post_padded,
-            B.shape[1],
-            A.shape[1],
-            EM,
-            topk_ids.numel(),
-            A.stride(0),
-            A.stride(1),
-            B.stride(0),
-            B.stride(2),
-            B.stride(1),
-            C.stride(1),
-            C.stride(2),
-            B_scale.stride(0),
-            B_scale.stride(2),
-            B_scale.stride(1),
-            B_zp.stride(0) if B_zp is not None else 0,
-            B_zp.stride(2) if B_zp is not None else 0,
-            B_zp.stride(1) if B_zp is not None else 0,
-            block_k_diviable=A.shape[1] % config["BLOCK_SIZE_K"] == 0,
-            group_size=block_shape[1],
-            MUL_ROUTED_WEIGHT=mul_routed_weight,
-            top_k=top_k,
-            compute_type=compute_type,
-            has_zp=B_zp is not None,
-            use_int4_w4a16=use_int4_w4a16,
-            use_int8_w8a16=use_int8_w8a16,
-            **config,
-        )
+        if os.environ.get('AWQ_MOE_SZ') == '1':
+            fused_moe_kernel_awq[grid](
+                A,
+                B,
+                C,
+                B_scale,
+                B_zp,
+                topk_weights,
+                sorted_token_ids,
+                expert_ids,
+                num_tokens_post_padded,
+                B.shape[1],
+                A.shape[1],
+                EM,
+                topk_ids.numel(),
+                A.stride(0),
+                A.stride(1),
+                B.stride(0),
+                B.stride(2),
+                B.stride(1),
+                C.stride(1),
+                C.stride(2),
+                B_scale.stride(0),
+                B_scale.stride(2),
+                B_scale.stride(1),
+                B_zp.stride(0) if B_zp is not None else 0,
+                B_zp.stride(2) if B_zp is not None else 0,
+                B_zp.stride(1) if B_zp is not None else 0,
+                block_k_diviable=A.shape[1] % config["BLOCK_SIZE_K"] == 0,
+                group_size=block_shape[1],
+                MUL_ROUTED_WEIGHT=mul_routed_weight,
+                top_k=top_k,
+                compute_type=compute_type,
+                has_zp=B_zp is not None,
+                use_int4_w4a16=use_int4_w4a16,
+                use_int8_w8a16=use_int8_w8a16,
+                **config,
+            )
+        else:
+            fused_moe_kernel_gptq_awq[grid](
+                A,
+                B,
+                C,
+                B_scale,
+                B_zp,
+                topk_weights,
+                sorted_token_ids,
+                expert_ids,
+                num_tokens_post_padded,
+                B.shape[1],
+                A.shape[1],
+                EM,
+                topk_ids.numel(),
+                A.stride(0),
+                A.stride(1),
+                B.stride(0),
+                B.stride(2),
+                B.stride(1),
+                C.stride(1),
+                C.stride(2),
+                B_scale.stride(0),
+                B_scale.stride(2),
+                B_scale.stride(1),
+                B_zp.stride(0) if B_zp is not None else 0,
+                B_zp.stride(2) if B_zp is not None else 0,
+                B_zp.stride(1) if B_zp is not None else 0,
+                block_k_diviable=A.shape[1] % config["BLOCK_SIZE_K"] == 0,
+                group_size=block_shape[1],
+                MUL_ROUTED_WEIGHT=mul_routed_weight,
+                top_k=top_k,
+                compute_type=compute_type,
+                has_zp=B_zp is not None,
+                use_int4_w4a16=use_int4_w4a16,
+                use_int8_w8a16=use_int8_w8a16,
+                **config,
+            )

    else:
        fused_moe_kernel[grid](
@@ -826,6 +1045,7 @@ def invoke_fused_moe_kernel(A: torch.Tensor,
            top_k=top_k,
            compute_type=compute_type,
            use_fp8_w8a8=use_fp8_w8a8,
+            use_int8_w8a8=use_int8_w8a8,
            use_int8_w8a16=use_int8_w8a16,
            **config,
        )
@@ -872,6 +1092,15 @@ def get_moe_configs(

    config_file_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "configs", json_file_name)
+    if torch.cuda.get_device_properties(torch.cuda.current_device()).multi_processor_count == 120:
+        config_file_path_120 = config_file_path.replace(".json","_120.json")
+        if os.path.exists(config_file_path_120):
+            with open(config_file_path_120) as f:
+                logger.info("Using configuration from %s for MoE layer.",
+                            config_file_path_120)
+                # If a configuration has been found, return it
+                return {int(key): val for key, val in json.load(f).items()}
+
    if os.path.exists(config_file_path):
        with open(config_file_path) as f:
            logger.info("Using configuration from %s for MoE layer.",
@@ -1060,9 +1289,12 @@ def grouped_topk(hidden_states: torch.Tensor,
 def get_config_dtype_str(dtype: torch.dtype,
                         use_int4_w4a16: Optional[bool] = False,
                         use_int8_w8a16: Optional[bool] = False,
-                         use_fp8_w8a8: Optional[bool] = False):
+                         use_fp8_w8a8: Optional[bool] = False,
+                         use_int8_w8a8: Optional[bool] = False):
    if use_fp8_w8a8:
        return "fp8_w8a8"
+    elif use_int8_w8a8:
+        return "int8_w8a8"
    elif use_int8_w8a16:
        return "int8_w8a16"
    elif use_int4_w4a16:
@@ -1080,6 +1312,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
                          topk_weights: torch.Tensor,
                          topk_ids: torch.Tensor,
                          use_fp8_w8a8: bool = False,
+                          use_int8_w8a8: bool = False,
                          use_int8_w8a16: bool = False,
                          use_int4_w4a16: bool = False,
                          w1_scale: Optional[torch.Tensor] = None,
@@ -1094,7 +1327,7 @@ def inplace_fused_experts(hidden_states: torch.Tensor,
                          start_expert: Optional[int] = -1,
                          end_expert: Optional[int] = -1) -> None:
    fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True,
-                       use_fp8_w8a8, use_int8_w8a16, use_int4_w4a16, w1_scale,
+                       use_fp8_w8a8,use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, w1_scale,
                       w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, block_shape,
                       use_nn_moe, moe_ep_size=moe_ep_size,
                       start_expert=start_expert, end_expert=end_expert)
@@ -1107,6 +1340,7 @@ def inplace_fused_experts_fake(
        topk_weights: torch.Tensor,
        topk_ids: torch.Tensor,
        use_fp8_w8a8: bool = False,
+        use_int8_w8a8: bool = False,
        use_int8_w8a16: bool = False,
        use_int4_w4a16: bool = False,
        w1_scale: Optional[torch.Tensor] = None,
@@ -1138,6 +1372,7 @@ def outplace_fused_experts(
        topk_weights: torch.Tensor,
        topk_ids: torch.Tensor,
        use_fp8_w8a8: bool = False,
+        use_int8_w8a8: bool = False,
        use_int8_w8a16: bool = False,
        use_int4_w4a16: bool = False,
        w1_scale: Optional[torch.Tensor] = None,
@@ -1152,7 +1387,7 @@ def outplace_fused_experts(
        start_expert: Optional[int] = -1,
        end_expert: Optional[int] = -1) -> torch.Tensor:
    return fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids,
-                              False, use_fp8_w8a8, use_int8_w8a16,
+                              False, use_fp8_w8a8,use_int8_w8a8,use_int8_w8a16,
                              use_int4_w4a16, w1_scale, w2_scale, w1_zp, w2_zp,
                              a1_scale, a2_scale, block_shape, 
                              use_nn_moe, moe_ep_size=moe_ep_size,
@@ -1166,6 +1401,7 @@ def outplace_fused_experts_fake(
        topk_weights: torch.Tensor,
        topk_ids: torch.Tensor,
        use_fp8_w8a8: bool = False,
+        use_int8_w8a8: bool = False,
        use_int8_w8a16: bool = False,
        use_int4_w4a16: bool = False,
        w1_scale: Optional[torch.Tensor] = None,
@@ -1197,6 +1433,7 @@ def fused_experts(hidden_states: torch.Tensor,
                  topk_ids: torch.Tensor,
                  inplace: bool = False,
                  use_fp8_w8a8: bool = False,
+                  use_int8_w8a8: bool = False,
                  use_int8_w8a16: bool = False,
                  use_int4_w4a16: bool = False,
                  w1_scale: Optional[torch.Tensor] = None,
@@ -1213,7 +1450,7 @@ def fused_experts(hidden_states: torch.Tensor,
    if inplace:
        torch.ops.vllm.inplace_fused_experts(hidden_states, w1, w2,
                                             topk_weights, topk_ids,
-                                             use_fp8_w8a8, use_int8_w8a16,
+                                             use_fp8_w8a8,use_int8_w8a8,use_int8_w8a16,
                                             use_int4_w4a16, w1_scale,
                                             w2_scale, w1_zp, w2_zp, a1_scale,
                                             a2_scale, block_shape, 
@@ -1224,7 +1461,7 @@ def fused_experts(hidden_states: torch.Tensor,
        return hidden_states
    else:
        return torch.ops.vllm.outplace_fused_experts(
-            hidden_states, w1, w2, topk_weights, topk_ids, use_fp8_w8a8,
+            hidden_states, w1, w2, topk_weights, topk_ids, use_fp8_w8a8,use_int8_w8a8,
            use_int8_w8a16, use_int4_w4a16, w1_scale, w2_scale, w1_zp, w2_zp,
            a1_scale, a2_scale, block_shape, 
            use_nn_moe, moe_ep_size=moe_ep_size,
@@ -1239,6 +1476,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
                       topk_ids: torch.Tensor,
                       inplace: bool = False,
                       use_fp8_w8a8: bool = False,
+                       use_int8_w8a8: bool = False,
                       use_int8_w8a16: bool = False,
                       use_int4_w4a16: bool = False,
                       w1_scale: Optional[torch.Tensor] = None,
@@ -1279,6 +1517,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
    CHUNK_SIZE = envs.VLLM_FUSED_MOE_CHUNK_SIZE
    M = min(num_tokens, CHUNK_SIZE)
    config_dtype = get_config_dtype_str(use_fp8_w8a8=use_fp8_w8a8,
+                                        use_int8_w8a8=use_int8_w8a8,
                                        use_int8_w8a16=use_int8_w8a16,
                                        use_int4_w4a16=use_int4_w4a16,
                                        dtype=hidden_states.dtype)
@@ -1346,8 +1585,12 @@ def fused_experts_impl(hidden_states: torch.Tensor,
        curr_topk_weights = topk_weights[begin_chunk_idx:end_chunk_idx]
        
        if moe_ep_size == 1:
-            sorted_token_ids, expert_ids, num_tokens_post_padded = (
-                moe_align_block_size(curr_topk_ids, config['BLOCK_SIZE_M'], E))
+            if use_int4_w4a16:
+                sorted_token_ids, expert_ids, num_tokens_post_padded =  (               
+                    moe_align_block_size(curr_topk_ids, config['BLOCK_SIZE_M'], E, curr_hidden_states.shape[0]))
+            else:
+                sorted_token_ids, expert_ids, num_tokens_post_padded = (
+                    moe_align_block_size(curr_topk_ids, config['BLOCK_SIZE_M'], E))
        else:
            sorted_token_ids, expert_ids, num_tokens_post_padded = (
                moe_ep_align_block_size(curr_topk_ids, config['BLOCK_SIZE_M'], E,
@@ -1369,6 +1612,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
                                config,
                                compute_type=compute_type,
                                use_fp8_w8a8=use_fp8_w8a8,
+                                use_int8_w8a8=use_int8_w8a8,
                                use_int8_w8a16=use_int8_w8a16,
                                use_int4_w4a16=use_int4_w4a16,
                                block_shape=block_shape,
@@ -1393,6 +1637,7 @@ def fused_experts_impl(hidden_states: torch.Tensor,
                                config,
                                compute_type=compute_type,
                                use_fp8_w8a8=use_fp8_w8a8,
+                                use_int8_w8a8=use_int8_w8a8,
                                use_int8_w8a16=use_int8_w8a16,
                                use_int4_w4a16=use_int4_w4a16,
                                block_shape=block_shape,
@@ -1416,6 +1661,7 @@ def fused_moe(
    topk_group: Optional[int] = None,
    custom_routing_function: Optional[Callable] = None,
    use_fp8_w8a8: bool = False,
+    use_int8_w8a8: bool = False,
    use_int8_w8a16: bool = False,
    use_int4_w4a16: bool = False,
    w1_scale: Optional[torch.Tensor] = None,
@@ -1426,7 +1672,7 @@ def fused_moe(
    a2_scale: Optional[torch.Tensor] = None,
    block_shape: Optional[List[int]] = None,
    use_nn_moe: Optional[bool] = False,
-    moe_ep_size: Optional[int] = None,
+    moe_ep_size: Optional[int] = 1,
    start_expert: Optional[int] = None,
    end_expert: Optional[int] = None,
 ) -> torch.Tensor:
@@ -1492,6 +1738,7 @@ def fused_moe(
                         topk_ids,
                         inplace=inplace,
                         use_fp8_w8a8=use_fp8_w8a8,
+                         use_int8_w8a8=use_int8_w8a8,
                         use_int8_w8a16=use_int8_w8a16,
                         use_int4_w4a16=use_int4_w4a16,
                         w1_scale=w1_scale,

--- a/vllm/model_executor/layers/fused_moe/layer.py
+++ b/vllm/model_executor/layers/fused_moe/layer.py
@@ -363,6 +363,9 @@ class FusedMoE(torch.nn.Module):
        if (self.quant_method.__class__.__name__ ==
                "CompressedTensorsWNA16MoEMethod"):
            moe_quant_params["intermediate_size_full"] = intermediate_size
+        
+        # BlockInt8MoEMethod.create_weights takes `intermediate_size`, so pass
+        # the per-partition size under that key. Compare with `==`: the
+        # previous `in ("BlockInt8MoEMethod")` had no trailing comma, making it
+        # a substring test against a plain string rather than tuple membership.
+        if self.quant_method.__class__.__name__ == "BlockInt8MoEMethod":
+            moe_quant_params["intermediate_size"] = self.intermediate_size_per_partition

        self.quant_method.create_weights(layer=self, **moe_quant_params)


--- a/vllm/model_executor/layers/linear.py
+++ b/vllm/model_executor/layers/linear.py
@@ -37,7 +37,7 @@ WEIGHT_LOADER_V2_SUPPORTED = [
    "MarlinLinearMethod", "QQQLinearMethod", "GPTQMarlin24LinearMethod",
    "TPUInt8LinearMethod", "GPTQLinearMethod", "FBGEMMFp8LinearMethod",
    "ModelOptFp8LinearMethod", "IPEXAWQLinearMethod", "IPEXGPTQLinearMethod",
-    "HQQMarlinMethod", "QuarkLinearMethod"
+    "HQQMarlinMethod", "QuarkLinearMethod", "BlockInt8LinearMethod",
 ]


@@ -664,9 +664,12 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
        if isinstance(param, BlockQuantScaleParameter):
            from vllm.model_executor.layers.quantization.fp8 import (
                Fp8LinearMethod, Fp8MoEMethod)
+            
+            from vllm.model_executor.layers.quantization.blockwise_int8 import (
+                BlockInt8LinearMethod, BlockInt8MoEMethod)
            assert self.quant_method is not None
            assert isinstance(self.quant_method,
-                              (Fp8LinearMethod, Fp8MoEMethod))
+                              (Fp8LinearMethod, Fp8MoEMethod, BlockInt8LinearMethod, BlockInt8MoEMethod))
            weight_block_size = self.quant_method.quant_config.weight_block_size
            assert weight_block_size is not None
            block_n, _ = weight_block_size[0], weight_block_size[1]

--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -29,7 +29,8 @@ QUANTIZATION_METHODS: List[str] = [
    "neuron_quant",
    "ipex",
    "quark",
-    "moe_wna16"
+    "moe_wna16",
+    "blockwise_int8"
 ]

 # The customized quantization methods which will be added to this dict.
@@ -101,6 +102,7 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
    from .neuron_quant import NeuronQuantConfig
    from .qqq import QQQConfig
    from .tpu_int8 import Int8TpuConfig
+    from .blockwise_int8 import BlockInt8Config

    method_to_config: Dict[str, Type[QuantizationConfig]] = {
        "aqlm": AQLMConfig,
@@ -127,6 +129,7 @@ def get_quantization_config(quantization: str) -> Type[QuantizationConfig]:
        "ipex": IPEXConfig,
        "quark": QuarkConfig,
        "moe_wna16": MoeWNA16Config,
+        "blockwise_int8": BlockInt8Config,
    }
    # Update the `method_to_config` with customized quantization methods.
    method_to_config.update(_CUSTOMIZED_METHOD_TO_QUANT_CONFIG)

--- a/vllm/model_executor/layers/quantization/blockwise_int8.py
+++ b/vllm/model_executor/layers/quantization/blockwise_int8.py
+# SPDX-License-Identifier: Apache-2.0
+
+# Adapted from https://github.com/sgl-project/sglang/pull/3730
+
+import logging
+from typing import Any, Callable, Dict, List, Optional
+
+import torch
+from torch.nn import Module
+from vllm.model_executor.layers.quantization.utils.quant_utils import (
+    is_layer_skipped)
+
+from vllm.distributed import get_tensor_model_parallel_world_size
+from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
+                                               UnquantizedLinearMethod)
+from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEMethodBase,
+                                                  FusedMoeWeightScaleSupported)
+from vllm.model_executor.parameter import (BlockQuantScaleParameter,
+                                           ModelWeightParameter,
+                                           PerTensorScaleParameter)
+from vllm.model_executor.layers.quantization.base_config import (
+    QuantizationConfig, QuantizeMethodBase)
+from vllm.model_executor.layers.quantization.utils.int8_utils import (
+    apply_w8a8_block_int8_linear)
+from vllm.model_executor.utils import set_weight_attrs
+
+ACTIVATION_SCHEMES = ["static", "dynamic"]
+
+logger = logging.getLogger(__name__)
+
+
+class BlockInt8Config(QuantizationConfig):
+    """Quantization config for block-wise INT8 checkpoints.
+
+    Args:
+        is_checkpoint_int8_serialized: Whether the checkpoint already stores
+            INT8 weights. Block-wise quantization requires this to be True.
+        activation_scheme: One of ``ACTIVATION_SCHEMES``; only "dynamic" is
+            accepted when ``weight_block_size`` is given.
+        ignored_layers: Layer prefixes for which ``get_quant_method`` returns
+            the unquantized linear method. Defaults to an empty list.
+        weight_block_size: Two-element quantization block size. The methods
+            in this module read it as ``(block_n, block_k)``.
+
+    Raises:
+        ValueError: If the activation scheme is unknown, or if
+            ``weight_block_size`` is given together with a non-serialized
+            checkpoint, a length other than 2, or a non-dynamic scheme.
+    """
+
+    def __init__(
+        self,
+        is_checkpoint_int8_serialized: bool = False,
+        activation_scheme: str = "dynamic",
+        ignored_layers: Optional[List[str]] = None,
+        weight_block_size: Optional[List[int]] = None,
+    ) -> None:
+        self.is_checkpoint_int8_serialized = is_checkpoint_int8_serialized
+        if is_checkpoint_int8_serialized:
+            logger.warning(
+                "Detected int8 checkpoint. Please note that the "
+                "format is experimental and subject to change."
+            )
+        if activation_scheme not in ACTIVATION_SCHEMES:
+            raise ValueError("Unsupported activation scheme"
+                             f" {activation_scheme}")
+        self.activation_scheme = activation_scheme
+        self.ignored_layers = ignored_layers or []
+        if weight_block_size is not None:
+            if not is_checkpoint_int8_serialized:
+                raise ValueError(
+                    "The block-wise quantization only supports "
+                    "int8-serialized checkpoint for now."
+                )
+            if len(weight_block_size) != 2:
+                # The `f` prefix must sit on the fragment that holds the
+                # placeholder; adjacent literals are prefixed independently.
+                raise ValueError(
+                    "The quantization block size of weight must have 2 "
+                    f"dimensions, but got {len(weight_block_size)} dimensions."
+                )
+            if activation_scheme != "dynamic":
+                raise ValueError(
+                    "The block-wise quantization only supports dynamic "
+                    "activation scheme for now, but got "
+                    f"{activation_scheme} activation scheme."
+                )
+        self.weight_block_size = weight_block_size
+
+    @classmethod
+    def get_name(cls) -> str:
+        """Return the registry name of this quantization method."""
+        return "blockwise_int8"
+
+    @classmethod
+    def get_supported_act_dtypes(cls) -> List[torch.dtype]:
+        """Return the activation dtypes this method accepts."""
+        return [torch.bfloat16, torch.half]
+
+    @classmethod
+    def get_min_capability(cls) -> int:
+        """Return the minimum device capability required (80)."""
+        return 80
+
+    @classmethod
+    def get_config_filenames(cls) -> List[str]:
+        """Return extra config filenames to search for; none are used."""
+        return []
+
+    @classmethod
+    def from_config(cls, config: Dict[str, Any]) -> "BlockInt8Config":
+        """Build the config from a checkpoint's quantization dict.
+
+        ``quant_method`` and ``activation_scheme`` are looked up without a
+        default; ``ignored_layers`` and ``weight_block_size`` default to
+        None. The checkpoint counts as INT8-serialized when the
+        ``quant_method`` value contains the substring "int8".
+        """
+        quant_method = cls.get_from_keys(config, ["quant_method"])
+        is_checkpoint_int8_serialized = "int8" in quant_method
+        activation_scheme = cls.get_from_keys(config, ["activation_scheme"])
+        ignored_layers = cls.get_from_keys_or(config, ["ignored_layers"], None)
+        weight_block_size = cls.get_from_keys_or(config,
+                                                 ["weight_block_size"], None)
+        return cls(
+            is_checkpoint_int8_serialized=is_checkpoint_int8_serialized,
+            activation_scheme=activation_scheme,
+            ignored_layers=ignored_layers,
+            weight_block_size=weight_block_size,
+        )
+
+    def get_quant_method(
+        self, layer: torch.nn.Module, prefix: str
+    ) -> Optional["QuantizeMethodBase"]:
+        """Pick the quantize method for ``layer``.
+
+        Linear layers get ``BlockInt8LinearMethod`` unless ``prefix`` is
+        skipped via ``ignored_layers``; ``FusedMoE`` layers get
+        ``BlockInt8MoEMethod``; any other layer type yields None.
+        """
+        if isinstance(layer, LinearBase):
+            if is_layer_skipped(prefix, self.ignored_layers):
+                return UnquantizedLinearMethod()
+            return BlockInt8LinearMethod(self)
+        elif isinstance(layer, FusedMoE):
+            return BlockInt8MoEMethod(self)
+        return None
+
+    def get_scaled_act_names(self) -> List[str]:
+        """Return the names of scaled activations; none are used."""
+        return []
+
+
+class BlockInt8LinearMethod(LinearMethodBase):
+    """Linear method for block-wise INT8.
+
+    Supports loading INT8 checkpoints with static weight scale and
+    dynamic activation scale.
+
+    Limitations:
+        Only supports block-wise int8 quantization and int8 checkpoints;
+        the constructor asserts both.
+
+    Args:
+        quant_config: The quantization config.
+    """
+
+    def __init__(self, quant_config: BlockInt8Config):
+        self.quant_config = quant_config
+        assert self.quant_config.weight_block_size is not None
+        assert self.quant_config.is_checkpoint_int8_serialized
+
+    def create_weights(
+        self,
+        layer: torch.nn.Module,
+        input_size_per_partition: int,
+        output_partition_sizes: Optional[List[int]],
+        input_size: int,
+        output_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        """Register the INT8 weight and its block-wise scale on ``layer``.
+
+        Registers ``weight`` with shape
+        ``(sum(output_partition_sizes), input_size_per_partition)`` and
+        ``weight_scale_inv`` with one float32 entry per
+        ``(block_n, block_k)`` tile (ceil division). ``input_scale`` is
+        registered as None because only the dynamic scheme is supported.
+
+        Raises:
+            ValueError: If ``output_partition_sizes`` is None, or if a
+                partitioned dimension is not divisible by its block size.
+        """
+        # Fail with a clear message instead of the opaque TypeError that
+        # sum(None) would raise just below.
+        if output_partition_sizes is None:
+            raise ValueError(
+                "output_partition_sizes must be provided for quantization")
+
+        output_size_per_partition = sum(output_partition_sizes)
+        weight_loader = extra_weight_attrs.get("weight_loader")
+
+        tp_size = get_tensor_model_parallel_world_size()
+
+        block_n, block_k = (
+            self.quant_config.weight_block_size[0],
+            self.quant_config.weight_block_size[1],
+        )
+        # Required by row parallel
+        if tp_size > 1 and input_size // input_size_per_partition == tp_size:
+            if input_size_per_partition % block_k != 0:
+                raise ValueError(
+                    f"Weight input_size_per_partition = "
+                    f"{input_size_per_partition} is not divisible by "
+                    f"weight quantization block_k = {block_k}."
+                )
+        # Required by column parallel or enabling merged weights
+        if (tp_size > 1 and output_size // output_size_per_partition == tp_size) or len(
+            output_partition_sizes
+        ) > 1:
+            for output_partition_size in output_partition_sizes:
+                if output_partition_size % block_n != 0:
+                    raise ValueError(
+                        f"Weight output_partition_size = "
+                        f"{output_partition_size} is not divisible by "
+                        f"weight quantization block_n = {block_n}."
+                    )
+
+        layer.logical_widths = output_partition_sizes
+
+        layer.input_size_per_partition = input_size_per_partition
+        layer.output_size_per_partition = output_size_per_partition
+        layer.orig_dtype = params_dtype
+
+        # WEIGHT
+        weight_dtype = (
+            torch.int8
+            if self.quant_config.is_checkpoint_int8_serialized
+            else params_dtype
+        )
+
+        weight = ModelWeightParameter(
+            data=torch.empty(
+                output_size_per_partition, input_size_per_partition, dtype=weight_dtype
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        layer.register_parameter("weight", weight)
+
+        # WEIGHT SCALE
+
+        scale = BlockQuantScaleParameter(
+            data=torch.empty(
+                (output_size_per_partition + block_n - 1) // block_n,
+                (input_size_per_partition + block_k - 1) // block_k,
+                dtype=torch.float32,
+            ),
+            input_dim=1,
+            output_dim=0,
+            weight_loader=weight_loader,
+        )
+        # Fill with the most negative float32 as a placeholder until the
+        # checkpoint scales are loaded over it.
+        scale[:] = torch.finfo(torch.float32).min
+        layer.register_parameter("weight_scale_inv", scale)
+
+        # INPUT ACTIVATION SCALE
+        assert self.quant_config.activation_scheme == "dynamic"
+        layer.register_parameter("input_scale", None)
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        """Re-wrap the loaded tensors as plain ``torch.nn.Parameter``s."""
+        # Block quant doesn't need to process weights after loading
+        # Use torch Parameter to avoid cuda graph capturing issue
+        layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False)
+        layer.weight_scale_inv = torch.nn.Parameter(
+            layer.weight_scale_inv.data, requires_grad=False
+        )
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        bias: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        """Run the block-wise W8A8 INT8 linear on ``x``.
+
+        Delegates to ``apply_w8a8_block_int8_linear`` with the layer's
+        weight and ``weight_scale_inv``; ``input_scale`` is passed as None.
+        """
+        return apply_w8a8_block_int8_linear(
+            input=x,
+            weight=layer.weight,
+            block_size=self.quant_config.weight_block_size,
+            weight_scale=layer.weight_scale_inv,
+            input_scale=None,
+            bias=bias,
+        )
+
+class BlockInt8MoEMethod(FusedMoEMethodBase):
+    """MoE method for INT8.
+    Supports loading INT8 checkpoints with static weight scale and
+    dynamic activation scale.
+
+    Limitations:
+    Only support block-wise int8 quantization and int8 checkpoint
+
+    Args:
+        quant_config: The quantization config.
+    """
+
+    # NOTE: this class subclasses FusedMoEMethodBase directly. The module
+    # already imports FusedMoEMethodBase at the top, so building a substitute
+    # class inside __new__ on every instantiation is unnecessary; it also made
+    # the returned objects fail isinstance(obj, BlockInt8MoEMethod).
+
+    def __init__(self, quant_config):
+        self.quant_config = quant_config
+        assert self.quant_config.weight_block_size is not None
+        assert self.quant_config.is_checkpoint_int8_serialized
+
+    def create_weights(
+        self,
+        layer: Module,
+        num_experts: int,
+        hidden_size: int,
+        intermediate_size: int,
+        params_dtype: torch.dtype,
+        **extra_weight_attrs,
+    ):
+        """Register the expert weights and block-wise scales on ``layer``.
+
+        Registers:
+            w13_weight: ``(num_experts, 2 * intermediate_size, hidden_size)``
+            w2_weight: ``(num_experts, hidden_size, intermediate_size)``
+            w13_weight_scale_inv / w2_weight_scale_inv: float32, one entry
+                per ``(block_n, block_k)`` tile (ceil division), initialised
+                to ones.
+
+        ``extra_weight_attrs`` is attached to every parameter; the scale
+        parameters additionally carry the BLOCK ``quant_method`` attribute.
+
+        Raises:
+            ValueError: If ``intermediate_size`` is not divisible by
+                ``block_n``, or (when tensor parallel size > 1) by
+                ``block_k``.
+        """
+        if self.quant_config.is_checkpoint_int8_serialized:
+            params_dtype = torch.int8
+        tp_size = get_tensor_model_parallel_world_size()
+
+        block_n, block_k = (
+            self.quant_config.weight_block_size[0],
+            self.quant_config.weight_block_size[1],
+        )
+        # NOTE(HandH1998): To ensure proper alignment of the block-wise
+        # quantization scales, the output_size of the weights for both the
+        # gate and up layers must be divisible by block_n.
+        # Required by column parallel or enabling merged weights
+        if intermediate_size % block_n != 0:
+            raise ValueError(
+                f"The output_size of gate's and up's weight = "
+                f"{intermediate_size} is not divisible by "
+                f"weight quantization block_n = {block_n}."
+            )
+        if tp_size > 1:
+            # Required by row parallel
+            if intermediate_size % block_k != 0:
+                raise ValueError(
+                    f"The input_size of down's weight = "
+                    f"{intermediate_size} is not divisible by "
+                    f"weight quantization block_k = {block_k}."
+                )
+
+        # WEIGHTS
+        w13_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts, 2 * intermediate_size, hidden_size, dtype=params_dtype
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight", w13_weight)
+        set_weight_attrs(w13_weight, extra_weight_attrs)
+
+        w2_weight = torch.nn.Parameter(
+            torch.empty(
+                num_experts, hidden_size, intermediate_size, dtype=params_dtype
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w2_weight", w2_weight)
+        set_weight_attrs(w2_weight, extra_weight_attrs)
+
+        # WEIGHT_SCALES
+        w13_weight_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                2 * ((intermediate_size + block_n - 1) // block_n),
+                (hidden_size + block_k - 1) // block_k,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        w2_weight_scale = torch.nn.Parameter(
+            torch.ones(
+                num_experts,
+                (hidden_size + block_n - 1) // block_n,
+                (intermediate_size + block_k - 1) // block_k,
+                dtype=torch.float32,
+            ),
+            requires_grad=False,
+        )
+        layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
+        layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
+
+        # Only the scale parameters get the BLOCK quant_method attribute, so
+        # the update happens after the weights were tagged above.
+        extra_weight_attrs.update(
+            {"quant_method": FusedMoeWeightScaleSupported.BLOCK.value}
+        )
+        set_weight_attrs(w13_weight_scale, extra_weight_attrs)
+        set_weight_attrs(w2_weight_scale, extra_weight_attrs)
+
+        # INPUT_SCALES
+        assert self.quant_config.activation_scheme == "dynamic"
+        layer.w13_input_scale = None
+        layer.w2_input_scale = None
+
+    def process_weights_after_loading(self, layer: Module) -> None:
+        """No-op: block quant doesn't need to process weights after loading."""
+        return
+
+    def apply(
+        self,
+        layer: torch.nn.Module,
+        x: torch.Tensor,
+        router_logits: torch.Tensor,
+        top_k: int,
+        renormalize: bool,
+        use_grouped_topk: bool,
+        topk_group: Optional[int] = None,
+        use_nn_moe: Optional[bool] = False,
+        num_expert_group: Optional[int] = None,
+        custom_routing_function: Optional[Callable] = None,
+        scoring_func: str = "softmax",
+        e_score_correction_bias: Optional[torch.Tensor] = None,
+
+        moe_ep_size: Optional[int] = 1,
+        start_expert: Optional[int] = -1,
+        end_expert: Optional[int] = -1
+    ) -> torch.Tensor:
+        """Select experts for ``x`` and run the fused INT8 W8A8 MoE.
+
+        Routing is done by ``FusedMoE.select_experts``; the result is fed
+        to ``fused_experts`` with ``use_int8_w8a8=True`` and
+        ``inplace=True``, using the layer's block-wise weight scales and
+        the config's ``weight_block_size`` as ``block_shape``.
+        """
+        # Imported here because `fused_experts` is not among this module's
+        # top-level imports.
+        from vllm.model_executor.layers.fused_moe import fused_experts
+
+        # Expert selection
+        topk_weights, topk_ids = FusedMoE.select_experts(
+            hidden_states=x,
+            router_logits=router_logits,
+            use_grouped_topk=use_grouped_topk,
+            top_k=top_k,
+            renormalize=renormalize,
+            topk_group=topk_group,
+            num_expert_group=num_expert_group,
+            custom_routing_function=custom_routing_function,
+            scoring_func=scoring_func,
+            e_score_correction_bias=e_score_correction_bias
+        )
+
+        # Expert fusion with INT8 quantization
+
+        return fused_experts(
+            x,
+            layer.w13_weight,
+            layer.w2_weight,
+            topk_weights=topk_weights,
+            topk_ids=topk_ids,
+            inplace=True,
+            use_int8_w8a8=True,
+            w1_scale=(layer.w13_weight_scale_inv),
+            w2_scale=(layer.w2_weight_scale_inv),
+            a1_scale=layer.w13_input_scale,
+            a2_scale=layer.w2_input_scale,
+            block_shape=self.quant_config.weight_block_size,
+            use_nn_moe=use_nn_moe,
+            moe_ep_size=moe_ep_size,
+            start_expert=start_expert,
+            end_expert=end_expert
+        )
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_12288_4096_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_12288_4096_K100_AI.json
-{
-    "12288_4096": {
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "2": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "3": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "10": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_1280_8192_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_1280_8192_K100_AI.json
-{
-    "1280_8192": {
-        "1": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "4": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "6": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "7": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "9": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "15": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "20": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "36": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "80": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "88": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "512": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 1,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_13824_5120_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_13824_5120_K100_AI.json
-{
-    "13824_5120": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "28": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_14336_8192_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_14336_8192_K100_AI.json
-{
-    "14336_8192": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 2
-        },
-        "17": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_15360_5120_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_15360_5120_K100_AI.json
-{
-    "15360_5120": {
-        "1": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "3": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "10": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "14": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "15": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "40": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "52": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "56": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "60": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "64": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_22016_4096_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_22016_4096_K100_AI.json
-{
-    "22016_4096": {
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "56": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_2560_8192_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_2560_8192_K100_AI.json
-{
-    "2560_8192": {
-        "1": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "4": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "6": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "7": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "9": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "15": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "17": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "20": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "36": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "96": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "144": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "152": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "160": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "256": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "16384": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_27648_5120_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_27648_5120_K100_AI.json
-{
-    "27648_5120": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 4,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "10": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "14": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "15": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_4096_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_4096_K100_AI.json
-{
-    "28672_4096": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_8192_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_28672_8192_K100_AI.json
-{
-    "28672_8192": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_32000_4096_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_32000_4096_K100_AI.json
-{
-    "32000_4096": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "5": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "11": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "12": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "13": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "14": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        },
-        "15": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 16,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 4
-        },
-        "16": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "20": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "28": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "32": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "40": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "44": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "48": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "52": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "60": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "256": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 64,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 2,
-            "num_warps": 8
-        }
-    }
-}
\ No newline at end of file
--- a/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_18944_K100_AI.json
+++ b/vllm/model_executor/layers/quantization/configs/w8a8/W8A8_3584_18944_K100_AI.json
-{
-    "3584_18944": {
-        "1": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "2": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "3": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "4": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "5": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 2,
-            "num_warps": 2
-        },
-        "6": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "7": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "8": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "9": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "10": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "11": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "12": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "13": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "14": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "15": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "16": {
-            "BLOCK_SIZE_M": 16,
-            "BLOCK_SIZE_N": 32,
-            "BLOCK_SIZE_K": 512,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 2
-        },
-        "20": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "24": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "28": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "32": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "36": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "40": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "44": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "48": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "52": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "56": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "60": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "64": {
-            "BLOCK_SIZE_M": 32,
-            "BLOCK_SIZE_N": 64,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 4
-        },
-        "72": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "80": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "88": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "96": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "104": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "112": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "120": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "128": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "136": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "144": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "152": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "160": {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 8,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 16
-        },
-        "256": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 256,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 2,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "512": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "1024": {
-            "BLOCK_SIZE_M": 128,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 4,
-            "SPLIT_K": 1,
-            "num_stages": 0,
-            "num_warps": 8
-        },
-        "2048": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "4096": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 256,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 8
-        },
-        "8192": {
-            "BLOCK_SIZE_M": 256,
-            "BLOCK_SIZE_N": 128,
-            "BLOCK_SIZE_K": 128,
-            "GROUP_SIZE_M": 2,
-            "SPLIT_K": 1,
-            "num_stages": 1,
-            "num_warps": 4
-        }
-    }
-}
\ No newline at end of file