feat: rename Dequantize to DequantizeAWQ in nvidia gpu

4217976d · zhushuang · d3d982df · 4217976d · d3d982df · 4217976d
Commit 4217976d authored Sep 23, 2025 by zhushuang
12 changed files
--- a/include/infiniop.h
+++ b/include/infiniop.h
@@ -7,7 +7,7 @@
 #include "infiniop/ops/causal_softmax.h"
 #include "infiniop/ops/clip.h"
 #include "infiniop/ops/conv.h"
-#include "infiniop/ops/dequantize.h"
+#include "infiniop/ops/dequantize_awq.h"
 #include "infiniop/ops/gemm.h"
 #include "infiniop/ops/mul.h"
 #include "infiniop/ops/random_sample.h"

--- a/include/infiniop/ops/dequantize.h
+++ /dev/null
-#ifndef __INFINIOP_DEQUANTIZE_API_H__
-#define __INFINIOP_DEQUANTIZE_API_H__
-
-#include "../operator_descriptor.h"
-
-typedef struct InfiniopDescriptor *infiniopDequantizeDescriptor_t;
-
-__C __export infiniStatus_t infiniopCreateDequantizeDescriptor(infiniopHandle_t handle,
-                                                               infiniopDequantizeDescriptor_t *desc_ptr,
-                                                               infiniopTensorDescriptor_t out_desc,
-                                                               infiniopTensorDescriptor_t qweight_desc,
-                                                               infiniopTensorDescriptor_t scales_desc,
-                                                               infiniopTensorDescriptor_t zeros_desc);
-
-__C __export infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescriptor_t desc, size_t *size);
-
-__C __export infiniStatus_t infiniopDequantize(infiniopDequantizeDescriptor_t desc,
-                                               void *workspace,
-                                               size_t workspace_size,
-                                               void *out,
-                                               const void *qweight,
-                                               const void *scales,
-                                               const void *zeros,
-                                               void *stream);
-
-__C __export infiniStatus_t infiniopDestroyDequantizeDescriptor(infiniopDequantizeDescriptor_t desc);
-
-#endif
--- /dev/null
+++ b/include/infiniop/ops/dequantize_awq.h
+#ifndef __INFINIOP_DEQUANTIZE_AWQ_API_H__
+#define __INFINIOP_DEQUANTIZE_AWQ_API_H__
+
+#include "../operator_descriptor.h"
+
+typedef struct InfiniopDescriptor *infiniopDequantizeAWQDescriptor_t;
+
+__C __export infiniStatus_t infiniopCreateDequantizeAWQDescriptor(infiniopHandle_t handle,
+                                                                  infiniopDequantizeAWQDescriptor_t *desc_ptr,
+                                                                  infiniopTensorDescriptor_t out_desc,
+                                                                  infiniopTensorDescriptor_t qweight_desc,
+                                                                  infiniopTensorDescriptor_t scales_desc,
+                                                                  infiniopTensorDescriptor_t zeros_desc);
+
+__C __export infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc, size_t *size);
+
+__C __export infiniStatus_t infiniopDequantizeAWQ(infiniopDequantizeAWQDescriptor_t desc,
+                                                  void *workspace,
+                                                  size_t workspace_size,
+                                                  void *out,
+                                                  const void *qweight,
+                                                  const void *scales,
+                                                  const void *zeros,
+                                                  void *stream);
+
+__C __export infiniStatus_t infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc);
+
+#endif
--- a/src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cuh
+++ /dev/null
-#ifndef __DEQUANTIZE_CUDA_CUH__
-#define __DEQUANTIZE_CUDA_CUH__
-
-#include "../dequantize.h"
-
-DESCRIPTOR(nvidia)
-
-#endif // __GEMM_CUDA_CUH__
--- a/src/infiniop/ops/dequantize/dequantize.h
+++ b/src/infiniop/ops/dequantize_awq/dequantize_awq.h
-#ifndef __DEQUANTIZE_H__
-#define __DEQUANTIZE_H__
+#ifndef __DEQUANTIZE_AWQ_H__
+#define __DEQUANTIZE_AWQ_H__

 #include "../../../utils.h"
 #include "../../operator.h"
@@ -8,17 +8,17 @@

 #define DESCRIPTOR(NAMESPACE)                                    \
                                                                 \
-    namespace op::dequantize::NAMESPACE {                        \
+    namespace op::dequantize_awq::NAMESPACE {                    \
    class Descriptor final : public InfiniopDescriptor {         \
        struct Opaque;                                           \
        Opaque *_opaque;                                         \
-        DequantizeInfo _info;                                    \
+        DequantizeAWQInfo _info;                                 \
        size_t _workspace_size;                                  \
                                                                 \
        Descriptor(                                              \
            size_t workspace_size_,                              \
            Opaque *opaque,                                      \
-            DequantizeInfo info,                                 \
+            DequantizeAWQInfo info,                              \
            infiniDevice_t device_type,                          \
            int device_id)                                       \
            : InfiniopDescriptor{device_type, device_id},        \
@@ -49,4 +49,5 @@
            void *stream) const;                                 \
    };                                                           \
    }
-#endif
+
+#endif // __DEQUANTIZE_AWQ_H__
--- a/src/infiniop/ops/dequantize/info.h
+++ b/src/infiniop/ops/dequantize/info.h
-#ifndef __DEQUANTIZE_INFO_H__
-#define __DEQUANTIZE_INFO_H__
+#ifndef __DEQUANTIZE_AWQ_INFO_H__
+#define __DEQUANTIZE_AWQ_INFO_H__

 #include "../../../utils.h"
 #include "../../tensor.h"
 #include <vector>

-namespace op::dequantize {
+namespace op::dequantize_awq {

-class DequantizeInfo {
-    DequantizeInfo() = default;
+class DequantizeAWQInfo {
+    DequantizeAWQInfo() = default;

 public:
    int _in_features, _out_features, _num_groups;
@@ -17,7 +17,7 @@ public:
    int out_features() const { return _out_features; }
    int num_groups() const { return _num_groups; }

-    static utils::Result<DequantizeInfo> create(
+    static utils::Result<DequantizeAWQInfo> create(
        infiniopTensorDescriptor_t out_desc,
        infiniopTensorDescriptor_t qweight_desc,
        infiniopTensorDescriptor_t scales_desc,
@@ -27,13 +27,13 @@ public:
        int _out_features = qweight_desc->dim(1);
        int _num_groups = scales_desc->dim(0);

-        return utils::Result<DequantizeInfo>(DequantizeInfo{
+        return utils::Result<DequantizeAWQInfo>(DequantizeAWQInfo{
            _in_features,
            _out_features,
            _num_groups});
    }
 };

-} // namespace op::dequantize
+} // namespace op::dequantize_awq

-#endif // __DEQUANTIZE_INFO_H__
+#endif // __DEQUANTIZE_AWQ_INFO_H__
--- a/src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_kernel.cuh
+++ b/src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_kernel.cuh
--- a/src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cu
+++ b/src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cu
@@ -5,7 +5,7 @@
 #include "dequantize_w42f16_kernel.cuh"
 #include "dequantize_w42f16_nvidia.cuh"

-#include "../dequantize.h"
+#include "../dequantize_awq.h"
 #include <cuda_fp16.h>

 __global__ void __launch_bounds__(64)
@@ -68,7 +68,7 @@ __global__ void __launch_bounds__(64)
    }
 }

-namespace op::dequantize::nvidia {
+namespace op::dequantize_awq::nvidia {

 struct Descriptor::Opaque {
    std::shared_ptr<device::nvidia::Handle::Internal> internal;
@@ -87,7 +87,7 @@ infiniStatus_t Descriptor::create(
    infiniopTensorDescriptor_t zeros_desc) {

    auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
-    auto result = DequantizeInfo::create(out_desc, qweight_desc, scales_desc, zeros_desc);
+    auto result = DequantizeAWQInfo::create(out_desc, qweight_desc, scales_desc, zeros_desc);

    *desc_ptr = new Descriptor(
        0,
@@ -133,6 +133,6 @@ Descriptor::calculate(
    return INFINI_STATUS_SUCCESS;
 }

-} // namespace op::dequantize::nvidia
+} // namespace op::dequantize_awq::nvidia

 #endif
--- /dev/null
+++ b/src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cuh
+#ifndef __DEQUANTIZE_AWQ_CUDA_CUH__
+#define __DEQUANTIZE_AWQ_CUDA_CUH__
+
+#include "../dequantize_awq.h"
+
+DESCRIPTOR(nvidia)
+
+#endif // __DEQUANTIZE_AWQ_CUDA_CUH__
--- a/src/infiniop/ops/dequantize/operator.cc
+++ b/src/infiniop/ops/dequantize_awq/operator.cc
 #include "../../operator.h"
 #include "../../handle.h"
-#include "infiniop/ops/dequantize.h"
+#include "infiniop/ops/dequantize_awq.h"

 #ifdef ENABLE_NVIDIA_API
 #include "nvidia/dequantize_w42f16_nvidia.cuh"
 #endif

-__C infiniStatus_t infiniopCreateDequantizeDescriptor(
+__C infiniStatus_t infiniopCreateDequantizeAWQDescriptor(
    infiniopHandle_t handle,
-    infiniopDequantizeDescriptor_t *desc_ptr,
+    infiniopDequantizeAWQDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    infiniopTensorDescriptor_t qweight_desc,
    infiniopTensorDescriptor_t scales_desc,
    infiniopTensorDescriptor_t zeros_desc) {

-#define CREATE(CASE, NAMESPACE)                                                   \
-    case CASE:                                                                    \
-        return op::dequantize::NAMESPACE::Descriptor::create(                     \
-            handle,                                                               \
-            reinterpret_cast<op::dequantize::NAMESPACE::Descriptor **>(desc_ptr), \
-            out_desc,                                                             \
-            qweight_desc,                                                         \
-            scales_desc,                                                          \
+#define CREATE(CASE, NAMESPACE)                                                       \
+    case CASE:                                                                        \
+        return op::dequantize_awq::NAMESPACE::Descriptor::create(                     \
+            handle,                                                                   \
+            reinterpret_cast<op::dequantize_awq::NAMESPACE::Descriptor **>(desc_ptr), \
+            out_desc,                                                                 \
+            qweight_desc,                                                             \
+            scales_desc,                                                              \
            zeros_desc)

    switch (handle->device) {
@@ -35,11 +35,11 @@ __C infiniStatus_t infiniopCreateDequantizeDescriptor(
 #undef CREATE
 }

-__C infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescriptor_t desc,
-                                                      size_t *size) {
-#define GET(CASE, NAMESPACE)                                                                            \
-    case CASE:                                                                                          \
-        *size = reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
+__C infiniStatus_t infiniopGetDequantizeAWQWorkspaceSize(infiniopDequantizeAWQDescriptor_t desc,
+                                                         size_t *size) {
+#define GET(CASE, NAMESPACE)                                                                                \
+    case CASE:                                                                                              \
+        *size = reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
        return INFINI_STATUS_SUCCESS

    switch (desc->device_type) {
@@ -52,8 +52,8 @@ __C infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescript
 #undef GET
 }

-__C infiniStatus_t infiniopDequantize(
-    infiniopDequantizeDescriptor_t desc,
+__C infiniStatus_t infiniopDequantizeAWQ(
+    infiniopDequantizeAWQDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *out,
@@ -62,9 +62,9 @@ __C infiniStatus_t infiniopDequantize(
    const void *zeros,
    void *stream) {

-#define CALCULATE(CASE, NAMESPACE)                                                   \
-    case CASE:                                                                       \
-        return reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc) \
+#define CALCULATE(CASE, NAMESPACE)                                                       \
+    case CASE:                                                                           \
+        return reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc) \
            ->calculate(workspace, workspace_size, out, qweight, scales, zeros, stream)

    switch (desc->device_type) {
@@ -79,11 +79,11 @@ __C infiniStatus_t infiniopDequantize(
 }

 __C infiniStatus_t
-infiniopDestroyDequantizeDescriptor(infiniopDequantizeDescriptor_t desc) {
+infiniopDestroyDequantizeAWQDescriptor(infiniopDequantizeAWQDescriptor_t desc) {

-#define DELETE(CASE, NAMESPACE)                                                       \
-    case CASE:                                                                        \
-        delete reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc); \
+#define DELETE(CASE, NAMESPACE)                                                           \
+    case CASE:                                                                            \
+        delete reinterpret_cast<const op::dequantize_awq::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {

--- a/test/infiniop/dequantize.py
+++ b/test/infiniop/dequantize.py
@@ -140,7 +140,7 @@ AWQ_ORDER = [0, 2, 4, 6, 1, 3, 5, 7]
 AWQ_REVERSE_ORDER = [0, 4, 1, 5, 2, 6, 3, 7]


-def dequantize(
+def dequantize_awq(
    qweight: torch.Tensor,
    qzeros: torch.Tensor,
    qscales: torch.Tensor,
@@ -216,7 +216,7 @@ def test(
    sync=None,
 ):
    print(
-        f"Testing Dequantize on {InfiniDeviceNames[device]} with bits:{bits}, group_size:{group_size},"
+        f"Testing Dequantize AWQ on {InfiniDeviceNames[device]} with bits:{bits}, group_size:{group_size},"
        f" qweights_shape:{qweights_shape}, qzeros_shape:{qzeros_shape}, qscales_shape:{qscales_shape},"
        f" qweights_stride:{qweights_stride}, qzeros_stride:{qzeros_stride}, qscales_stride:{qscales_stride},"
        f" qweights_dtype:{InfiniDtypeNames[qweights_dtype]}, qzeros_dtype:{InfiniDtypeNames[qzeros_dtype]}, qscales_dtype:{InfiniDtypeNames[qscales_dtype]}"
@@ -225,14 +225,16 @@ def test(
    qweights = TestTensor(
        qweights_shape, qweights_stride, qweights_dtype, device, mode="randint"
    )
-    qzeros = TestTensor(qzeros_shape, qzeros_stride, qzeros_dtype, device, mode="randint")
+    qzeros = TestTensor(
+        qzeros_shape, qzeros_stride, qzeros_dtype, device, mode="randint"
+    )
    qscales = TestTensor(qscales_shape, qscales_stride, qscales_dtype, device)
    out = TestTensor(out_shape, out_stride, out_dtype, device, mode="zeros")
    ans = TestTensor(out_shape, out_stride, out_dtype, device, mode="ones")

    # Compute the PyTorch reference result
-    def torch_dequantize():
-        return dequantize(
+    def torch_dequantize_awq():
+        return dequantize_awq(
            qweights.torch_tensor(),
            qzeros.torch_tensor(),
            qscales.torch_tensor(),
@@ -240,14 +242,14 @@ def test(
            group_size,
        )

-    ans = torch_dequantize()
+    ans = torch_dequantize_awq()

    if sync is not None:
        sync()

    descriptor = infiniopOperatorDescriptor_t()
    check_error(
-        LIBINFINIOP.infiniopCreateDequantizeDescriptor(
+        LIBINFINIOP.infiniopCreateDequantizeAWQDescriptor(
            handle,
            ctypes.byref(descriptor),
            out.descriptor,
@@ -264,16 +266,16 @@ def test(
    # Get workspace size and create workspace
    workspace_size = c_uint64(0)
    check_error(
-        LIBINFINIOP.infiniopGetDequantizeWorkspaceSize(
+        LIBINFINIOP.infiniopGetDequantizeAWQWorkspaceSize(
            descriptor, ctypes.byref(workspace_size)
        )
    )
    workspace = TestWorkspace(workspace_size.value, device)

-    # Execute infiniop gemm operator
+    # Execute infiniop dequantize_awq operator
-    def lib_dequantize():
+    def lib_dequantize_awq():
        check_error(
-            LIBINFINIOP.infiniopDequantize(
+            LIBINFINIOP.infiniopDequantizeAWQ(
                descriptor,
                workspace.data(),
                workspace_size.value,
@@ -285,7 +287,7 @@ def test(
            )
        )

-    lib_dequantize()
+    lib_dequantize_awq()

    # Validate results
    atol, rtol = get_tolerance(_TOLERANCE_MAP, dtype)
@@ -298,10 +300,10 @@ def test(
    # Profiling workflow
    if PROFILE:
        # fmt: off
-        profile_operation("PyTorch", lambda: torch_dequantize(), device, NUM_PRERUN, NUM_ITERATIONS)
-        profile_operation("    lib", lambda: lib_dequantize(), device, NUM_PRERUN, NUM_ITERATIONS)
+        profile_operation("PyTorch", lambda: torch_dequantize_awq(), device, NUM_PRERUN, NUM_ITERATIONS)
+        profile_operation("    lib", lambda: lib_dequantize_awq(), device, NUM_PRERUN, NUM_ITERATIONS)
        # fmt: on
-    check_error(LIBINFINIOP.infiniopDestroyDequantizeDescriptor(descriptor))
+    check_error(LIBINFINIOP.infiniopDestroyDequantizeAWQDescriptor(descriptor))


 # ==============================================================================

--- a/test/infiniop/libinfiniop/op_register.py
+++ b/test/infiniop/libinfiniop/op_register.py
@@ -533,8 +533,8 @@ def topkrouter_(lib):

 @OpRegister.operator
 def dequantize_(lib):
-    lib.infiniopCreateDequantizeDescriptor.restype = c_int32
-    lib.infiniopCreateDequantizeDescriptor.argtypes = [
+    lib.infiniopCreateDequantizeAWQDescriptor.restype = c_int32
+    lib.infiniopCreateDequantizeAWQDescriptor.argtypes = [
        infiniopHandle_t,
        POINTER(infiniopOperatorDescriptor_t),
        infiniopTensorDescriptor_t,
@@ -542,13 +542,13 @@ def dequantize_(lib):
        infiniopTensorDescriptor_t,
        infiniopTensorDescriptor_t,
    ]
-    lib.infiniopGetDequantizeWorkspaceSize.restype = c_int32
-    lib.infiniopGetDequantizeWorkspaceSize.argtypes = [
+    lib.infiniopGetDequantizeAWQWorkspaceSize.restype = c_int32
+    lib.infiniopGetDequantizeAWQWorkspaceSize.argtypes = [
        infiniopOperatorDescriptor_t,
        POINTER(c_size_t),
    ]
-    lib.infiniopDequantize.restype = c_int32
-    lib.infiniopDequantize.argtypes = [
+    lib.infiniopDequantizeAWQ.restype = c_int32
+    lib.infiniopDequantizeAWQ.argtypes = [
        infiniopOperatorDescriptor_t,
        c_void_p,
        c_size_t,
@@ -557,8 +557,8 @@ def dequantize_(lib):
        c_void_p,
        c_void_p,
    ]
-    lib.infiniopDestroyDequantizeDescriptor.restype = c_int32
-    lib.infiniopDestroyDequantizeDescriptor.argtypes = [
+    lib.infiniopDestroyDequantizeAWQDescriptor.restype = c_int32
+    lib.infiniopDestroyDequantizeAWQDescriptor.argtypes = [
        infiniopOperatorDescriptor_t,
    ]