Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
4217976d
Commit
4217976d
authored
Sep 23, 2025
by
zhushuang
Browse files
feat: rename Dequantize to DequantizeAWQ in nvidia gpu
parent
d3d982df
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
106 additions
and
103 deletions
+106
-103
include/infiniop.h
include/infiniop.h
+1
-1
include/infiniop/ops/dequantize.h
include/infiniop/ops/dequantize.h
+0
-28
include/infiniop/ops/dequantize_awq.h
include/infiniop/ops/dequantize_awq.h
+28
-0
src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cuh
...finiop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cuh
+0
-8
src/infiniop/ops/dequantize_awq/dequantize_awq.h
src/infiniop/ops/dequantize_awq/dequantize_awq.h
+7
-6
src/infiniop/ops/dequantize_awq/info.h
src/infiniop/ops/dequantize_awq/info.h
+9
-9
src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_kernel.cuh
...op/ops/dequantize_awq/nvidia/dequantize_w42f16_kernel.cuh
+0
-0
src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cu
...iop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cu
+4
-4
src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cuh
...op/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cuh
+8
-0
src/infiniop/ops/dequantize_awq/operator.cc
src/infiniop/ops/dequantize_awq/operator.cc
+25
-25
test/infiniop/dequantize_awq.py
test/infiniop/dequantize_awq.py
+16
-14
test/infiniop/libinfiniop/op_register.py
test/infiniop/libinfiniop/op_register.py
+8
-8
No files found.
include/infiniop.h
View file @
4217976d
...
...
@@ -7,7 +7,7 @@
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/dequantize.h"
#include "infiniop/ops/dequantize
_awq
.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/mul.h"
#include "infiniop/ops/random_sample.h"
...
...
include/infiniop/ops/dequantize.h
deleted
100644 → 0
View file @
d3d982df
#ifndef __INFINIOP_DEQUANTIZE_API_H__
#define __INFINIOP_DEQUANTIZE_API_H__
#include "../operator_descriptor.h"
typedef
struct
InfiniopDescriptor
*
infiniopDequantizeDescriptor_t
;
__C
__export
infiniStatus_t
infiniopCreateDequantizeDescriptor
(
infiniopHandle_t
handle
,
infiniopDequantizeDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
out_desc
,
infiniopTensorDescriptor_t
qweight_desc
,
infiniopTensorDescriptor_t
scales_desc
,
infiniopTensorDescriptor_t
zeros_desc
);
__C
__export
infiniStatus_t
infiniopGetDequantizeWorkspaceSize
(
infiniopDequantizeDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniStatus_t
infiniopDequantize
(
infiniopDequantizeDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
out
,
const
void
*
qweight
,
const
void
*
scales
,
const
void
*
zeros
,
void
*
stream
);
__C
__export
infiniStatus_t
infiniopDestroyDequantizeDescriptor
(
infiniopDequantizeDescriptor_t
desc
);
#endif
include/infiniop/ops/dequantize_awq.h
0 → 100644
View file @
4217976d
#ifndef __INFINIOP_DEQUANTIZE_AWQ_API_H__
#define __INFINIOP_DEQUANTIZE_AWQ_API_H__
#include "../operator_descriptor.h"
typedef
struct
InfiniopDescriptor
*
infiniopDequantizeAWQDescriptor_t
;
__C
__export
infiniStatus_t
infiniopCreateDequantizeAWQDescriptor
(
infiniopHandle_t
handle
,
infiniopDequantizeAWQDescriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
out_desc
,
infiniopTensorDescriptor_t
qweight_desc
,
infiniopTensorDescriptor_t
scales_desc
,
infiniopTensorDescriptor_t
zeros_desc
);
__C
__export
infiniStatus_t
infiniopGetDequantizeAWQWorkspaceSize
(
infiniopDequantizeAWQDescriptor_t
desc
,
size_t
*
size
);
__C
__export
infiniStatus_t
infiniopDequantizeAWQ
(
infiniopDequantizeAWQDescriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
out
,
const
void
*
qweight
,
const
void
*
scales
,
const
void
*
zeros
,
void
*
stream
);
__C
__export
infiniStatus_t
infiniopDestroyDequantizeAWQDescriptor
(
infiniopDequantizeAWQDescriptor_t
desc
);
#endif
src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cuh
deleted
100644 → 0
View file @
d3d982df
#ifndef __DEQUANTIZE_CUDA_CUH__
#define __DEQUANTIZE_CUDA_CUH__
#include "../dequantize.h"
DESCRIPTOR
(
nvidia
)
#endif // __GEMM_CUDA_CUH__
src/infiniop/ops/dequantize/dequantize.h
→
src/infiniop/ops/dequantize
_awq
/dequantize
_awq
.h
View file @
4217976d
#ifndef __DEQUANTIZE_H__
#define __DEQUANTIZE_H__
#ifndef __DEQUANTIZE_
AWQ_
H__
#define __DEQUANTIZE_
AWQ_
H__
#include "../../../utils.h"
#include "../../operator.h"
...
...
@@ -8,17 +8,17 @@
#define DESCRIPTOR(NAMESPACE) \
\
namespace op::dequantize::NAMESPACE {
\
namespace op::dequantize
_awq
::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
struct Opaque; \
Opaque *_opaque; \
DequantizeInfo _info;
\
Dequantize
AWQ
Info _info; \
size_t _workspace_size; \
\
Descriptor( \
size_t workspace_size_, \
Opaque *opaque, \
DequantizeInfo info,
\
Dequantize
AWQ
Info info, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
...
...
@@ -49,4 +49,5 @@
void *stream) const; \
}; \
}
#endif
#endif //__DEQUANTIZE_AWQ_H__
src/infiniop/ops/dequantize/info.h
→
src/infiniop/ops/dequantize
_awq
/info.h
View file @
4217976d
#ifndef __DEQUANTIZE_INFO_H__
#define __DEQUANTIZE_INFO_H__
#ifndef __DEQUANTIZE_
AWQ_
INFO_H__
#define __DEQUANTIZE_
AWQ_
INFO_H__
#include "../../../utils.h"
#include "../../tensor.h"
#include <vector>
namespace
op
::
dequantize
{
namespace
op
::
dequantize
_awq
{
class
DequantizeInfo
{
DequantizeInfo
()
=
default
;
class
Dequantize
AWQ
Info
{
Dequantize
AWQ
Info
()
=
default
;
public:
int
_in_features
,
_out_features
,
_num_groups
;
...
...
@@ -17,7 +17,7 @@ public:
int
out_features
()
const
{
return
_out_features
;
}
int
num_groups
()
const
{
return
_num_groups
;
}
static
utils
::
Result
<
DequantizeInfo
>
create
(
static
utils
::
Result
<
Dequantize
AWQ
Info
>
create
(
infiniopTensorDescriptor_t
out_desc
,
infiniopTensorDescriptor_t
qweight_desc
,
infiniopTensorDescriptor_t
scales_desc
,
...
...
@@ -27,13 +27,13 @@ public:
int
_out_features
=
qweight_desc
->
dim
(
1
);
int
_num_groups
=
scales_desc
->
dim
(
0
);
return
utils
::
Result
<
DequantizeInfo
>
(
DequantizeInfo
{
return
utils
::
Result
<
Dequantize
AWQ
Info
>
(
Dequantize
AWQ
Info
{
_in_features
,
_out_features
,
_num_groups
});
}
};
}
// namespace op::dequantize
}
// namespace op::dequantize
_awq
#endif // __DEQUANTIZE_INFO_H__
#endif // __DEQUANTIZE_
AWQ_
INFO_H__
src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_kernel.cuh
→
src/infiniop/ops/dequantize
_awq
/nvidia/dequantize_w42f16_kernel.cuh
View file @
4217976d
File moved
src/infiniop/ops/dequantize/nvidia/dequantize_w42f16_nvidia.cu
→
src/infiniop/ops/dequantize
_awq
/nvidia/dequantize_w42f16_nvidia.cu
View file @
4217976d
...
...
@@ -5,7 +5,7 @@
#include "dequantize_w42f16_kernel.cuh"
#include "dequantize_w42f16_nvidia.cuh"
#include "../dequantize.h"
#include "../dequantize
_awq
.h"
#include <cuda_fp16.h>
__global__
void
__launch_bounds__
(
64
)
...
...
@@ -68,7 +68,7 @@ __global__ void __launch_bounds__(64)
}
}
namespace
op
::
dequantize
::
nvidia
{
namespace
op
::
dequantize
_awq
::
nvidia
{
struct
Descriptor
::
Opaque
{
std
::
shared_ptr
<
device
::
nvidia
::
Handle
::
Internal
>
internal
;
...
...
@@ -87,7 +87,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t
zeros_desc
)
{
auto
handle
=
reinterpret_cast
<
device
::
nvidia
::
Handle
*>
(
handle_
);
auto
result
=
DequantizeInfo
::
create
(
out_desc
,
qweight_desc
,
scales_desc
,
zeros_desc
);
auto
result
=
Dequantize
AWQ
Info
::
create
(
out_desc
,
qweight_desc
,
scales_desc
,
zeros_desc
);
*
desc_ptr
=
new
Descriptor
(
0
,
...
...
@@ -133,6 +133,6 @@ Descriptor::calculate(
return
INFINI_STATUS_SUCCESS
;
}
}
// namespace op::dequantize::nvidia
}
// namespace op::dequantize
_awq
::nvidia
#endif
src/infiniop/ops/dequantize_awq/nvidia/dequantize_w42f16_nvidia.cuh
0 → 100644
View file @
4217976d
#ifndef __DEQUANTIZE_AWQ_CUDA_CUH__
#define __DEQUANTIZE_AWQ_CUDA_CUH__
#include "../dequantize_awq.h"
DESCRIPTOR
(
nvidia
)
#endif // __DEQUANTIZE_AWQ_CUDA_CUH__
src/infiniop/ops/dequantize/operator.cc
→
src/infiniop/ops/dequantize
_awq
/operator.cc
View file @
4217976d
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/dequantize.h"
#include "infiniop/ops/dequantize
_awq
.h"
#ifdef ENABLE_NVIDIA_API
#include "nvidia/dequantize_w42f16_nvidia.cuh"
#endif
__C
infiniStatus_t
infiniopCreateDequantizeDescriptor
(
__C
infiniStatus_t
infiniopCreateDequantize
AWQ
Descriptor
(
infiniopHandle_t
handle
,
infiniopDequantizeDescriptor_t
*
desc_ptr
,
infiniopDequantize
AWQ
Descriptor_t
*
desc_ptr
,
infiniopTensorDescriptor_t
out_desc
,
infiniopTensorDescriptor_t
qweight_desc
,
infiniopTensorDescriptor_t
scales_desc
,
infiniopTensorDescriptor_t
zeros_desc
)
{
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return op::dequantize::NAMESPACE::Descriptor::create( \
handle, \
reinterpret_cast<op::dequantize::NAMESPACE::Descriptor **>(desc_ptr), \
out_desc, \
qweight_desc, \
scales_desc, \
#define CREATE(CASE, NAMESPACE)
\
case CASE:
\
return op::dequantize
_awq
::NAMESPACE::Descriptor::create( \
handle,
\
reinterpret_cast<op::dequantize
_awq
::NAMESPACE::Descriptor **>(desc_ptr), \
out_desc,
\
qweight_desc,
\
scales_desc,
\
zeros_desc)
switch
(
handle
->
device
)
{
...
...
@@ -35,11 +35,11 @@ __C infiniStatus_t infiniopCreateDequantizeDescriptor(
#undef CREATE
}
__C
infiniStatus_t
infiniopGetDequantizeWorkspaceSize
(
infiniopDequantizeDescriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
__C
infiniStatus_t
infiniopGetDequantize
AWQ
WorkspaceSize
(
infiniopDequantize
AWQ
Descriptor_t
desc
,
size_t
*
size
)
{
#define GET(CASE, NAMESPACE)
\
case CASE:
\
*size = reinterpret_cast<const op::dequantize
_awq
::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
return INFINI_STATUS_SUCCESS
switch
(
desc
->
device_type
)
{
...
...
@@ -52,8 +52,8 @@ __C infiniStatus_t infiniopGetDequantizeWorkspaceSize(infiniopDequantizeDescript
#undef GET
}
__C
infiniStatus_t
infiniopDequantize
(
infiniopDequantizeDescriptor_t
desc
,
__C
infiniStatus_t
infiniopDequantize
AWQ
(
infiniopDequantize
AWQ
Descriptor_t
desc
,
void
*
workspace
,
size_t
workspace_size
,
void
*
out
,
...
...
@@ -62,9 +62,9 @@ __C infiniStatus_t infiniopDequantize(
const
void
*
zeros
,
void
*
stream
)
{
#define CALCULATE(CASE, NAMESPACE) \
case CASE: \
return reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc) \
#define CALCULATE(CASE, NAMESPACE)
\
case CASE:
\
return reinterpret_cast<const op::dequantize
_awq
::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, out, qweight, scales, zeros, stream)
switch
(
desc
->
device_type
)
{
...
...
@@ -79,11 +79,11 @@ __C infiniStatus_t infiniopDequantize(
}
__C
infiniStatus_t
infiniopDestroyDequantizeDescriptor
(
infiniopDequantizeDescriptor_t
desc
)
{
infiniopDestroyDequantize
AWQ
Descriptor
(
infiniopDequantize
AWQ
Descriptor_t
desc
)
{
#define DELETE(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<const op::dequantize::NAMESPACE::Descriptor *>(desc); \
#define DELETE(CASE, NAMESPACE)
\
case CASE:
\
delete reinterpret_cast<const op::dequantize
_awq
::NAMESPACE::Descriptor *>(desc); \
return INFINI_STATUS_SUCCESS;
switch
(
desc
->
device_type
)
{
...
...
test/infiniop/dequantize.py
→
test/infiniop/dequantize
_awq
.py
View file @
4217976d
...
...
@@ -140,7 +140,7 @@ AWQ_ORDER = [0, 2, 4, 6, 1, 3, 5, 7]
AWQ_REVERSE_ORDER
=
[
0
,
4
,
1
,
5
,
2
,
6
,
3
,
7
]
def
dequantize
(
def
dequantize
_awq
(
qweight
:
torch
.
Tensor
,
qzeros
:
torch
.
Tensor
,
qscales
:
torch
.
Tensor
,
...
...
@@ -216,7 +216,7 @@ def test(
sync
=
None
,
):
print
(
f
"Testing Dequantize on
{
InfiniDeviceNames
[
device
]
}
with bits:
{
bits
}
, group_size:
{
group_size
}
,"
f
"Testing Dequantize
AWQ
on
{
InfiniDeviceNames
[
device
]
}
with bits:
{
bits
}
, group_size:
{
group_size
}
,"
f
" qweights_shape:
{
qweights_shape
}
, qzeros_shape:
{
qzeros_shape
}
, qscales_shape:
{
qscales_shape
}
,"
f
" qweights_stride:
{
qweights_stride
}
, qzeros_stride:
{
qzeros_stride
}
, qscales_stride:
{
qscales_stride
}
,"
f
" qweights_dtype:
{
InfiniDtypeNames
[
qweights_dtype
]
}
, qzeros_dtype:
{
InfiniDtypeNames
[
qzeros_dtype
]
}
, qscales_dtype:
{
InfiniDtypeNames
[
qscales_dtype
]
}
"
...
...
@@ -225,14 +225,16 @@ def test(
qweights
=
TestTensor
(
qweights_shape
,
qweights_stride
,
qweights_dtype
,
device
,
mode
=
"randint"
)
qzeros
=
TestTensor
(
qzeros_shape
,
qzeros_stride
,
qzeros_dtype
,
device
,
mode
=
"randint"
)
qzeros
=
TestTensor
(
qzeros_shape
,
qzeros_stride
,
qzeros_dtype
,
device
,
mode
=
"randint"
)
qscales
=
TestTensor
(
qscales_shape
,
qscales_stride
,
qscales_dtype
,
device
)
out
=
TestTensor
(
out_shape
,
out_stride
,
out_dtype
,
device
,
mode
=
"zeros"
)
ans
=
TestTensor
(
out_shape
,
out_stride
,
out_dtype
,
device
,
mode
=
"ones"
)
# Compute the PyTorch reference result
def
torch_dequantize
():
return
dequantize
(
def
torch_dequantize
_awq
():
return
dequantize
_awq
(
qweights
.
torch_tensor
(),
qzeros
.
torch_tensor
(),
qscales
.
torch_tensor
(),
...
...
@@ -240,14 +242,14 @@ def test(
group_size
,
)
ans
=
torch_dequantize
()
ans
=
torch_dequantize
_awq
()
if
sync
is
not
None
:
sync
()
descriptor
=
infiniopOperatorDescriptor_t
()
check_error
(
LIBINFINIOP
.
infiniopCreateDequantizeDescriptor
(
LIBINFINIOP
.
infiniopCreateDequantize
AWQ
Descriptor
(
handle
,
ctypes
.
byref
(
descriptor
),
out
.
descriptor
,
...
...
@@ -264,16 +266,16 @@ def test(
# Get workspace size and create workspace
workspace_size
=
c_uint64
(
0
)
check_error
(
LIBINFINIOP
.
infiniopGetDequantizeWorkspaceSize
(
LIBINFINIOP
.
infiniopGetDequantize
AWQ
WorkspaceSize
(
descriptor
,
ctypes
.
byref
(
workspace_size
)
)
)
workspace
=
TestWorkspace
(
workspace_size
.
value
,
device
)
# Execute infiniop gemm operator
def
lib_dequantize
():
def
lib_dequantize
_awq
():
check_error
(
LIBINFINIOP
.
infiniopDequantize
(
LIBINFINIOP
.
infiniopDequantize
AWQ
(
descriptor
,
workspace
.
data
(),
workspace_size
.
value
,
...
...
@@ -285,7 +287,7 @@ def test(
)
)
lib_dequantize
()
lib_dequantize
_awq
()
# Validate results
atol
,
rtol
=
get_tolerance
(
_TOLERANCE_MAP
,
dtype
)
...
...
@@ -298,10 +300,10 @@ def test(
# Profiling workflow
if
PROFILE
:
# fmt: off
profile_operation
(
"PyTorch"
,
lambda
:
torch_dequantize
(),
device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
profile_operation
(
" lib"
,
lambda
:
lib_dequantize
(),
device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
profile_operation
(
"PyTorch"
,
lambda
:
torch_dequantize
_awq
(),
device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
profile_operation
(
" lib"
,
lambda
:
lib_dequantize
_awq
(),
device
,
NUM_PRERUN
,
NUM_ITERATIONS
)
# fmt: on
check_error
(
LIBINFINIOP
.
infiniopDestroyDequantizeDescriptor
(
descriptor
))
check_error
(
LIBINFINIOP
.
infiniopDestroyDequantize
AWQ
Descriptor
(
descriptor
))
# ==============================================================================
...
...
test/infiniop/libinfiniop/op_register.py
View file @
4217976d
...
...
@@ -533,8 +533,8 @@ def topkrouter_(lib):
@
OpRegister
.
operator
def
dequantize_
(
lib
):
lib
.
infiniopCreateDequantizeDescriptor
.
restype
=
c_int32
lib
.
infiniopCreateDequantizeDescriptor
.
argtypes
=
[
lib
.
infiniopCreateDequantize
AWQ
Descriptor
.
restype
=
c_int32
lib
.
infiniopCreateDequantize
AWQ
Descriptor
.
argtypes
=
[
infiniopHandle_t
,
POINTER
(
infiniopOperatorDescriptor_t
),
infiniopTensorDescriptor_t
,
...
...
@@ -542,13 +542,13 @@ def dequantize_(lib):
infiniopTensorDescriptor_t
,
infiniopTensorDescriptor_t
,
]
lib
.
infiniopGetDequantizeWorkspaceSize
.
restype
=
c_int32
lib
.
infiniopGetDequantizeWorkspaceSize
.
argtypes
=
[
lib
.
infiniopGetDequantize
AWQ
WorkspaceSize
.
restype
=
c_int32
lib
.
infiniopGetDequantize
AWQ
WorkspaceSize
.
argtypes
=
[
infiniopOperatorDescriptor_t
,
POINTER
(
c_size_t
),
]
lib
.
infiniopDequantize
.
restype
=
c_int32
lib
.
infiniopDequantize
.
argtypes
=
[
lib
.
infiniopDequantize
AWQ
.
restype
=
c_int32
lib
.
infiniopDequantize
AWQ
.
argtypes
=
[
infiniopOperatorDescriptor_t
,
c_void_p
,
c_size_t
,
...
...
@@ -557,8 +557,8 @@ def dequantize_(lib):
c_void_p
,
c_void_p
,
]
lib
.
infiniopDestroyDequantizeDescriptor
.
restype
=
c_int32
lib
.
infiniopDestroyDequantizeDescriptor
.
argtypes
=
[
lib
.
infiniopDestroyDequantize
AWQ
Descriptor
.
restype
=
c_int32
lib
.
infiniopDestroyDequantize
AWQ
Descriptor
.
argtypes
=
[
infiniopOperatorDescriptor_t
,
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment