Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
33f5256a
Commit
33f5256a
authored
Sep 23, 2025
by
zhushuang
Browse files
fix: update AWQ-dequantize op name to match infinicore op dequantizeAWQ
parent
664589d0
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
8 deletions
+8
-8
src/cache_manager/opcache_manager.hpp
src/cache_manager/opcache_manager.hpp
+2
-2
src/models/inference_context.cpp
src/models/inference_context.cpp
+6
-6
No files found.
src/cache_manager/opcache_manager.hpp
View file @
33f5256a
...
...
@@ -161,7 +161,7 @@ public:
DECLARE_OP_CACHE
(
Topkrouter
)
DECLARE_OP_CACHE
(
SwiGLU
)
DECLARE_OP_CACHE
(
RandomSample
)
DECLARE_OP_CACHE
(
Dequantize
)
DECLARE_OP_CACHE
(
Dequantize
AWQ
)
CacheManager
(
size_t
capacity
=
100
)
:
Add_cache
(
capacity
,
DESTROY_FUNC
(
Add
)),
...
...
@@ -173,7 +173,7 @@ public:
Topkrouter_cache
(
capacity
,
DESTROY_FUNC
(
Topkrouter
)),
SwiGLU_cache
(
capacity
,
DESTROY_FUNC
(
SwiGLU
)),
RandomSample_cache
(
capacity
,
DESTROY_FUNC
(
RandomSample
)),
Dequantize_cache
(
capacity
,
DESTROY_FUNC
(
Dequantize
))
{}
Dequantize
AWQ
_cache
(
capacity
,
DESTROY_FUNC
(
Dequantize
AWQ
))
{}
template
<
typename
...
Tensors
>
static
size_t
createDescriptorKey
(
Tensors
...
tensors
)
{
...
...
src/models/inference_context.cpp
View file @
33f5256a
...
...
@@ -266,18 +266,18 @@ void InferenceContext::dequant(std::shared_ptr<Tensor> weight,
size_t
key
=
CacheManager
::
createDescriptorKey
(
weight
,
in_w
,
in_s
,
in_z
);
infiniopDequantizeDescriptor_t
desc
;
if
(
!
cache_manager
->
getDequantizeDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateDequantizeDescriptor
(
op_handle
,
&
desc
,
weight
->
desc
(),
in_w
->
desc
(),
in_s
->
desc
(),
in_z
->
desc
()));
cache_manager
->
putDequantizeDescriptor
(
key
,
desc
);
infiniopDequantize
AWQ
Descriptor_t
desc
;
if
(
!
cache_manager
->
getDequantize
AWQ
Descriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateDequantize
AWQ
Descriptor
(
op_handle
,
&
desc
,
weight
->
desc
(),
in_w
->
desc
(),
in_s
->
desc
(),
in_z
->
desc
()));
cache_manager
->
putDequantize
AWQ
Descriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetDequantizeWorkspaceSize
(
desc
,
&
workspace_size
));
RUN_INFINI
(
infiniopGetDequantize
AWQ
WorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopDequantize
(
RUN_INFINI
(
infiniopDequantize
AWQ
(
desc
,
workspace
,
workspace_size
,
weight
->
data
(),
in_w
->
data
(),
in_s
->
data
(),
in_z
->
data
(),
stream
));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment