"googlemock/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "a5136dbdd21eab0a3376c7dcd334607d2f3d6bd5"
Commit 33f5256a authored by zhushuang
Browse files

fix: update AWQ-dequantize op name to match infinicore op dequantizeAWQ

parent 664589d0
...@@ -161,7 +161,7 @@ public: ...@@ -161,7 +161,7 @@ public:
DECLARE_OP_CACHE(Topkrouter) DECLARE_OP_CACHE(Topkrouter)
DECLARE_OP_CACHE(SwiGLU) DECLARE_OP_CACHE(SwiGLU)
DECLARE_OP_CACHE(RandomSample) DECLARE_OP_CACHE(RandomSample)
DECLARE_OP_CACHE(Dequantize) DECLARE_OP_CACHE(DequantizeAWQ)
CacheManager(size_t capacity = 100) CacheManager(size_t capacity = 100)
: Add_cache(capacity, DESTROY_FUNC(Add)), : Add_cache(capacity, DESTROY_FUNC(Add)),
...@@ -173,7 +173,7 @@ public: ...@@ -173,7 +173,7 @@ public:
Topkrouter_cache(capacity, DESTROY_FUNC(Topkrouter)), Topkrouter_cache(capacity, DESTROY_FUNC(Topkrouter)),
SwiGLU_cache(capacity, DESTROY_FUNC(SwiGLU)), SwiGLU_cache(capacity, DESTROY_FUNC(SwiGLU)),
RandomSample_cache(capacity, DESTROY_FUNC(RandomSample)), RandomSample_cache(capacity, DESTROY_FUNC(RandomSample)),
Dequantize_cache(capacity, DESTROY_FUNC(Dequantize)) {} DequantizeAWQ_cache(capacity, DESTROY_FUNC(DequantizeAWQ)) {}
template <typename... Tensors> template <typename... Tensors>
static size_t createDescriptorKey(Tensors... tensors) { static size_t createDescriptorKey(Tensors... tensors) {
......
...@@ -266,18 +266,18 @@ void InferenceContext::dequant(std::shared_ptr<Tensor> weight, ...@@ -266,18 +266,18 @@ void InferenceContext::dequant(std::shared_ptr<Tensor> weight,
size_t key = CacheManager::createDescriptorKey(weight, in_w, in_s, in_z); size_t key = CacheManager::createDescriptorKey(weight, in_w, in_s, in_z);
infiniopDequantizeDescriptor_t desc; infiniopDequantizeAWQDescriptor_t desc;
if (!cache_manager->getDequantizeDescriptor(key, desc)) { if (!cache_manager->getDequantizeAWQDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateDequantizeDescriptor(op_handle, &desc, weight->desc(), in_w->desc(), in_s->desc(), in_z->desc())); RUN_INFINI(infiniopCreateDequantizeAWQDescriptor(op_handle, &desc, weight->desc(), in_w->desc(), in_s->desc(), in_z->desc()));
cache_manager->putDequantizeDescriptor(key, desc); cache_manager->putDequantizeAWQDescriptor(key, desc);
} }
size_t workspace_size = 0; size_t workspace_size = 0;
RUN_INFINI(infiniopGetDequantizeWorkspaceSize(desc, &workspace_size)); RUN_INFINI(infiniopGetDequantizeAWQWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size); ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory(); void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopDequantize( RUN_INFINI(infiniopDequantizeAWQ(
desc, workspace, workspace_size, desc, workspace, workspace_size,
weight->data(), in_w->data(), in_s->data(), in_z->data(), stream)); weight->data(), in_w->data(), in_s->data(), in_z->data(), stream));
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment