Commit 33f5256a authored by zhushuang
Browse files

fix: update AWQ-dequantize op name to match infinicore op dequantizeAWQ

parent 664589d0
......@@ -161,7 +161,7 @@ public:
DECLARE_OP_CACHE(Topkrouter)
DECLARE_OP_CACHE(SwiGLU)
DECLARE_OP_CACHE(RandomSample)
DECLARE_OP_CACHE(Dequantize)
DECLARE_OP_CACHE(DequantizeAWQ)
CacheManager(size_t capacity = 100)
: Add_cache(capacity, DESTROY_FUNC(Add)),
......@@ -173,7 +173,7 @@ public:
Topkrouter_cache(capacity, DESTROY_FUNC(Topkrouter)),
SwiGLU_cache(capacity, DESTROY_FUNC(SwiGLU)),
RandomSample_cache(capacity, DESTROY_FUNC(RandomSample)),
Dequantize_cache(capacity, DESTROY_FUNC(Dequantize)) {}
DequantizeAWQ_cache(capacity, DESTROY_FUNC(DequantizeAWQ)) {}
template <typename... Tensors>
static size_t createDescriptorKey(Tensors... tensors) {
......
......@@ -266,18 +266,18 @@ void InferenceContext::dequant(std::shared_ptr<Tensor> weight,
size_t key = CacheManager::createDescriptorKey(weight, in_w, in_s, in_z);
infiniopDequantizeDescriptor_t desc;
if (!cache_manager->getDequantizeDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateDequantizeDescriptor(op_handle, &desc, weight->desc(), in_w->desc(), in_s->desc(), in_z->desc()));
cache_manager->putDequantizeDescriptor(key, desc);
infiniopDequantizeAWQDescriptor_t desc;
if (!cache_manager->getDequantizeAWQDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateDequantizeAWQDescriptor(op_handle, &desc, weight->desc(), in_w->desc(), in_s->desc(), in_z->desc()));
cache_manager->putDequantizeAWQDescriptor(key, desc);
}
size_t workspace_size = 0;
RUN_INFINI(infiniopGetDequantizeWorkspaceSize(desc, &workspace_size));
RUN_INFINI(infiniopGetDequantizeAWQWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopDequantize(
RUN_INFINI(infiniopDequantizeAWQ(
desc, workspace, workspace_size,
weight->data(), in_w->data(), in_s->data(), in_z->data(), stream));
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment