Commit 02922ce9 authored by zhangyue
Browse files

issue/174: Separate getworkspace and execution

parent 98270602
......@@ -12,6 +12,8 @@ struct Descriptor::Opaque {
aclnnTensorDescriptor_t value;
void *mask_addr;
void *value_addr;
uint64_t workspacesize;
aclOpExecutor *executor;
~Opaque() {
delete x;
......@@ -21,6 +23,9 @@ struct Descriptor::Opaque {
aclrtFree(mask_addr);
aclrtFree(value_addr);
// Delete useless executor
aclDestroyAclOpExecutor(executor);
}
};
......@@ -92,20 +97,19 @@ infiniStatus_t Descriptor::create(
aclTensor *tvalue = value->tensor;
CHECK_ACL(aclnnInplaceMaskedFillTensorGetWorkspaceSize(tx, tmask, tvalue, &workspacesize_mask, &mask_executor));
int64_t dim = 2;
int64_t dim = 2;
CHECK_ACL(aclnnSoftmaxGetWorkspaceSize(tx, dim, ty, &workspacesize_softmax, &executor));
// set executor reusable
aclSetAclOpExecutorRepeatable(executor);
// Create the descriptor
// Create the descriptor
size_t all_workspacesize = std::max(workspacesize_softmax, workspacesize_mask);
*desc_ptr = new Descriptor(new Opaque{x, mask, y, value, mask_addr, value_addr},
*desc_ptr = new Descriptor(new Opaque{x, mask, y, value, mask_addr, value_addr,
workspacesize_softmax, executor},
std::move(info), all_workspacesize, handle_ascend->device, handle_ascend->device_id);
// Delete useless executor
aclDestroyAclOpExecutor(executor);
aclDestroyAclOpExecutor(mask_executor);
return INFINI_STATUS_SUCCESS;
}
......@@ -117,11 +121,8 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, voi
auto ty = _opaque->y->tensor;
auto tmask = _opaque->mask->tensor;
auto tvalue = _opaque->value->tensor;
aclOpExecutor *executor = nullptr;
aclOpExecutor *mask_executor = nullptr;
size_t workspacesize_softmax = 0;
size_t workspacesize_mask = 0;
int64_t dim = 2;
AclSetTensorAddr(mask_executor, 0, tx, (void *)x);
AclSetTensorAddr(mask_executor, 1, tmask, _opaque->mask_addr);
......@@ -129,10 +130,9 @@ infiniStatus_t Descriptor::calculate(void *workspace, size_t workspace_size, voi
CHECK_ACL(aclnnInplaceMaskedFillTensorGetWorkspaceSize(tx, tmask, tvalue, &workspacesize_mask, &mask_executor));
CHECK_ACL(aclnnInplaceMaskedFillTensor(workspace, workspacesize_mask, mask_executor, stream));
AclSetTensorAddr(executor, 0, tx, (void *)x);
AclSetTensorAddr(executor, 1, ty, y);
CHECK_ACL(aclnnSoftmaxGetWorkspaceSize(tx, dim, ty, &workspacesize_softmax, &executor));
CHECK_ACL(aclnnSoftmax(workspace, workspacesize_softmax, executor, stream));
AclSetTensorAddr(_opaque->executor, 0, tx, (void *)x);
AclSetTensorAddr(_opaque->executor, 1, ty, y);
CHECK_ACL(aclnnSoftmax(workspace, _opaque->workspacesize, _opaque->executor, stream));
return INFINI_STATUS_SUCCESS;
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment