Commit 35ad7d1e authored by PanZezhong's avatar PanZezhong
Browse files

issue/89/ascend 添加昇腾运行时

parent 309878f0
#include "tensor_aclnn.h" #include "tensor_aclnn.h"
#include "../../../utils.h" #include "../../../utils.h"
#include "../../tensor.h"
#include <algorithm> #include <algorithm>
infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides) { infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides) {
...@@ -36,15 +37,15 @@ infiniStatus_t aclnnTensorDescriptor::inferStorageShape() { ...@@ -36,15 +37,15 @@ infiniStatus_t aclnnTensorDescriptor::inferStorageShape() {
/// @param y infiniopTensorDescriptor /// @param y infiniopTensorDescriptor
/// @return infiniopStatus_t /// @return infiniopStatus_t
infiniStatus_t aclnnTensorDescriptor::fromInfiniOpTensorDescriptor(infiniopTensorDescriptor_t y) { infiniStatus_t aclnnTensorDescriptor::fromInfiniOpTensorDescriptor(infiniopTensorDescriptor_t y) {
uint64_t ndim = y->ndim; uint64_t ndim = y->ndim();
// Cast shape type // Cast shape type
auto shape = std::vector<int64_t>(ndim); auto shape = std::vector<int64_t>(ndim);
auto strides = std::vector<int64_t>(ndim); auto strides = std::vector<int64_t>(ndim);
for (uint64_t i = 0; i < ndim; ++i) { for (uint64_t i = 0; i < ndim; ++i) {
shape[i] = static_cast<int64_t>(y->shape[i]); shape[i] = static_cast<int64_t>(y->dim(i));
strides[i] = y->strides[i]; strides[i] = y->stride(i);
} }
return setDescriptor(toAclDataType(y->dtype), shape, strides); return setDescriptor(toAclDataType(y->dtype()), shape, strides);
} }
/// @brief Wrapper of aclCreateTensor. Create aclTensor. /// @brief Wrapper of aclCreateTensor. Create aclTensor.
......
#ifndef __ACLNN_TENSOR__ #ifndef __ACLNN_TENSOR__
#define __ACLNN_TENSOR__ #define __ACLNN_TENSOR__
#include "../../operator.h"
#include "common_ascend.h" #include "common_ascend.h"
#include "infiniop/operator.h"
#include <acl/acl.h> #include <acl/acl.h>
#include <acl/acl_base.h> #include <acl/acl_base.h>
#include <aclnn/acl_meta.h> #include <aclnn/acl_meta.h>
......
...@@ -35,7 +35,7 @@ infiniStatus_t Descriptor::create( ...@@ -35,7 +35,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) { infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopAscendHandle_t>(handle_); auto handle = reinterpret_cast<infiniopAscendHandle_t>(handle_);
auto dtype = c_desc->dtype; auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) { if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE; return INFINI_STATUS_BAD_TENSOR_DTYPE;
...@@ -54,17 +54,17 @@ infiniStatus_t Descriptor::create( ...@@ -54,17 +54,17 @@ infiniStatus_t Descriptor::create(
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched // Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation // operation
CHECK_STATUS(c->setDescriptor( CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype), toAclDataType(c_desc->dtype()),
{static_cast<int64_t>(info.c_matrix.rows), {static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)}, static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride})); {info.c_matrix.row_stride, info.c_matrix.col_stride}));
CHECK_STATUS(a->setDescriptor( CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype), toAclDataType(a_desc->dtype()),
{static_cast<int64_t>(info.a_matrix.rows), {static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)}, static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride})); {info.a_matrix.row_stride, info.a_matrix.col_stride}));
CHECK_STATUS(b->setDescriptor( CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype), toAclDataType(b_desc->dtype()),
{static_cast<int64_t>(info.b_matrix.rows), {static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)}, static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride})); {info.b_matrix.row_stride, info.b_matrix.col_stride}));
......
#include "infinirt_ascend.h"
#include "../../utils.h"
#include <acl/acl.h>
#include <mutex>
// Guards the single aclInit call performed by infinirt::ascend::init().
std::once_flag acl_init_flag;
// Shorthand for CHECK_INTERNAL with ACL_SUCCESS as the expected result of an ACL call.
// CHECK_INTERNAL is defined outside this file (presumably in utils.h — TODO confirm).
#define CHECK_ACLRT(API) CHECK_INTERNAL(API, ACL_SUCCESS)
namespace infinirt::ascend {
/// @brief Initialize the ACL runtime at most once per process.
///
/// aclInit is executed under acl_init_flag, so it runs a single time no matter
/// how many callers reach this function. Its result is kept in a function-local
/// static: a per-call local would be written only in the call that actually
/// performs the initialization, and every other caller would check a fresh
/// ACL_SUCCESS and report success even though aclInit had failed.
///
/// @return INFINI_STATUS_SUCCESS when aclInit returned ACL_SUCCESS. A failed
///         aclInit is handled by CHECK_ACLRT on every call, not only the first.
infiniStatus_t init() {
    // Written once inside call_once; call_once makes the write visible to all
    // callers that return from it afterwards.
    static aclError _err = ACL_SUCCESS;
    std::call_once(acl_init_flag, []() {
        _err = aclInit(nullptr);
    });
    CHECK_ACLRT(_err);
    return INFINI_STATUS_SUCCESS;
}
/// @brief Ask the ACL runtime how many devices it reports.
/// @param count Output location; receives the count converted from uint32_t to int.
/// @return INFINI_STATUS_SUCCESS after the count has been stored. A failing
///         aclrtGetDeviceCount is handled by CHECK_ACLRT.
infiniStatus_t getDeviceCount(int *count) {
    // The ACL call reports an unsigned 32-bit value; the public API exposes int.
    uint32_t count_ = 0;
    CHECK_ACLRT(aclrtGetDeviceCount(&count_));
    *count = static_cast<int>(count_);
    return INFINI_STATUS_SUCCESS;
}
/// @brief Forward the requested device id to aclrtSetDevice.
/// @param device_id Device index handed to the ACL runtime unchanged.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t setDevice(int device_id) {
CHECK_ACLRT(aclrtSetDevice(device_id));
return INFINI_STATUS_SUCCESS;
}
/// @brief Call aclrtSynchronizeDevice and report the outcome.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t deviceSynchronize() {
CHECK_ACLRT(aclrtSynchronizeDevice());
return INFINI_STATUS_SUCCESS;
}
/// @brief Create an ACL stream and hand it back as an infinirt stream handle.
///
/// The stream is created through aclrtCreateStreamWithConfig with priority
/// argument 0 and the ACL_STREAM_FAST_LAUNCH flag.
///
/// @param stream_ptr Output location for the new handle.
/// @return INFINI_STATUS_SUCCESS after the handle has been stored. A failing
///         ACL call is handled by CHECK_ACLRT.
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
    aclrtStream acl_stream;
    CHECK_ACLRT(aclrtCreateStreamWithConfig(&acl_stream, 0, ACL_STREAM_FAST_LAUNCH));
    // Expose the ACL handle through the backend-neutral handle type.
    *stream_ptr = reinterpret_cast<infinirtStream_t>(acl_stream);
    return INFINI_STATUS_SUCCESS;
}
/// @brief Destroy a stream through aclrtDestroyStream.
/// @param stream Handle that is cast to aclrtStream before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t streamDestroy(infinirtStream_t stream) {
CHECK_ACLRT(aclrtDestroyStream((aclrtStream)stream));
return INFINI_STATUS_SUCCESS;
}
/// @brief Call aclrtSynchronizeStream on the given stream.
/// @param stream Handle that is cast to aclrtStream before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t streamSynchronize(infinirtStream_t stream) {
CHECK_ACLRT(aclrtSynchronizeStream((aclrtStream)stream));
return INFINI_STATUS_SUCCESS;
}
/// @brief Forward a stream/event pair to aclrtStreamWaitEvent.
/// @param stream Handle that is cast to aclrtStream before the call.
/// @param event Handle that is cast to aclrtEvent before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
CHECK_ACLRT(aclrtStreamWaitEvent((aclrtStream)stream, (aclrtEvent)event));
return INFINI_STATUS_SUCCESS;
}
/// @brief Create an ACL event and hand it back as an infinirt event handle.
/// @param event_ptr Output location for the new handle.
/// @return INFINI_STATUS_SUCCESS after the handle has been stored. A failing
///         aclrtCreateEvent is handled by CHECK_ACLRT.
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) {
    aclrtEvent acl_event;
    CHECK_ACLRT(aclrtCreateEvent(&acl_event));
    // Expose the ACL handle through the backend-neutral handle type.
    *event_ptr = reinterpret_cast<infinirtEvent_t>(acl_event);
    return INFINI_STATUS_SUCCESS;
}
/// @brief Forward an event/stream pair to aclrtRecordEvent.
/// @param event Handle that is cast to aclrtEvent before the call.
/// @param stream Handle that is cast to aclrtStream before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) {
CHECK_ACLRT(aclrtRecordEvent((aclrtEvent)event, (aclrtStream)stream));
return INFINI_STATUS_SUCCESS;
}
/// @brief Translate the ACL recorded-status of an event into an infinirt status.
/// @param event Handle that is cast to aclrtEvent before the query.
/// @param status_ptr Output; set to INFINIRT_EVENT_COMPLETE when ACL reports
///        ACL_EVENT_RECORDED_STATUS_COMPLETE, otherwise INFINIRT_EVENT_NOT_READY.
/// @return INFINI_STATUS_SUCCESS after the status has been stored. A failing
///         aclrtQueryEventStatus is handled by CHECK_ACLRT.
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
    aclrtEventRecordedStatus status;
    CHECK_ACLRT(aclrtQueryEventStatus((aclrtEvent)event, &status));
    // Any ACL status other than "complete" is reported as not ready.
    const bool finished = (status == ACL_EVENT_RECORDED_STATUS_COMPLETE);
    *status_ptr = finished ? INFINIRT_EVENT_COMPLETE : INFINIRT_EVENT_NOT_READY;
    return INFINI_STATUS_SUCCESS;
}
/// @brief Call aclrtSynchronizeEvent on the given event.
/// @param event Handle that is cast to aclrtEvent before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t eventSynchronize(infinirtEvent_t event) {
CHECK_ACLRT(aclrtSynchronizeEvent((aclrtEvent)event));
return INFINI_STATUS_SUCCESS;
}
/// @brief Destroy an event through aclrtDestroyEvent.
/// @param event Handle that is cast to aclrtEvent before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t eventDestroy(infinirtEvent_t event) {
CHECK_ACLRT(aclrtDestroyEvent((aclrtEvent)event));
return INFINI_STATUS_SUCCESS;
}
/// @brief Allocate memory through aclrtMallocAlign32 using the
///        ACL_MEM_MALLOC_HUGE_FIRST policy.
/// @param p_ptr Output location passed straight to the ACL call.
/// @param size Requested size, passed straight to the ACL call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t mallocDevice(void **p_ptr, size_t size) {
CHECK_ACLRT(aclrtMallocAlign32(p_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST));
return INFINI_STATUS_SUCCESS;
}
/// @brief Allocate memory through aclrtMallocHost.
/// @param p_ptr Output location passed straight to the ACL call.
/// @param size Requested size, passed straight to the ACL call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t mallocHost(void **p_ptr, size_t size) {
CHECK_ACLRT(aclrtMallocHost(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
/// @brief Release memory through aclrtFree.
/// @param ptr Pointer passed straight to the ACL call; presumably one obtained
///        from mallocDevice — TODO confirm against callers.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t freeDevice(void *ptr) {
CHECK_ACLRT(aclrtFree(ptr));
return INFINI_STATUS_SUCCESS;
}
/// @brief Release memory through aclrtFreeHost.
/// @param ptr Pointer passed straight to the ACL call; presumably one obtained
///        from mallocHost — TODO confirm against callers.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t freeHost(void *ptr) {
CHECK_ACLRT(aclrtFreeHost(ptr));
return INFINI_STATUS_SUCCESS;
}
/// @brief Map an infinirt copy direction onto the matching ACL copy kind.
/// @param kind One of the INFINIRT_MEMCPY_* values.
/// @return The corresponding ACL_MEMCPY_* value; ACL_MEMCPY_DEFAULT for any
///         value that is not one of the four handled directions.
aclrtMemcpyKind toAclrtMemcpyKind(infinirtMemcpyKind_t kind) {
    if (kind == INFINIRT_MEMCPY_H2D) {
        return ACL_MEMCPY_HOST_TO_DEVICE;
    }
    if (kind == INFINIRT_MEMCPY_D2H) {
        return ACL_MEMCPY_DEVICE_TO_HOST;
    }
    if (kind == INFINIRT_MEMCPY_D2D) {
        return ACL_MEMCPY_DEVICE_TO_DEVICE;
    }
    if (kind == INFINIRT_MEMCPY_H2H) {
        return ACL_MEMCPY_HOST_TO_HOST;
    }
    // Unrecognized direction: fall back to the ACL default kind.
    return ACL_MEMCPY_DEFAULT;
}
/// @brief Copy memory through aclrtMemcpy.
/// @param dst Destination pointer.
/// @param src Source pointer.
/// @param size Passed as both size arguments of aclrtMemcpy.
/// @param kind Copy direction; converted with toAclrtMemcpyKind.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
CHECK_ACLRT(aclrtMemcpy(dst, size, src, size, toAclrtMemcpyKind(kind)));
return INFINI_STATUS_SUCCESS;
}
/// @brief Copy memory through aclrtMemcpyAsync on the given stream.
/// @param dst Destination pointer.
/// @param src Source pointer.
/// @param size Passed as both size arguments of aclrtMemcpyAsync.
/// @param kind Copy direction; converted with toAclrtMemcpyKind.
/// @param stream Handle that is cast to aclrtStream before the call.
/// @return INFINI_STATUS_SUCCESS if CHECK_ACLRT lets execution continue.
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
CHECK_ACLRT(aclrtMemcpyAsync(dst, size, src, size, toAclrtMemcpyKind(kind), (aclrtStream)stream));
return INFINI_STATUS_SUCCESS;
}
/// @brief Stream-flavoured allocation entry point; delegates to mallocDevice.
/// @param p_ptr Output location forwarded to mallocDevice.
/// @param size Requested size forwarded to mallocDevice.
/// @param stream Accepted for interface compatibility; not used here.
/// @return Whatever mallocDevice returns.
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
    static_cast<void>(stream); // the allocation below does not involve the stream
    const infiniStatus_t alloc_status = mallocDevice(p_ptr, size);
    return alloc_status;
}
/// @brief Stream-flavoured release entry point; delegates to freeDevice.
/// @param ptr Pointer forwarded to freeDevice.
/// @param stream Accepted for interface compatibility; not used here.
/// @return Whatever freeDevice returns.
// NOTE(review): the stream is not synchronized before the release — confirm
// that callers do not free memory still referenced by work queued on it.
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
    static_cast<void>(stream); // the release below does not involve the stream
    const infiniStatus_t free_status = freeDevice(ptr);
    return free_status;
}
} // namespace infinirt::ascend
#undef CHECK_ACLRT
// Declarations for the Ascend backend of infinirt (namespace infinirt::ascend).
#ifndef __INFINIRT_ASCEND_H__
#define __INFINIRT_ASCEND_H__
#include "../infinirt_impl.h"
namespace infinirt::ascend {
#ifdef ENABLE_ASCEND_API
// Backend initialization; only declared when the Ascend API is enabled, so
// callers must guard their use with the same ENABLE_ASCEND_API check.
infiniStatus_t init();
// Macro defined outside this header; presumably expands to the declarations of
// the per-device runtime functions — TODO confirm in infinirt_impl.h.
INFINIRT_DEVICE_API_IMPL
#else
// Macro defined outside this header; presumably provides stand-ins for the same
// functions when the backend is disabled — TODO confirm in infinirt_impl.h.
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::ascend
#endif // __INFINIRT_ASCEND_H__
#include "infinirt.h" #include "infinirt.h"
#include "../utils.h"
#include "ascend/infinirt_ascend.h"
#include "cpu/infinirt_cpu.h" #include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh" #include "cuda/infinirt_cuda.cuh"
...@@ -6,6 +8,9 @@ thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU; ...@@ -6,6 +8,9 @@ thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU;
thread_local int CURRENT_DEVICE_ID = 0; thread_local int CURRENT_DEVICE_ID = 0;
__C infiniStatus_t infinirtInit() { __C infiniStatus_t infinirtInit() {
#ifdef ENABLE_ASCEND_API
CHECK_STATUS(infinirt::ascend::init());
#endif
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
...@@ -46,6 +51,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ ...@@ -46,6 +51,9 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
case INFINI_DEVICE_NVIDIA: \ case INFINI_DEVICE_NVIDIA: \
_status = infinirt::cuda::API PARAMS; \ _status = infinirt::cuda::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_ASCEND: \
_status = infinirt::ascend::API PARAMS; \
break; \
default: \ default: \
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \ return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; \
} \ } \
......
...@@ -128,6 +128,9 @@ target("infinirt") ...@@ -128,6 +128,9 @@ target("infinirt")
if has_config("nv-gpu") then if has_config("nv-gpu") then
add_deps("infinirt-cuda") add_deps("infinirt-cuda")
end end
if has_config("ascend-npu") then
add_deps("infinirt-ascend")
end
set_languages("cxx17") set_languages("cxx17")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")) set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
add_files("src/infinirt/*.cc") add_files("src/infinirt/*.cc")
......
...@@ -54,3 +54,13 @@ target("infiniop-ascend") ...@@ -54,3 +54,13 @@ target("infiniop-ascend")
-- add_rules("ascend-kernels") -- add_rules("ascend-kernels")
-- add_links(builddir.."/libascend_kernels.a") -- add_links(builddir.."/libascend_kernels.a")
target_end() target_end()
-- Static library target built from the Ascend infinirt sources.
target("infinirt-ascend")
set_kind("static")
set_languages("cxx17")
-- Install hook with an empty body, so this target performs no install step of its own.
on_install(function (target) end)
add_deps("infini-utils")
-- Add files
add_files("$(projectdir)/src/infinirt/ascend/*.cc")
-- NOTE(review): -lstdc++ is a linker option but is passed here among the compile
-- flags, and all flags share one string — confirm xmake handles this as intended.
add_cxflags("-lstdc++ -Wall -Werror -fPIC")
target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment