Unverified Commit e2b50fd0 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #80 from PanZezhong1725/issue/78

Issue/78 重构utils
parents be731358 05188f30
...@@ -2,9 +2,7 @@ ...@@ -2,9 +2,7 @@
infiniStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr) { infiniStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr) {
int device_id = 0; int device_id = 0;
auto ret = aclrtGetDevice(&device_id); CHECK_ACL(aclrtGetDevice(&device_id));
CHECK_RET(ret == ACL_SUCCESS,
return INFINI_STATUS_DEVICE_NOT_INITIALIZED);
*handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id}; *handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id};
......
...@@ -11,21 +11,15 @@ int64_t numElements(const int64_t *shape, int64_t num) { ...@@ -11,21 +11,15 @@ int64_t numElements(const int64_t *shape, int64_t num) {
infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize) { infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize) {
*workspaceAddr = nullptr; *workspaceAddr = nullptr;
if (workspaceSize > 0) { if (workspaceSize > 0) {
auto ret = aclrtMalloc(workspaceAddr, workspaceSize, CHECK_ACL(aclrtMalloc(workspaceAddr, workspaceSize,
ACL_MEM_MALLOC_HUGE_FIRST); ACL_MEM_MALLOC_HUGE_FIRST));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
} }
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
infiniStatus_t freeWorkspace(void *workspaceAddr) { infiniStatus_t freeWorkspace(void *workspaceAddr) {
if (workspaceAddr != nullptr) { if (workspaceAddr != nullptr) {
auto ret = aclrtFree(workspaceAddr); CHECK_ACL(aclrtFree(workspaceAddr));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclrtFree failed, ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
} }
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
#ifndef __INFINIOP_COMMON_ASCEND_H__ #ifndef __INFINIOP_COMMON_ASCEND_H__
#define __INFINIOP_COMMON_ASCEND_H__ #define __INFINIOP_COMMON_ASCEND_H__
#include "../../utils.h"
#include "ascend_handle.h" #include "ascend_handle.h"
#include <acl/acl.h> #include <acl/acl.h>
#include <acl/acl_base.h> #include <acl/acl_base.h>
...@@ -15,25 +16,7 @@ ...@@ -15,25 +16,7 @@
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif #endif
#define CHECK_ACL(API) CHECK_INTERNAL(API, ACL_SUCCESS)
#define CHECK_RET(cond, return_expr) \
do { \
if (!(cond)) { \
return_expr; \
} \
} while (0)
#define LOG_PRINT(message, ...) \
do { \
printf(message, ##__VA_ARGS__); \
} while (0)
#define LOG_ERROR(message, ...) \
do { \
printf(message, ##__VA_ARGS__); \
return INFINI_STATUS_INTERNAL_ERROR; \
} while (0)
#ifdef __cplusplus #ifdef __cplusplus
}; };
#endif #endif
......
...@@ -70,10 +70,7 @@ infiniStatus_t aclnnTensorDescriptor::createTensor(void *data) { ...@@ -70,10 +70,7 @@ infiniStatus_t aclnnTensorDescriptor::createTensor(void *data) {
} }
infiniStatus_t aclnnTensorDescriptor::destroyTensor() { infiniStatus_t aclnnTensorDescriptor::destroyTensor() {
auto ret = aclDestroyTensor(this->t); CHECK_ACL(aclDestroyTensor(this->t));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclDesctroyTensor failed, ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
t = nullptr; t = nullptr;
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
......
...@@ -5,29 +5,14 @@ ...@@ -5,29 +5,14 @@
#define MAX_WARP_PER_BLOCK 32 #define MAX_WARP_PER_BLOCK 32
#define WARP_SIZE 32 #define WARP_SIZE 32
#include "../../utils.h"
#include <iostream> #include <iostream>
#define CHECK_CUDA_OR_RETURN(call, errorCode) \ #define CHECK_CUDA_OR_RETURN(API, ERROR) CHECK_API_OR(API, cudaSuccess, return ERROR)
do { \
if (auto status = call; status != cudaSuccess) { \
std::cerr << "CUDA error: " << cudaGetErrorString(status) \
<< " in file " << __FILE__ << ", function " << __func__ \
<< ", line " << __LINE__ << std::endl; \
return errorCode; \
} \
} while (0)
#define CHECK_CUDA(call) CHECK_CUDA_OR_RETURN(call, INFINI_STATUS_INTERNAL_ERROR) #define CHECK_CUDA(API) CHECK_INTERNAL(API, cudaSuccess)
#define CHECK_CUDNN(call) \ #define CHECK_CUDNN(API) CHECK_INTERNAL(API, CUDNN_STATUS_SUCCESS)
do { \
if (auto status = call; status != CUDNN_STATUS_SUCCESS) { \
std::cerr << "CUDNN error: " << cudnnGetErrorString(status) \
<< " in file " << __FILE__ << ", function " << __func__ \
<< ", line " << __LINE__ << std::endl; \
return INFINI_STATUS_INTERNAL_ERROR; \
} \
} while (0)
#include "../pool.h" #include "../pool.h"
#include "cuda_handle.h" #include "cuda_handle.h"
......
...@@ -88,10 +88,7 @@ infiniStatus_t Descriptor::create( ...@@ -88,10 +88,7 @@ infiniStatus_t Descriptor::create(
// use alpha = 0.5, beta = 0.5 temporarily // use alpha = 0.5, beta = 0.5 temporarily
int8_t mt = 1; int8_t mt = 1;
auto ret = aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspace_size, &executor); CHECK_ACL(aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspace_size, &executor));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
aclSetAclOpExecutorRepeatable(executor); aclSetAclOpExecutorRepeatable(executor);
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
...@@ -122,12 +119,9 @@ infiniStatus_t Descriptor::calculate( ...@@ -122,12 +119,9 @@ infiniStatus_t Descriptor::calculate(
tb = _opaque->b->t; tb = _opaque->b->t;
size_t workspace_size; size_t workspace_size;
auto ret = aclnnGemmGetWorkspaceSize( CHECK_ACL(aclnnGemmGetWorkspaceSize(
ta, tb, tc, alpha, beta, 0, 0, tc, _opaque->mt, ta, tb, tc, alpha, beta, 0, 0, tc, _opaque->mt,
&workspace_size, &(_opaque->executor)); &workspace_size, &(_opaque->executor)));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
if (workspaceSize_ < workspace_size) { if (workspaceSize_ < workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE; return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
} }
...@@ -139,10 +133,7 @@ infiniStatus_t Descriptor::calculate( ...@@ -139,10 +133,7 @@ infiniStatus_t Descriptor::calculate(
AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit); AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 2, tc, ((char *)c) + i * _info.c_matrix.stride * unit); AclSetTensorAddr(_opaque->executor, 2, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 3, tc, ((char *)c) + i * _info.c_matrix.stride * unit); AclSetTensorAddr(_opaque->executor, 3, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
ret = aclnnGemm(workspace, workspace_size, _opaque->executor, stream); CHECK_ACL(aclnnGemm(workspace, workspace_size, _opaque->executor, stream));
CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
return INFINI_STATUS_INTERNAL_ERROR);
} }
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
......
#include "../../check.h" #include "../../utils.h"
#include "infinirt_cuda.cuh" #include "infinirt_cuda.cuh"
#include <cuda_runtime.h> #include <cuda_runtime.h>
......
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "utils/check.h"
#include "utils/rearrange.h"
#endif
#ifndef INFINI_CHECK_H #ifndef INFINIUTILS_CHECK_H
#define INFINI_CHECK_H #define INFINIUTILS_CHECK_H
#include <iostream> #include <iostream>
#define CHECK_API_OR(API, EXPECT, ACTION) \ #define CHECK_API_OR(API, EXPECT, ACTION) \
...@@ -15,4 +15,4 @@ ...@@ -15,4 +15,4 @@
#define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR) #define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR)
#endif // INFINI_CHECK_H #endif // INFINIUTILS_CHECK_H
#include "rearrange.h"
#include "check.h"
#include <algorithm>
#include <cstring>
#include <vector>
namespace utils {
// Takes ownership of a fully packed plan vector built by create().
RearrangeMeta::RearrangeMeta(std::vector<ptrdiff_t> meta)
    : _meta(std::move(meta)) {}
/// Builds a compact copy plan for rearranging strided data:
/// drops length-1 dims, sorts by destination locality, folds contiguous
/// dims into a single memcpy unit, merges adjacent contiguous dims, and
/// packs the result into one flat vector (layout documented at the
/// accessors: [unit, len suffix-products, dst strides, src strides]).
/// Strides are given in elements and converted to bytes here.
std::optional<RearrangeMeta> RearrangeMeta::create(
    const size_t *shape,
    const ptrdiff_t *dst_strides_,
    const ptrdiff_t *src_strides_,
    size_t ndim,
    size_t element_size) {
    ptrdiff_t unit = element_size;

    struct Dim {
        size_t len;
        ptrdiff_t dst, src;
    };
    std::vector<Dim> dims;
    for (size_t i = 0; i < ndim; ++i) {
        // Drop length-1 dimensions: they contribute nothing to the copy.
        if (shape[i] != 1) {
            // Convert strides from element counts to bytes up front.
            auto sd = dst_strides_[i] * unit, ss = src_strides_[i] * unit;
            // assert (sd != 0)
            dims.push_back(Dim{shape[i], sd, ss});
        }
    }
    // BUG FIX: track the filtered dimension count. The original kept the
    // caller's `ndim`, so the merge loop below could index past `dims`
    // whenever the shape contained size-1 dimensions.
    ndim = dims.size();

    // Sort so the most significant destination stride comes first.
    std::sort(dims.begin(), dims.end(), [](const Dim &a, const Dim &b) {
        if (std::abs(a.dst) == std::abs(b.dst)) {
            if (std::abs(a.src) == std::abs(b.src)) {
                return a.len < b.len;
            }
            return std::abs(a.src) > std::abs(b.src);
        }
        return std::abs(a.dst) > std::abs(b.dst);
    });

    // # Merge contiguous dimensions
    // ## Fold trailing contiguous dimensions into the copy unit.
    while (!dims.empty() && dims.back().dst == unit && dims.back().src == unit) {
        unit *= dims.back().len;
        dims.pop_back();
    }
    // ## Merge adjacent dimensions that form one larger contiguous dim.
    for (size_t i = dims.size(); i > 1; --i) {
        auto &f = dims[i - 2];
        auto &b = dims[i - 1];
        ptrdiff_t len = b.len;
        if (b.dst * len == f.dst && b.src * len == f.src) {
            f = Dim{b.len * f.len, b.dst, b.src};
            // Mark the absorbed dim; it is compacted away below.
            b = Dim{1, 0, 0};
        }
    }
    // BUG FIX: compact interior holes instead of resize(ndim), which could
    // truncate live trailing dims when a merge happened mid-array.
    dims.erase(std::remove_if(dims.begin(), dims.end(),
                              [](const Dim &d) { return d.len == 1; }),
               dims.end());
    ndim = dims.size();

    // Pack: [unit | lens (suffix-multiplied below, +1 sentinel) | dst | src].
    std::vector<ptrdiff_t> meta(2 + ndim * 3);
    meta[0] = unit;
    meta[1 + ndim] = 1;
    for (size_t i = 0; i < ndim; ++i) {
        meta[1 + i] = dims[i].len;
        meta[1 + 1 + ndim + i] = dims[i].dst;
        meta[1 + 1 + ndim * 2 + i] = dims[i].src;
    }
    // Turn the length list into suffix products; meta[1] becomes the total
    // number of memcpy units (count()).
    for (ptrdiff_t i = ndim; i > 0; --i) {
        meta[1 + i - 1] *= meta[1 + i];
    }
    return {RearrangeMeta(std::move(meta))};
}
// Accessors into the packed plan vector `_meta`, laid out as:
//   _meta[0]                   copy unit in bytes
//   _meta[1 .. 1+ndim]         suffix products of dim lengths (_meta[1] == count)
//   _meta[2+ndim .. 1+2*ndim]  destination byte strides
//   _meta[2+2*ndim .. 1+3*ndim] source byte strides
size_t RearrangeMeta::ndim() const { return (_meta.size() - 2) / 3; }
size_t RearrangeMeta::unit() const { return _meta[0]; }
size_t RearrangeMeta::count() const { return _meta[1]; }
const ptrdiff_t *RearrangeMeta::idx_strides() const { return _meta.data() + 2; }
const ptrdiff_t *RearrangeMeta::dst_strides() const { return idx_strides() + ndim(); }
const ptrdiff_t *RearrangeMeta::src_strides() const { return dst_strides() + ndim(); }
/// Executes the plan: performs count() memcpy calls of unit() bytes each,
/// decoding the flat iteration index into per-dimension offsets.
void RearrangeMeta::launch(void *dst_, const void *src_) const {
    auto const ndim_ = ndim();
    auto const count_ = count();
    auto const unit_ = unit();
    auto const idx_strides_ = idx_strides();
    auto const dst_strides_ = dst_strides();
    auto const src_strides_ = src_strides();
    if (count_ == 1) {
        // Fully contiguous: everything collapsed into the unit, one memcpy.
        std::memcpy(dst_, src_, unit_);
    } else {
        // BUG FIX: iterate over all count_ units and divide by
        // idx_strides_[j]. The original looped `i < idx_strides_[0]`
        // (count/len[0] iterations, skipping most of the copy) and used
        // idx_strides_[j + 1], which for the last dim read one element past
        // the idx-stride region into the first destination stride.
        for (size_t i = 0; i < count_; ++i) {
            auto dst = reinterpret_cast<char *>(dst_);
            auto src = reinterpret_cast<const char *>(src_);
            auto rem = i;
            for (size_t j = 0; j < ndim_; ++j) {
                // idx_strides_[j] is the suffix product of the lengths of
                // dims j+1..ndim-1, so this peels off the index along dim j.
                auto k = rem / idx_strides_[j];
                dst += k * dst_strides_[j];
                src += k * src_strides_[j];
                rem %= idx_strides_[j];
            }
            std::memcpy(dst, src, unit_);
        }
    }
}
/// Convenience wrapper: builds a RearrangeMeta plan for the given layout
/// and immediately applies it, copying `src` into `dst`.
/// Strides are in elements of `element_size` bytes.
/// Calls std::abort() if no plan can be built.
void rearrange(
    void *dst,
    const void *src,
    const size_t *shape,
    const ptrdiff_t *dst_strides,
    const ptrdiff_t *src_strides,
    size_t ndim,
    size_t element_size) {
    if (auto plan = RearrangeMeta::create(shape, dst_strides, src_strides, ndim, element_size)) {
        plan->launch(dst, src);
    } else {
        std::abort();
    }
}
} // namespace utils
#ifndef __INFINIUTILS_REARRANGE_H__
#define __INFINIUTILS_REARRANGE_H__
#include <optional>
#include <stddef.h>
#include <vector>
namespace utils {
/// A precomputed plan for copying one strided memory layout into another
/// (same logical shape, possibly different strides). Build once with
/// create(), then apply to raw pointers with launch().
class RearrangeMeta {
    // Packed plan: [unit bytes | length suffix-products | dst byte strides
    // | src byte strides] — see the accessor definitions for exact offsets.
    std::vector<ptrdiff_t> _meta;

    // Private: instances are only constructed through create().
    RearrangeMeta(std::vector<ptrdiff_t>);

public:
    /// Builds a plan for an `ndim`-dimensional tensor of `shape`, with
    /// strides given in elements of `element_size` bytes.
    /// NOTE(review): returns std::optional, but the visible implementation
    /// always yields a value — confirm whether failure cases are intended.
    static std::optional<RearrangeMeta> create(
        const size_t *shape,
        const ptrdiff_t *dst_strides,
        const ptrdiff_t *src_strides,
        size_t ndim,
        size_t element_size);

    size_t ndim() const;  // number of (merged) dimensions in the plan
    size_t unit() const;  // bytes moved per copy step
    size_t count() const; // number of copy steps
    const ptrdiff_t *idx_strides() const; // length suffix-products for index decode
    const ptrdiff_t *dst_strides() const; // destination byte strides
    const ptrdiff_t *src_strides() const; // source byte strides

    /// Executes the plan, copying from `src` to `dst`.
    void launch(void *dst, const void *src) const;
};
/// Convenience wrapper: builds a RearrangeMeta for the given layout and
/// immediately applies it, copying `src` into `dst`.
/// Strides are in elements of `element_size` bytes.
void rearrange(
    void *dst,
    const void *src,
    const size_t *shape,
    const ptrdiff_t *dst_strides,
    const ptrdiff_t *src_strides,
    size_t ndim,
    size_t element_size);
} // namespace utils
#endif // __INFINIUTILS_REARRANGE_H__
...@@ -112,9 +112,15 @@ if has_config("kunlun-xpu") then ...@@ -112,9 +112,15 @@ if has_config("kunlun-xpu") then
includes("xmake/kunlun.lua") includes("xmake/kunlun.lua")
end end
target("infini-utils")
set_kind("static")
on_install(function (target) end)
set_languages("cxx17")
add_files("src/utils/*.cc")
target_end()
target("infiniop") target("infiniop")
set_kind("shared") set_kind("shared")
if has_config("cpu") then if has_config("cpu") then
add_deps("infiniop-cpu") add_deps("infiniop-cpu")
end end
......
...@@ -39,8 +39,8 @@ rule("ascend-kernels") ...@@ -39,8 +39,8 @@ rule("ascend-kernels")
rule_end() rule_end()
target("infiniop-ascend") target("infiniop-ascend")
-- Other configs
set_kind("static") set_kind("static")
add_deps("infini-utils")
set_languages("cxx17") set_languages("cxx17")
on_install(function (target) end) on_install(function (target) end)
-- Add files -- Add files
......
...@@ -38,6 +38,7 @@ local src_dir = path.join(os.projectdir(), "src", "infiniop") ...@@ -38,6 +38,7 @@ local src_dir = path.join(os.projectdir(), "src", "infiniop")
target("infiniop-cambricon") target("infiniop-cambricon")
set_kind("static") set_kind("static")
add_deps("infini-utils")
on_install(function (target) end) on_install(function (target) end)
set_languages("cxx17") set_languages("cxx17")
add_files(src_dir.."/devices/bang/*.cc", src_dir.."/ops/*/bang/*.cc") add_files(src_dir.."/devices/bang/*.cc", src_dir.."/ops/*/bang/*.cc")
......
target("infiniop-cpu") target("infiniop-cpu")
on_install(function (target) end)
set_kind("static") set_kind("static")
add_deps("infini-utils")
on_install(function (target) end)
set_warnings("all", "error") set_warnings("all", "error")
...@@ -17,8 +18,9 @@ target("infiniop-cpu") ...@@ -17,8 +18,9 @@ target("infiniop-cpu")
target_end() target_end()
target("infinirt-cpu") target("infinirt-cpu")
on_install(function (target) end)
set_kind("static") set_kind("static")
add_deps("infini-utils")
on_install(function (target) end)
set_warnings("all", "error") set_warnings("all", "error")
......
...@@ -10,6 +10,7 @@ end ...@@ -10,6 +10,7 @@ end
target("infiniop-cuda") target("infiniop-cuda")
set_kind("static") set_kind("static")
add_deps("infini-utils")
on_install(function (target) end) on_install(function (target) end)
set_policy("build.cuda.devlink", true) set_policy("build.cuda.devlink", true)
...@@ -37,6 +38,7 @@ target_end() ...@@ -37,6 +38,7 @@ target_end()
target("infinirt-cuda") target("infinirt-cuda")
set_kind("static") set_kind("static")
add_deps("infini-utils")
on_install(function (target) end) on_install(function (target) end)
set_policy("build.cuda.devlink", true) set_policy("build.cuda.devlink", true)
......
...@@ -8,8 +8,8 @@ add_links("xpurt") ...@@ -8,8 +8,8 @@ add_links("xpurt")
add_links("xpuapi") add_links("xpuapi")
target("infiniop-kunlun") target("infiniop-kunlun")
-- Other configs
set_kind("static") set_kind("static")
add_deps("infini-utils")
set_languages("cxx17") set_languages("cxx17")
on_install(function (target) end) on_install(function (target) end)
-- Add files -- Add files
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment