Unverified Commit e2b50fd0 authored by PanZezhong1725, committed by GitHub

Merge pull request #80 from PanZezhong1725/issue/78

Issue/78 Refactor utils
parents be731358 05188f30
@@ -2,9 +2,7 @@
 infiniStatus_t createAscendHandle(infiniopAscendHandle_t *handle_ptr) {
     int device_id = 0;
-    auto ret = aclrtGetDevice(&device_id);
-    CHECK_RET(ret == ACL_SUCCESS,
-              return INFINI_STATUS_DEVICE_NOT_INITIALIZED);
+    CHECK_ACL(aclrtGetDevice(&device_id));
     *handle_ptr = new InfiniopAscendHandle{INFINI_DEVICE_ASCEND, device_id};
......
@@ -11,21 +11,15 @@ int64_t numElements(const int64_t *shape, int64_t num) {
 infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize) {
     *workspaceAddr = nullptr;
     if (workspaceSize > 0) {
-        auto ret = aclrtMalloc(workspaceAddr, workspaceSize,
-                               ACL_MEM_MALLOC_HUGE_FIRST);
-        CHECK_RET(ret == ACL_SUCCESS,
-                  LOG_PRINT("aclrtMalloc failed. ERROR: %d\n", ret);
-                  return INFINI_STATUS_INTERNAL_ERROR);
+        CHECK_ACL(aclrtMalloc(workspaceAddr, workspaceSize,
+                              ACL_MEM_MALLOC_HUGE_FIRST));
     }
     return INFINI_STATUS_SUCCESS;
 }

 infiniStatus_t freeWorkspace(void *workspaceAddr) {
     if (workspaceAddr != nullptr) {
-        auto ret = aclrtFree(workspaceAddr);
-        CHECK_RET(ret == ACL_SUCCESS,
-                  LOG_PRINT("aclrtFree failed, ERROR: %d\n", ret);
-                  return INFINI_STATUS_INTERNAL_ERROR);
+        CHECK_ACL(aclrtFree(workspaceAddr));
     }
     return INFINI_STATUS_SUCCESS;
 }
......
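For context, both helpers now surface failures as infiniStatus_t, so callers can propagate statuses directly. A minimal caller-side sketch (runWithWorkspace is a hypothetical function, not part of this diff):

infiniStatus_t runWithWorkspace(size_t workspaceSize) {
    void *workspace = nullptr;
    // mallocWorkspace leaves workspace as nullptr when workspaceSize == 0
    auto status = mallocWorkspace(&workspace, workspaceSize);
    if (status != INFINI_STATUS_SUCCESS) {
        return status;
    }
    // ... launch kernels that use `workspace` ...
    return freeWorkspace(workspace); // no-op on nullptr
}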
 #ifndef __INFINIOP_COMMON_ASCEND_H__
 #define __INFINIOP_COMMON_ASCEND_H__

+#include "../../utils.h"
 #include "ascend_handle.h"
 #include <acl/acl.h>
 #include <acl/acl_base.h>
@@ -15,25 +16,7 @@
 #ifdef __cplusplus
 extern "C" {
 #endif

-#define CHECK_RET(cond, return_expr) \
-    do {                             \
-        if (!(cond)) {               \
-            return_expr;             \
-        }                            \
-    } while (0)
-
-#define LOG_PRINT(message, ...)         \
-    do {                                \
-        printf(message, ##__VA_ARGS__); \
-    } while (0)
-
-#define LOG_ERROR(message, ...)               \
-    do {                                      \
-        printf(message, ##__VA_ARGS__);       \
-        return INFINI_STATUS_INTERNAL_ERROR;  \
-    } while (0)
+#define CHECK_ACL(API) CHECK_INTERNAL(API, ACL_SUCCESS)

 #ifdef __cplusplus
 };
 #endif
......
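The removed CHECK_RET/LOG_PRINT/LOG_ERROR trio is replaced by a single forwarding macro. CHECK_INTERNAL comes from the new shared check.h (shown further down); the body of CHECK_API_OR is elided in this diff, so the sketch below is a hedged reconstruction of what such a macro plausibly looks like, not the verbatim implementation (hence the _SKETCH suffix):

// Hedged sketch: assuming CHECK_API_OR evaluates the API once, compares the
// result against EXPECT, logs on mismatch, and then runs ACTION:
#define CHECK_API_OR_SKETCH(API, EXPECT, ACTION)                    \
    do {                                                            \
        if (auto err_ = (API); err_ != (EXPECT)) {                  \
            std::cerr << #API << " failed: " << err_ << " at "      \
                      << __FILE__ << ":" << __LINE__ << std::endl;  \
            ACTION;                                                 \
        }                                                           \
    } while (0)

// Under that assumption, CHECK_ACL(aclrtGetDevice(&device_id)) expands as:
//   CHECK_INTERNAL(aclrtGetDevice(&device_id), ACL_SUCCESS)
//   -> CHECK_API_OR(aclrtGetDevice(&device_id), ACL_SUCCESS,
//                   return INFINI_STATUS_INTERNAL_ERROR)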
@@ -70,10 +70,7 @@ infiniStatus_t aclnnTensorDescriptor::createTensor(void *data) {
 }

 infiniStatus_t aclnnTensorDescriptor::destroyTensor() {
-    auto ret = aclDestroyTensor(this->t);
-    CHECK_RET(ret == ACL_SUCCESS,
-              LOG_PRINT("aclDesctroyTensor failed, ERROR: %d\n", ret);
-              return INFINI_STATUS_INTERNAL_ERROR);
+    CHECK_ACL(aclDestroyTensor(this->t));
     t = nullptr;
     return INFINI_STATUS_SUCCESS;
......
@@ -5,29 +5,14 @@
 #define MAX_WARP_PER_BLOCK 32
 #define WARP_SIZE 32

+#include "../../utils.h"
 #include <iostream>

-#define CHECK_CUDA_OR_RETURN(call, errorCode)                                 \
-    do {                                                                      \
-        if (auto status = call; status != cudaSuccess) {                      \
-            std::cerr << "CUDA error: " << cudaGetErrorString(status)         \
-                      << " in file " << __FILE__ << ", function " << __func__ \
-                      << ", line " << __LINE__ << std::endl;                  \
-            return errorCode;                                                 \
-        }                                                                     \
-    } while (0)
+#define CHECK_CUDA_OR_RETURN(API, ERROR) CHECK_API_OR(API, cudaSuccess, return ERROR)

-#define CHECK_CUDA(call) CHECK_CUDA_OR_RETURN(call, INFINI_STATUS_INTERNAL_ERROR)
+#define CHECK_CUDA(API) CHECK_INTERNAL(API, cudaSuccess)

-#define CHECK_CUDNN(call)                                                     \
-    do {                                                                      \
-        if (auto status = call; status != CUDNN_STATUS_SUCCESS) {             \
-            std::cerr << "CUDNN error: " << cudnnGetErrorString(status)       \
-                      << " in file " << __FILE__ << ", function " << __func__ \
-                      << ", line " << __LINE__ << std::endl;                  \
-            return INFINI_STATUS_INTERNAL_ERROR;                              \
-        }                                                                     \
-    } while (0)
+#define CHECK_CUDNN(API) CHECK_INTERNAL(API, CUDNN_STATUS_SUCCESS)

 #include "../pool.h"
 #include "cuda_handle.h"
......
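Call sites shrink accordingly. An illustrative helper (the function name and signature are hypothetical): each CUDA call becomes a one-liner, and any non-cudaSuccess result is mapped to INFINI_STATUS_INTERNAL_ERROR by CHECK_INTERNAL.

infiniStatus_t copyDeviceToDevice(void *dst, const void *src, size_t bytes,
                                  cudaStream_t stream) { // hypothetical helper
    CHECK_CUDA(cudaMemcpyAsync(dst, src, bytes, cudaMemcpyDeviceToDevice, stream));
    return INFINI_STATUS_SUCCESS;
}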
@@ -88,10 +88,7 @@ infiniStatus_t Descriptor::create(
     // use alpha = 0.5, beta = 0.5 temporarily
     int8_t mt = 1;
-    auto ret = aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspace_size, &executor);
-    CHECK_RET(ret == ACL_SUCCESS,
-              LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
-              return INFINI_STATUS_INTERNAL_ERROR);
+    CHECK_ACL(aclnnGemmGetWorkspaceSize(ta, tb, tc, .5, .5, 0, 0, tc, mt, &workspace_size, &executor));

     aclSetAclOpExecutorRepeatable(executor);

     *desc_ptr = new Descriptor(
@@ -122,12 +119,9 @@ infiniStatus_t Descriptor::calculate(
     tb = _opaque->b->t;

     size_t workspace_size;
-    auto ret = aclnnGemmGetWorkspaceSize(
-        ta, tb, tc, alpha, beta, 0, 0, tc, _opaque->mt,
-        &workspace_size, &(_opaque->executor));
-    CHECK_RET(ret == ACL_SUCCESS,
-              LOG_PRINT("aclnnGemmGetWorkspaceSize failed. ERROR: %d\n", ret);
-              return INFINI_STATUS_INTERNAL_ERROR);
+    CHECK_ACL(aclnnGemmGetWorkspaceSize(
+        ta, tb, tc, alpha, beta, 0, 0, tc, _opaque->mt,
+        &workspace_size, &(_opaque->executor)));
     if (workspaceSize_ < workspace_size) {
         return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
     }
@@ -139,10 +133,7 @@ infiniStatus_t Descriptor::calculate(
         AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit);
         AclSetTensorAddr(_opaque->executor, 2, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
         AclSetTensorAddr(_opaque->executor, 3, tc, ((char *)c) + i * _info.c_matrix.stride * unit);
-        ret = aclnnGemm(workspace, workspace_size, _opaque->executor, stream);
-        CHECK_RET(ret == ACL_SUCCESS,
-                  LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
-                  return INFINI_STATUS_INTERNAL_ERROR);
+        CHECK_ACL(aclnnGemm(workspace, workspace_size, _opaque->executor, stream));
     }

     return INFINI_STATUS_SUCCESS;
......
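The calculate path keeps aclnn's two-phase query-then-execute protocol; only the error plumbing changed. In outline (names taken from the hunks above):

// 1. query the workspace size the executor needs
CHECK_ACL(aclnnGemmGetWorkspaceSize(ta, tb, tc, alpha, beta, 0, 0, tc,
                                    _opaque->mt, &workspace_size,
                                    &(_opaque->executor)));
// 2. a too-small caller buffer is a distinct, recoverable status
if (workspaceSize_ < workspace_size) {
    return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
// 3. execute; any ACL failure becomes INFINI_STATUS_INTERNAL_ERROR
CHECK_ACL(aclnnGemm(workspace, workspace_size, _opaque->executor, stream));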
#include "../../check.h"
#include "../../utils.h"
#include "infinirt_cuda.cuh"
#include <cuda_runtime.h>
......
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "utils/check.h"
#include "utils/rearrange.h"
#endif
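The new umbrella header bundles the two utility headers. A consumer sketch (the include path and the wrapper function are hypothetical; they depend on include dirs this diff does not show):

#include "infiniutils.h" // pulls in utils/check.h and utils/rearrange.h

void transposeCopy(void *dst, const void *src, const size_t *shape,
                   const ptrdiff_t *dst_strides, const ptrdiff_t *src_strides,
                   size_t ndim, size_t elem_size) { // hypothetical wrapper
    utils::rearrange(dst, src, shape, dst_strides, src_strides, ndim, elem_size);
}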
-#ifndef INFINI_CHECK_H
-#define INFINI_CHECK_H
+#ifndef INFINIUTILS_CHECK_H
+#define INFINIUTILS_CHECK_H

 #include <iostream>

 #define CHECK_API_OR(API, EXPECT, ACTION) \
@@ -15,4 +15,4 @@
 #define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR)

-#endif // INFINI_CHECK_H
+#endif // INFINIUTILS_CHECK_H
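Because CHECK_API_OR's third argument is an arbitrary statement, per-call-site statuses survive the refactor. A sketch reproducing the old createAscendHandle behavior (the wrapper function is hypothetical):

infiniStatus_t getDeviceOr(int *device_id) { // hypothetical helper
    // same check, but a device-specific status instead of the internal default
    CHECK_API_OR(aclrtGetDevice(device_id), ACL_SUCCESS,
                 return INFINI_STATUS_DEVICE_NOT_INITIALIZED);
    return INFINI_STATUS_SUCCESS;
}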
#include "rearrange.h"
#include "check.h"
#include <algorithm>
#include <cstring>
#include <vector>
namespace utils {
RearrangeMeta::RearrangeMeta(std::vector<ptrdiff_t> meta)
: _meta(std::move(meta)) {}
std::optional<RearrangeMeta> RearrangeMeta::create(
const size_t *shape,
const ptrdiff_t *dst_strides_,
const ptrdiff_t *src_strides_,
size_t ndim,
size_t element_size) {
ptrdiff_t unit = element_size;
struct Dim {
size_t len;
ptrdiff_t dst, src;
};
std::vector<Dim> dims;
for (size_t i = 0; i < ndim; ++i) {
// Drop length-1 dimensions
if (shape[i] != 1) {
auto sd = dst_strides_[i] * unit, ss = src_strides_[i] * unit;
// assert (sd != 0)
dims.push_back(Dim{shape[i], sd, ss});
}
}
// Sort by decreasing destination stride, then decreasing source stride
std::sort(dims.begin(), dims.end(), [](const Dim &a, const Dim &b) {
if (std::abs(a.dst) == std::abs(b.dst)) {
if (std::abs(a.src) == std::abs(b.src)) {
return a.len < b.len;
}
return std::abs(a.src) > std::abs(b.src);
}
return std::abs(a.dst) > std::abs(b.dst);
});
// # Merge contiguous dimensions
// ## Fold trailing contiguous dimensions into the copy unit
while (!dims.empty() && dims.back().dst == unit && dims.back().src == unit) {
unit *= dims.back().len;
dims.pop_back();
}
// ## Merge adjacent dimensions that are contiguous in both tensors
for (size_t i = dims.size(); i > 1; --i) {
auto &f = dims[i - 2];
auto &b = dims[i - 1];
ptrdiff_t len = b.len;
if (b.dst * len == f.dst && b.src * len == f.src) {
f = Dim{b.len * f.len, b.dst, b.src};
dims.erase(dims.begin() + (i - 1));
}
}
ndim = dims.size();
// Fill in the index strides, destination strides, and source strides
std::vector<ptrdiff_t> meta(2 + ndim * 3);
meta[0] = unit;
meta[1 + ndim] = 1;
for (size_t i = 0; i < ndim; ++i) {
meta[1 + i] = dims[i].len;
meta[1 + 1 + ndim + i] = dims[i].dst;
meta[1 + 1 + ndim * 2 + i] = dims[i].src;
}
for (ptrdiff_t i = ndim; i > 0; --i) {
meta[1 + i - 1] *= meta[1 + i];
}
return {RearrangeMeta(std::move(meta))};
}
size_t RearrangeMeta::ndim() const { return (_meta.size() - 2) / 3; }
size_t RearrangeMeta::unit() const { return _meta[0]; }
size_t RearrangeMeta::count() const { return _meta[1]; }
const ptrdiff_t *RearrangeMeta::idx_strides() const { return _meta.data() + 2; }
const ptrdiff_t *RearrangeMeta::dst_strides() const { return idx_strides() + ndim(); }
const ptrdiff_t *RearrangeMeta::src_strides() const { return dst_strides() + ndim(); }
void RearrangeMeta::launch(void *dst_, const void *src_) const {
auto const ndim_ = ndim();
auto const count_ = count();
auto const unit_ = unit();
auto const idx_strides_ = idx_strides();
auto const dst_strides_ = dst_strides();
auto const src_strides_ = src_strides();
// Perform the rearrange
if (count_ == 1) {
std::memcpy(dst_, src_, unit_);
} else {
for (size_t i = 0; i < count_; ++i) {
auto dst = reinterpret_cast<char *>(dst_);
auto src = reinterpret_cast<const char *>(src_);
auto rem = i;
// decode the linear index into per-dimension offsets
for (size_t j = 0; j < ndim_; ++j) {
auto k = static_cast<ptrdiff_t>(rem / idx_strides_[j]);
dst += k * dst_strides_[j];
src += k * src_strides_[j];
rem %= idx_strides_[j];
}
std::memcpy(dst, src, unit_);
}
}
}
void rearrange(
void *dst,
const void *src,
const size_t *shape,
const ptrdiff_t *dst_strides,
const ptrdiff_t *src_strides,
size_t ndim,
size_t element_size) {
auto scheme = RearrangeMeta::create(shape, dst_strides, src_strides, ndim, element_size);
if (scheme) {
scheme->launch(dst, src);
} else {
std::abort();
}
}
} // namespace utils
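A usage sketch for the new utility (hypothetical buffers and driver function; strides are given in elements because create() scales them by element_size): copying a row-major 2x3 int32 matrix into a column-major destination.

#include <cstddef>
#include <cstdint>

void example() { // hypothetical driver
    int32_t src[6] = {1, 2, 3, 4, 5, 6}; // row-major 2x3
    int32_t dst[6] = {};                 // filled column-major
    size_t shape[] = {2, 3};
    ptrdiff_t dst_strides[] = {1, 2}; // column-major, in elements
    ptrdiff_t src_strides[] = {3, 1}; // row-major, in elements
    utils::rearrange(dst, src, shape, dst_strides, src_strides, 2, sizeof(int32_t));
    // dst is now {1, 4, 2, 5, 3, 6}
}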
#ifndef __INFINIUTILS_REARRANGE_H__
#define __INFINIUTILS_REARRANGE_H__
#include <optional>
#include <stddef.h>
#include <vector>
namespace utils {
class RearrangeMeta {
std::vector<ptrdiff_t> _meta;
RearrangeMeta(std::vector<ptrdiff_t>);
public:
static std::optional<RearrangeMeta> create(
const size_t *shape,
const ptrdiff_t *dst_strides,
const ptrdiff_t *src_strides,
size_t ndim,
size_t element_size);
size_t ndim() const;
size_t unit() const;
size_t count() const;
const ptrdiff_t *idx_strides() const;
const ptrdiff_t *dst_strides() const;
const ptrdiff_t *src_strides() const;
void launch(void *dst, const void *src) const;
};
void rearrange(
void *dst,
const void *src,
const size_t *shape,
const ptrdiff_t *dst_strides,
const ptrdiff_t *src_strides,
size_t ndim,
size_t element_size);
} // namespace utils
#endif // __INFINIUTILS_REARRANGE_H__
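For reference, my reading of the packed _meta layout behind these accessors, worked through for the 2x3 transpose example above (element_size = 4; after sorting, the dims are (len 3, dst 8B, src 4B) and (len 2, dst 4B, src 12B), and nothing merges):

// _meta = { 4,        // [0] unit: bytes per memcpy
//           6,        // [1] count: total number of units to copy
//           2, 1,     // idx_strides: suffix products used to decode indices
//           8, 4,     // dst byte strides, largest first
//           4, 12 };  // src byte strides, same dimension order
//
// launch() decodes each linear i in [0, count) against idx_strides into
// per-dimension coordinates, offsets both pointers, and memcpys `unit` bytes.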
@@ -112,9 +112,15 @@ if has_config("kunlun-xpu") then
     includes("xmake/kunlun.lua")
 end

+target("infini-utils")
+    set_kind("static")
+    on_install(function (target) end)
+    set_languages("cxx17")
+    add_files("src/utils/*.cc")
+target_end()
+
 target("infiniop")
     set_kind("shared")

     if has_config("cpu") then
         add_deps("infiniop-cpu")
     end
@@ -39,8 +39,8 @@ rule("ascend-kernels")
 rule_end()

 target("infiniop-ascend")
     -- Other configs
     set_kind("static")
+    add_deps("infini-utils")
     set_languages("cxx17")
     on_install(function (target) end)
     -- Add files
......
@@ -38,6 +38,7 @@ local src_dir = path.join(os.projectdir(), "src", "infiniop")
 target("infiniop-cambricon")
     set_kind("static")
+    add_deps("infini-utils")
     on_install(function (target) end)
     set_languages("cxx17")
     add_files(src_dir.."/devices/bang/*.cc", src_dir.."/ops/*/bang/*.cc")
......
target("infiniop-cpu")
on_install(function (target) end)
set_kind("static")
add_deps("infini-utils")
on_install(function (target) end)
set_warnings("all", "error")
......@@ -17,8 +18,9 @@ target("infiniop-cpu")
target_end()
target("infinirt-cpu")
on_install(function (target) end)
set_kind("static")
add_deps("infini-utils")
on_install(function (target) end)
set_warnings("all", "error")
......
@@ -10,6 +10,7 @@ end
 target("infiniop-cuda")
     set_kind("static")
+    add_deps("infini-utils")
     on_install(function (target) end)
     set_policy("build.cuda.devlink", true)
@@ -37,6 +38,7 @@ target_end()
 target("infinirt-cuda")
     set_kind("static")
+    add_deps("infini-utils")
     on_install(function (target) end)
     set_policy("build.cuda.devlink", true)
......
@@ -8,8 +8,8 @@ add_links("xpurt")
 add_links("xpuapi")

 target("infiniop-kunlun")
     -- Other configs
     set_kind("static")
+    add_deps("infini-utils")
     set_languages("cxx17")
     on_install(function (target) end)
     -- Add files
......