Unverified Commit bd8ae651 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #99 from PanZezhong1725/kunlun-handle

issue/87: restructure kunlun handle
parents a51e1d56 cc8162b3
...@@ -48,9 +48,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { ...@@ -48,9 +48,7 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
CREATE(INFINI_DEVICE_ASCEND, ascend); CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
case INFINI_DEVICE_KUNLUN: { CREATE(INFINI_DEVICE_KUNLUN, kunlun);
return createKunlunHandle((infiniopKunlunHandle_t *)handle_ptr);
}
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, maca); CREATE(INFINI_DEVICE_METAX, maca);
...@@ -84,9 +82,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { ...@@ -84,9 +82,7 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
DELETE(INFINI_DEVICE_ASCEND, ascend); DELETE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
case INFINI_DEVICE_KUNLUN: { DELETE(INFINI_DEVICE_KUNLUN, kunlun);
return destroyKunlunHandle((infiniopKunlunHandle_t)handle);
}
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, maca); DELETE(INFINI_DEVICE_METAX, maca);
......
#ifndef __INFINIOP_COMMON_KUNLUN_H__
#define __INFINIOP_COMMON_KUNLUN_H__
#include "../../../utils.h"
#include "../pool.h"
#include "infinicore.h"
#include "kunlun_handle.h"
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"
#include <memory>
namespace xdnn = baidu::xpu::api;
typedef xdnn::Context *xdnnHandle_t;
typedef XPUStream KunlunStream_t;
#define CHECK_KUNLUN(call) CHECK_INTERNAL(call, XPU_SUCCESS)
// Kunlun device handle: a device tag, a device index, and a shared pool of
// xdnn contexts. Field order matters: the handle is built with positional
// brace-initialisation (device, device_id, pool).
struct InfiniopKunlunHandle {
// Device type tag.
infiniDevice_t device;
// Index of the device this handle was created for.
int device_id;
// Pool of reusable xdnn contexts. Held by shared_ptr so that other owners
// (e.g. operator descriptors) can keep the pool alive independently.
std::shared_ptr<Pool<xdnnHandle_t>> xdnn_handle_pool;
};
/**
 * Runs `f` with an xdnn context taken from `pool` and bound to `stream`.
 *
 * A context is popped from the pool; if none is available a new one is
 * created with xdnn::create_context(). The context is handed back to the
 * pool after `f` returns.
 *
 * @param pool   Shared pool of xdnn contexts to borrow from / return to.
 * @param stream Stream the context is bound to before `f` is invoked.
 * @param f      Callable invoked as `f(xdnnHandle_t)`; its result is ignored.
 *
 * NOTE(review): assumes Pool::pop() returns an optional-like holder of
 * xdnnHandle_t (testable with `!`, dereferenced with `*`) — confirm in pool.h.
 */
template <typename T>
void use_xdnn(std::shared_ptr<Pool<xdnnHandle_t>> &pool, KunlunStream_t stream, const T &f) {
    auto handle = pool->pop();
    if (!handle) {
        // Assign to the holder itself, not through `*handle`: dereferencing an
        // empty holder is undefined behaviour and would leave it disengaged.
        handle = xdnn::create_context();
    }
    (*handle)->set_stream(stream);
    f(*handle);
    // Return the context so later calls can reuse it.
    pool->push(std::move(*handle));
}
#endif //__INFINIOP_COMMON_KUNLUN_H__
#include "common_kunlun.h" #include "kunlun_handle.h"
infiniStatus_t createKunlunHandle(infiniopKunlunHandle_t *handle_ptr) { namespace device::kunlun {
int device_id;
CHECK_KUNLUN(xpu_current_device(&device_id));
auto pool = std::make_shared<Pool<xdnnHandle_t>>();
xdnnHandle_t handle = xdnn::create_context();
pool->push(std::move(handle));
*handle_ptr = new InfiniopKunlunHandle{ Handle::Handle(int device_id)
INFINI_DEVICE_KUNLUN, : InfiniopHandle{INFINI_DEVICE_KUNLUN, device_id},
device_id, _internal(std::make_shared<Handle::Internal>()) {}
std::move(pool),
};
return INFINI_STATUS_SUCCESS; auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal;
} }
infiniStatus_t destroyKunlunHandle(infiniopKunlunHandle_t handle_ptr) { infiniStatus_t Handle::Internal::useXdnn(kunlunStream_t stream, const Fn<xdnnHandle_t> &f) const {
handle_ptr->xdnn_handle_pool = nullptr; auto handle = dnn_handles.pop();
delete handle_ptr; if (!handle) {
*handle = xdnn::create_context();
}
(*handle)->set_stream(stream);
CHECK_STATUS(f(*handle));
dnn_handles.push(std::move(*handle));
return INFINI_STATUS_SUCCESS;
}
infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
*handle_ptr = new Handle(device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace device::kunlun
#ifndef __INFINIOP_KUNLUN_HANDLE_H__ #ifndef __INFINIOP_KUNLUN_HANDLE_H__
#define __INFINIOP_KUNLUN_HANDLE_H__ #define __INFINIOP_KUNLUN_HANDLE_H__
#include "../../../utils.h"
#include "../../handle.h" #include "../../handle.h"
#include "../pool.h"
#include <functional>
#include <memory>
#include <xpu/runtime.h>
#include <xpu/runtime_ex.h>
#include <xpu/xdnn.h>
struct InfiniopKunlunHandle; namespace xdnn = baidu::xpu::api;
typedef struct InfiniopKunlunHandle *infiniopKunlunHandle_t; typedef XPUStream kunlunStream_t;
typedef XPUEvent kunlunEvent_t;
typedef xdnn::Context *xdnnHandle_t;
infiniStatus_t createKunlunHandle(infiniopKunlunHandle_t *handle_ptr); #define CHECK_XDNN(API) CHECK_INTERNAL(API, XPU_SUCCESS)
infiniStatus_t destroyKunlunHandle(infiniopKunlunHandle_t handle);
namespace device::kunlun {

/// Kunlun device handle. Extends the generic InfiniopHandle with a shared
/// `Internal` object that operator descriptors can hold on to.
struct Handle : public InfiniopHandle {
    /// Backend state shared between the handle and its users.
    class Internal;

    /// Builds a handle for `device_id` and allocates its `Internal` state.
    Handle(int device_id);

    /// Allocates a new Handle for `device_id` and stores it in `*handle_ptr`.
    /// @return INFINI_STATUS_SUCCESS.
    static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id);

    /// @return the shared internal state owned by this handle.
    const std::shared_ptr<Internal> &internal() const;

private:
    std::shared_ptr<Internal> _internal;
};

/// Shared backend state: a pool of reusable xdnn contexts.
class Handle::Internal {
private:
    /// Callback type: receives a resource of type T and reports a status.
    template <typename T>
    using Fn = std::function<infiniStatus_t(T)>;

    Pool<xdnnHandle_t> dnn_handles;

public:
    /// Invokes `f` with an xdnn context from the pool after binding it to
    /// `stream`; a context is created when the pool has none.
    /// @return INFINI_STATUS_SUCCESS once `f` succeeded and the context was
    ///         returned to the pool; otherwise the status propagated from `f`.
    infiniStatus_t useXdnn(kunlunStream_t stream, const Fn<xdnnHandle_t> &f) const;
};

} // namespace device::kunlun
#endif // __INFINIOP_KUNLUN_HANDLE_H__ #endif // __INFINIOP_KUNLUN_HANDLE_H__
#include "matmul_kunlun.h" #include "matmul_kunlun.h"
#include "../../../devices/kunlun/common_kunlun.h" #include "../../../../utils.h"
#include "../../utils.h" #include "../../../devices/kunlun/kunlun_handle.h"
namespace op::matmul::kunlun { namespace op::matmul::kunlun {
typedef device::kunlun::Handle::Internal HandleInternal;
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<Pool<xdnnHandle_t>> xdnn_handle_pool; std::shared_ptr<HandleInternal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
delete _opaque; delete _opaque;
} }
infiniStatus_t Descriptor::create(infiniopHandle_t handle_, infiniStatus_t Descriptor::create(
Descriptor **desc_ptr, infiniopHandle_t handle_,
infiniopTensorDescriptor_t c_desc, Descriptor **desc_ptr,
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t b_desc) { infiniopTensorDescriptor_t a_desc,
auto handle = reinterpret_cast<infiniopKunlunHandle_t>(handle_); infiniopTensorDescriptor_t b_desc) {
auto dtype = c_desc->dtype; auto handle = reinterpret_cast<device::kunlun::Handle *>(handle_);
auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) { if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE; return INFINI_STATUS_BAD_TENSOR_DTYPE;
...@@ -32,22 +35,22 @@ infiniStatus_t Descriptor::create(infiniopHandle_t handle_, ...@@ -32,22 +35,22 @@ infiniStatus_t Descriptor::create(infiniopHandle_t handle_,
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, info, 0, dtype, info, 0,
new Opaque{handle->xdnn_handle_pool}, new Opaque{handle->internal()},
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
template <class Tdata> template <class Tdata>
void calculate( infiniStatus_t calculate(
const MatmulInfo &info, MatmulInfo info,
std::shared_ptr<Pool<xdnnHandle_t>> &xdnn_handle_pool, std::shared_ptr<HandleInternal> internal,
infiniDtype_t dtype, infiniDtype_t dtype,
void *c, void *c,
float beta, float beta,
const void *a, const void *a,
const void *b, const void *b,
float alpha, float alpha,
KunlunStream_t stream) { kunlunStream_t stream) {
if (info.is_transed) { if (info.is_transed) {
std::swap(a, b); std::swap(a, b);
...@@ -58,33 +61,35 @@ void calculate( ...@@ -58,33 +61,35 @@ void calculate(
auto unit = infiniSizeOf(dtype); auto unit = infiniSizeOf(dtype);
use_xdnn(xdnn_handle_pool, CHECK_STATUS(internal->useXdnn(
(KunlunStream_t)stream, (kunlunStream_t)stream,
[&](xdnnHandle_t handle) { [&](xdnnHandle_t handle) {
for (size_t i = 0; i < info.batch; i++) { for (size_t i = 0; i < info.batch; i++) {
xdnn::fc_fusion<Tdata, Tdata, Tdata, int16_t>( CHECK_XDNN((xdnn::fc_fusion<Tdata, Tdata, Tdata, int16_t>(
handle, handle,
(Tdata *)((char *)a + i * info.a_matrix.stride * unit), (Tdata *)((char *)a + i * info.a_matrix.stride * unit),
(Tdata *)((char *)b + i * info.b_matrix.stride * unit), (Tdata *)((char *)b + i * info.b_matrix.stride * unit),
(Tdata *)((char *)c + i * info.c_matrix.stride * unit), (Tdata *)((char *)c + i * info.c_matrix.stride * unit),
info.m, info.m,
info.n, info.n,
info.k, info.k,
transA, transA,
transB, transB,
nullptr, nullptr,
nullptr, nullptr,
nullptr, nullptr,
info.a_matrix.ld(), info.a_matrix.ld(),
info.b_matrix.ld(), info.b_matrix.ld(),
info.c_matrix.ld(), info.c_matrix.ld(),
alpha, alpha,
beta, beta,
nullptr, nullptr,
xdnn::Activation_t::LINEAR, xdnn::Activation_t::LINEAR,
nullptr); nullptr)));
} }
}); return INFINI_STATUS_SUCCESS;
}));
return INFINI_STATUS_SUCCESS;
} }
infiniStatus_t Descriptor::calculate( infiniStatus_t Descriptor::calculate(
...@@ -98,13 +103,9 @@ infiniStatus_t Descriptor::calculate( ...@@ -98,13 +103,9 @@ infiniStatus_t Descriptor::calculate(
void *stream) const { void *stream) const {
switch (_dtype) { switch (_dtype) {
case INFINI_DTYPE_F16: case INFINI_DTYPE_F16:
kunlun::calculate<float16>(_info, _opaque->xdnn_handle_pool, _dtype, c, beta, a, b, alpha, (KunlunStream_t)stream); return op::matmul::kunlun::calculate<float16>(_info, _opaque->internal, _dtype, c, beta, a, b, alpha, (kunlunStream_t)stream);
return INFINI_STATUS_SUCCESS;
case INFINI_DTYPE_F32: case INFINI_DTYPE_F32:
kunlun::calculate<float>(_info, _opaque->xdnn_handle_pool, _dtype, c, beta, a, b, alpha, (KunlunStream_t)stream); return op::matmul::kunlun::calculate<float>(_info, _opaque->internal, _dtype, c, beta, a, b, alpha, (kunlunStream_t)stream);
return INFINI_STATUS_SUCCESS;
default: default:
return INFINI_STATUS_BAD_TENSOR_DTYPE; return INFINI_STATUS_BAD_TENSOR_DTYPE;
} }
......
#include "infinirt_kunlun.h"
#include "../../utils.h"
#include <xpu/runtime.h>
#include <xpu/runtime_ex.h>
typedef XPUStream kunlunStream_t;
typedef XPUEvent kunlunEvent_t;
#define CHECK_KUNLUNRT(RT_API) CHECK_INTERNAL(RT_API, XPU_SUCCESS)
// Kunlun (XPU) backend of the infinirt runtime API.
//
// Every wrapper forwards to one XPU runtime call through CHECK_KUNLUNRT, which
// compares the call's result with XPU_SUCCESS via CHECK_INTERNAL. What happens
// on a mismatch is defined by CHECK_INTERNAL (declared outside this file);
// when the check passes the wrapper returns INFINI_STATUS_SUCCESS.
namespace infinirt::kunlun {
// Writes the number of devices reported by xpu_device_count() to `*count`.
infiniStatus_t getDeviceCount(int *count) {
CHECK_KUNLUNRT(xpu_device_count(count));
return INFINI_STATUS_SUCCESS;
}
// Selects `device_id` via xpu_set_device().
infiniStatus_t setDevice(int device_id) {
CHECK_KUNLUNRT(xpu_set_device(device_id));
return INFINI_STATUS_SUCCESS;
}
// Waits via xpu_wait() called without a stream argument.
infiniStatus_t deviceSynchronize() {
CHECK_KUNLUNRT(xpu_wait());
return INFINI_STATUS_SUCCESS;
}
// Creates an XPU stream and stores it in `*stream_ptr`.
// `*stream_ptr` is only written after the creation call passed its check.
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
kunlunStream_t stream;
CHECK_KUNLUNRT(xpu_stream_create(&stream));
*stream_ptr = stream;
return INFINI_STATUS_SUCCESS;
}
// Destroys `stream` via xpu_stream_destroy().
infiniStatus_t streamDestroy(infinirtStream_t stream) {
CHECK_KUNLUNRT(xpu_stream_destroy((kunlunStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Waits on `stream` via xpu_wait(stream).
infiniStatus_t streamSynchronize(infinirtStream_t stream) {
CHECK_KUNLUNRT(xpu_wait((kunlunStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Makes `stream` wait for `event` via xpu_stream_wait_event().
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
CHECK_KUNLUNRT(xpu_stream_wait_event((kunlunStream_t)stream, (kunlunEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Creates an XPU event and stores it in `*event_ptr`.
// `*event_ptr` is only written after the creation call passed its check.
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) {
kunlunEvent_t event;
CHECK_KUNLUNRT(xpu_event_create(&event));
*event_ptr = event;
return INFINI_STATUS_SUCCESS;
}
// Records `event` on `stream` via xpu_event_record().
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) {
CHECK_KUNLUNRT(xpu_event_record((kunlunEvent_t)event, (kunlunStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Not supported on this backend: always returns INFINI_STATUS_NOT_IMPLEMENTED.
// Neither `event` nor `status_ptr` is read or written.
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
// no event query in kunlun2
return INFINI_STATUS_NOT_IMPLEMENTED;
}
// Waits for `event` via xpu_event_wait().
infiniStatus_t eventSynchronize(infinirtEvent_t event) {
CHECK_KUNLUNRT(xpu_event_wait((kunlunEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Destroys `event` via xpu_event_destroy().
infiniStatus_t eventDestroy(infinirtEvent_t event) {
CHECK_KUNLUNRT(xpu_event_destroy((kunlunEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Allocates `size` bytes with xpu_malloc() and stores the pointer in `*p_ptr`.
infiniStatus_t mallocDevice(void **p_ptr, size_t size) {
CHECK_KUNLUNRT(xpu_malloc(p_ptr, static_cast<uint64_t>(size)));
return INFINI_STATUS_SUCCESS;
}
// Allocates `size` bytes with xpu_host_alloc() (third argument 0) and stores
// the pointer in `*p_ptr`.
infiniStatus_t mallocHost(void **p_ptr, size_t size) {
CHECK_KUNLUNRT(xpu_host_alloc(p_ptr, static_cast<uint64_t>(size), 0));
return INFINI_STATUS_SUCCESS;
}
// Releases `ptr` with xpu_free().
infiniStatus_t freeDevice(void *ptr) {
CHECK_KUNLUNRT(xpu_free(ptr));
return INFINI_STATUS_SUCCESS;
}
// Releases `ptr` with xpu_host_free().
infiniStatus_t freeHost(void *ptr) {
CHECK_KUNLUNRT(xpu_host_free(ptr));
return INFINI_STATUS_SUCCESS;
}
// Copies `size` bytes from `src` to `dst` with xpu_memcpy(), mapping `kind`
// to the matching XPUMemcpyKind. Only H2D, D2H and D2D are handled; any other
// `kind` value returns INFINI_STATUS_INTERNAL_ERROR without copying.
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
switch (kind) {
case INFINIRT_MEMCPY_H2D:
CHECK_KUNLUNRT(xpu_memcpy(dst, src, static_cast<uint64_t>(size), XPUMemcpyKind::XPU_HOST_TO_DEVICE));
return INFINI_STATUS_SUCCESS;
case INFINIRT_MEMCPY_D2H:
CHECK_KUNLUNRT(xpu_memcpy(dst, src, static_cast<uint64_t>(size), XPUMemcpyKind::XPU_DEVICE_TO_HOST));
return INFINI_STATUS_SUCCESS;
case INFINIRT_MEMCPY_D2D:
CHECK_KUNLUNRT(xpu_memcpy(dst, src, static_cast<uint64_t>(size), XPUMemcpyKind::XPU_DEVICE_TO_DEVICE));
return INFINI_STATUS_SUCCESS;
default:
return INFINI_STATUS_INTERNAL_ERROR;
}
}
// Fallback for the async copy: `stream` is ignored and the call is forwarded
// to the memcpy() wrapper above.
// NOTE(review): nothing here orders the copy against work queued on `stream`
// — confirm callers do not rely on stream ordering.
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
// no async memcpy func in kunlun2
return memcpy(dst, src, size, kind);
}
// Same xpu_malloc() call as mallocDevice(); `stream` is not used.
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
CHECK_KUNLUNRT(xpu_malloc(p_ptr, static_cast<uint64_t>(size)));
return INFINI_STATUS_SUCCESS;
}
// Same xpu_free() call as freeDevice(); `stream` is not used.
// NOTE(review): the free is not ordered against work queued on `stream` —
// confirm callers synchronize before releasing memory still in use.
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_KUNLUNRT(xpu_free(ptr));
return INFINI_STATUS_SUCCESS;
}
} // namespace infinirt::kunlun
#ifndef __INFINIRT_KUNLUN_H__
#define __INFINIRT_KUNLUN_H__
#include "../infinirt_impl.h"
// Kunlun backend entry points of the infinirt device API.
// Which macro is expanded depends on whether ENABLE_KUNLUN_API is defined at
// build time; both macros come from ../infinirt_impl.h.
namespace infinirt::kunlun {
#ifdef ENABLE_KUNLUN_API
// Kunlun enabled: presumably declares the device API functions implemented in
// the backend's .cc file — confirm against infinirt_impl.h.
INFINIRT_DEVICE_API_IMPL
#else
// Kunlun disabled: presumably provides no-op/stub versions of the same
// functions — confirm against infinirt_impl.h.
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::kunlun
#endif // __INFINIRT_KUNLUN_H__
...@@ -121,6 +121,7 @@ target("infini-utils") ...@@ -121,6 +121,7 @@ target("infini-utils")
on_install(function (target) end) on_install(function (target) end)
set_languages("cxx17") set_languages("cxx17")
add_files("src/utils/*.cc") add_files("src/utils/*.cc")
add_cxflags("-Wno-unknown-pragmas")
target_end() target_end()
target("infinirt") target("infinirt")
...@@ -138,6 +139,9 @@ target("infinirt") ...@@ -138,6 +139,9 @@ target("infinirt")
if has_config("metax-gpu") then if has_config("metax-gpu") then
add_deps("infinirt-metax") add_deps("infinirt-metax")
end end
if has_config("kunlun-xpu") then
add_deps("infinirt-kunlun")
end
set_languages("cxx17") set_languages("cxx17")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")) set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
add_files("src/infinirt/*.cc") add_files("src/infinirt/*.cc")
......
...@@ -4,6 +4,7 @@ target("infiniop-cpu") ...@@ -4,6 +4,7 @@ target("infiniop-cpu")
on_install(function (target) end) on_install(function (target) end)
set_warnings("all", "error") set_warnings("all", "error")
add_cxflags("-Wno-unknown-pragmas")
if is_plat("windows") then if is_plat("windows") then
if has_config("omp") then if has_config("omp") then
......
...@@ -18,3 +18,14 @@ target("infiniop-kunlun") ...@@ -18,3 +18,14 @@ target("infiniop-kunlun")
set_languages("cxx17") set_languages("cxx17")
add_files("$(projectdir)/src/infiniop/devices/kunlun/*.cc", "$(projectdir)/src/infiniop/ops/*/kunlun/*.cc") add_files("$(projectdir)/src/infiniop/devices/kunlun/*.cc", "$(projectdir)/src/infiniop/ops/*/kunlun/*.cc")
target_end() target_end()
-- Static library with the Kunlun (XPU) implementation of the infinirt runtime.
target("infinirt-kunlun")
set_kind("static")
add_deps("infini-utils")
set_languages("cxx17")
-- Empty install hook: nothing is installed for this target itself.
on_install(function (target) end)
-- Backend sources. No include directories are added by this target itself.
add_files("../src/infinirt/kunlun/*.cc")
-- NOTE(review): all flags are passed as one string, and -lstdc++ is a linker
-- option given as a compile flag — confirm xmake applies these as intended.
add_cxflags("-lstdc++ -Wall -Werror -fPIC")
target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment