Unverified Commit 89ebdac8 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #100 from PanZezhong1725/issue/87/maca

issue/87/feat:增加沐曦平台的handle
parents 240b1236 f8173f07
...@@ -17,6 +17,9 @@ ...@@ -17,6 +17,9 @@
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
#include "kunlun/kunlun_handle.h" #include "kunlun/kunlun_handle.h"
#endif #endif
#ifdef ENABLE_METAX_API
#include "maca/maca_handle.h"
#endif
__C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
if (handle_ptr == nullptr) { if (handle_ptr == nullptr) {
...@@ -51,6 +54,9 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) { ...@@ -51,6 +54,9 @@ __C infiniStatus_t infiniopCreateHandle(infiniopHandle_t *handle_ptr) {
return createKunlunHandle((infiniopKunlunHandle_t *)handle_ptr); return createKunlunHandle((infiniopKunlunHandle_t *)handle_ptr);
} }
#endif #endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, maca);
#endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -85,6 +91,9 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) { ...@@ -85,6 +91,9 @@ __C infiniStatus_t infiniopDestroyHandle(infiniopHandle_t handle) {
case INFINI_DEVICE_KUNLUN: { case INFINI_DEVICE_KUNLUN: {
return destroyKunlunHandle((infiniopKunlunHandle_t)handle); return destroyKunlunHandle((infiniopKunlunHandle_t)handle);
} }
#endif
#ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, maca);
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
#include "../../../utils.h"
#include "../pool.h"
#include "maca_handle.h"
#include <hcblas/hcblas.h>
#include <hcdnn/hcdnn.h>
#include <functional>
#include <memory>
// Check helpers for hcBLAS and hcDNN calls: each forwards the call expression to
// CHECK_INTERNAL together with the status value that means success for that library.
// NOTE(review): CHECK_INTERNAL is defined outside this file — confirm what it does on mismatch.
#define CHECK_MCBLAS(API) CHECK_INTERNAL(API, HCBLAS_STATUS_SUCCESS)
#define CHECK_MCDNN(API) CHECK_INTERNAL(API, HCDNN_STATUS_SUCCESS)
namespace device::maca {
// Library state held by a maca Handle: one pool of hcBLAS handles and one pool of
// hcDNN handles, plus helpers that hand a pooled handle to a caller-supplied callback.
class Handle::Internal {
// Pools of library handles that the use* helpers draw from.
Pool<hcblasHandle_t> mcblas_handles;
Pool<hcdnnHandle_t> mcdnn_handles;
// Callback type: takes a library handle of type T by value and returns an infiniStatus_t.
template <typename T>
using Fn = std::function<infiniStatus_t(T)>;
public:
// Invokes f with an hcBLAS handle; `stream` is the stream the handle is to be bound to.
infiniStatus_t useMcblas(hcStream_t stream, const Fn<hcblasHandle_t> &f) const;
// Invokes f with an hcDNN handle; `stream` is the stream the handle is to be bound to.
infiniStatus_t useMcdnn(hcStream_t stream, const Fn<hcdnnHandle_t> &f) const;
};
// Translates an infiniDtype_t value into the hcDNN data-type enum.
hcdnnDataType_t getHcdnnDtype(infiniDtype_t dt);
} // namespace device::maca
#include "common_maca.h"
namespace device::maca {
Handle::Handle(infiniDevice_t device, int device_id)
: InfiniopHandle{device, device_id},
_internal(std::make_shared<Handle::Internal>()) {}
Handle::Handle(int device_id) : Handle(INFINI_DEVICE_METAX, device_id) {}
auto Handle::internal() const -> const std::shared_ptr<Internal> & {
return _internal;
}
// Runs `f` with an hcBLAS handle bound to `stream`.
//
// A handle is popped from the pool; when the pool has none, a new one is
// created with hcblasCreate into a real local variable (the previous code wrote
// through the dereferenced, empty pop() result). After use the handle is pushed
// back to the pool even when hcblasSetStream or `f` reports a failure, so a
// failing call no longer drops the handle.
//
// Returns the first failing status produced by the CHECK_* macros, otherwise
// INFINI_STATUS_SUCCESS.
infiniStatus_t Handle::Internal::useMcblas(hcStream_t stream, const Fn<hcblasHandle_t> &f) const {
    hcblasHandle_t handle{};
    if (auto pooled = mcblas_handles.pop()) {
        handle = *pooled;
    } else {
        CHECK_MCBLAS(hcblasCreate(&handle));
    }

    // The CHECK_* macros return early on failure; confining them to a lambda
    // lets the handle be handed back to the pool on every path.
    const infiniStatus_t status = [&]() -> infiniStatus_t {
        CHECK_MCBLAS(hcblasSetStream(handle, stream));
        CHECK_STATUS(f(handle));
        return INFINI_STATUS_SUCCESS;
    }();

    mcblas_handles.push(std::move(handle));
    return status;
}
// Runs `f` with an hcDNN handle bound to `stream`.
//
// A handle is popped from the pool; when the pool has none, a new one is
// created with hcdnnCreate into a real local variable (the previous code wrote
// through the dereferenced, empty pop() result). After use the handle is pushed
// back to the pool even when hcdnnSetStream or `f` reports a failure, so a
// failing call no longer drops the handle.
//
// Returns the first failing status produced by the CHECK_* macros, otherwise
// INFINI_STATUS_SUCCESS.
infiniStatus_t Handle::Internal::useMcdnn(hcStream_t stream, const Fn<hcdnnHandle_t> &f) const {
    hcdnnHandle_t handle{};
    if (auto pooled = mcdnn_handles.pop()) {
        handle = *pooled;
    } else {
        CHECK_MCDNN(hcdnnCreate(&handle));
    }

    // The CHECK_* macros return early on failure; confining them to a lambda
    // lets the handle be handed back to the pool on every path.
    const infiniStatus_t status = [&]() -> infiniStatus_t {
        CHECK_MCDNN(hcdnnSetStream(handle, stream));
        CHECK_STATUS(f(handle));
        return INFINI_STATUS_SUCCESS;
    }();

    mcdnn_handles.push(std::move(handle));
    return status;
}
// Maps an infiniDtype_t to the matching hcDNN data type.
// Any dtype without an explicit mapping below falls back to HCDNN_DATA_FLOAT.
hcdnnDataType_t getHcdnnDtype(infiniDtype_t dt) {
    // Floating-point types.
    if (dt == INFINI_DTYPE_F16) {
        return HCDNN_DATA_HALF;
    }
    if (dt == INFINI_DTYPE_F32) {
        return HCDNN_DATA_FLOAT;
    }
    if (dt == INFINI_DTYPE_F64) {
        return HCDNN_DATA_DOUBLE;
    }
    if (dt == INFINI_DTYPE_BF16) {
        return HCDNN_DATA_BFLOAT16;
    }
    // Integer types.
    if (dt == INFINI_DTYPE_I8) {
        return HCDNN_DATA_INT8;
    }
    if (dt == INFINI_DTYPE_I32) {
        return HCDNN_DATA_INT32;
    }
    if (dt == INFINI_DTYPE_I64) {
        return HCDNN_DATA_INT64;
    }
    if (dt == INFINI_DTYPE_U8) {
        return HCDNN_DATA_UINT8;
    }
    // Unmapped dtype: same fallback the switch-based version used.
    return HCDNN_DATA_FLOAT;
}
// Factory used to create a MetaX handle: heap-allocates a Handle for
// `device_id` with device type INFINI_DEVICE_METAX and stores the pointer in
// *handle_ptr. `handle_ptr` is dereferenced without a null check.
// Always returns INFINI_STATUS_SUCCESS.
infiniStatus_t Handle::create(InfiniopHandle **handle_ptr, int device_id) {
    Handle *created = new Handle(INFINI_DEVICE_METAX, device_id);
    *handle_ptr = created;
    return INFINI_STATUS_SUCCESS;
}
} // namespace device::maca
#ifndef __INFINIOP_MACA_HANDLE_H__
#define __INFINIOP_MACA_HANDLE_H__
#include "../../handle.h"
#include <memory>
namespace device::maca {
// Device handle for the maca backend. Extends InfiniopHandle with a shared
// pointer to an Internal object (declared here, defined elsewhere).
struct Handle : public InfiniopHandle {
// Constructs a handle for the given device id.
Handle(int device_id);
// Forward declaration of the backend state type.
class Internal;
// Returns a const reference to the shared_ptr holding the Internal state.
auto internal() const -> const std::shared_ptr<Internal> &;
public:
// Factory: produces a handle for `device_id` and stores it in *handle_ptr.
static infiniStatus_t create(InfiniopHandle **handle_ptr, int device_id);
protected:
// Constructor taking an explicit device type; protected, so only this type and
// types derived from it can call it.
Handle(infiniDevice_t device, int device_id);
private:
// Backend state, held through a shared_ptr so it can be shared with other owners.
std::shared_ptr<Internal> _internal;
};
} // namespace device::maca
#endif // __INFINIOP_MACA_HANDLE_H__
#include "matmul_maca.h"
#include "../../../devices/maca/common_maca.h"
#include "../../../devices/maca/maca_handle.h"
namespace op::matmul::maca {
// Backend-private part of the matmul descriptor: holds a shared_ptr to the
// maca handle's Internal state.
struct Descriptor::Opaque {
std::shared_ptr<device::maca::Handle::Internal> internal;
};
// Deletes the Opaque object referenced by _opaque.
Descriptor::~Descriptor() {
delete _opaque;
}
// Builds a matmul descriptor for the maca backend.
//
// handle_  : infiniop handle, reinterpreted as device::maca::Handle.
// desc_ptr : receives the newly allocated Descriptor on success.
// c_desc, a_desc, b_desc : tensor descriptors passed to MatmulInfo; the dtype
//                          of c_desc decides whether the operation is supported.
//
// Returns INFINI_STATUS_BAD_TENSOR_DTYPE unless the output dtype is F16 or F32,
// the status reported by MatmulInfo when that is not success, and
// INFINI_STATUS_SUCCESS otherwise.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t c_desc,
    infiniopTensorDescriptor_t a_desc,
    infiniopTensorDescriptor_t b_desc) {
    auto out_dtype = c_desc->dtype();
    const bool dtype_supported = (out_dtype == INFINI_DTYPE_F16) || (out_dtype == INFINI_DTYPE_F32);
    if (!dtype_supported) {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

    // MatmulInfo reports its outcome through the status out-parameter.
    infiniStatus_t info_status;
    auto matmul_info = MatmulInfo(c_desc, a_desc, b_desc, &info_status, MatrixLayout::COL_MAJOR);
    if (info_status != INFINI_STATUS_SUCCESS) {
        return info_status;
    }

    auto *maca_handle = reinterpret_cast<device::maca::Handle *>(handle_);
    // The descriptor shares the handle's Internal state through Opaque.
    auto *opaque = new Opaque{maca_handle->internal()};
    *desc_ptr = new Descriptor(
        out_dtype, matmul_info, 0,
        opaque,
        maca_handle->device, maca_handle->device_id);
    return INFINI_STATUS_SUCCESS;
}
// Executes the matmul described by this descriptor through
// hcblasGemmStridedBatchedEx on the given stream.
//
// workspace, workspace_size : not referenced in this body.
// c            : output buffer, scaled by beta in the GEMM call.
// a, b         : input buffers; exchanged first when _info.is_transed is set.
// alpha, beta  : float scalars passed by address to the GEMM call.
// stream       : cast to hcStream_t and handed to useMcblas.
//
// Returns INFINI_STATUS_BAD_TENSOR_DTYPE when _dtype is neither F16 nor F32;
// otherwise the outcome of the CHECK_* wrapped calls, INFINI_STATUS_SUCCESS on
// the normal path.
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *c,
float beta,
const void *a,
const void *b,
float alpha,
void *stream) const {
hpccDataType a_type, b_type, c_type;
hcblasComputeType_t compute_type;
// Pick element types and the compute type from the descriptor dtype.
switch (_dtype) {
case INFINI_DTYPE_F16:
a_type = b_type = c_type = HPCC_R_16F;
compute_type = HCBLAS_COMPUTE_32F;
break;
case INFINI_DTYPE_F32:
a_type = b_type = c_type = HPCC_R_32F;
compute_type = HCBLAS_COMPUTE_32F_FAST_TF32;
break;
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
// When MatmulInfo marked the problem as transposed, the operand pointers trade places.
if (_info.is_transed) {
std::swap(a, b);
}
// A unit row stride selects the non-transposed op; anything else selects transpose.
auto op_a = _info.a_matrix.row_stride == 1 ? HCBLAS_OP_N : HCBLAS_OP_T;
auto op_b = _info.b_matrix.row_stride == 1 ? HCBLAS_OP_N : HCBLAS_OP_T;
// Borrow an hcBLAS handle bound to `stream` and issue the strided-batched GEMM with it.
CHECK_STATUS(_opaque->internal->useMcblas(
(hcStream_t)stream,
[&](hcblasHandle_t handle) {
CHECK_MCBLAS(
hcblasGemmStridedBatchedEx(
handle,
op_a,
op_b,
static_cast<int>(_info.m),
static_cast<int>(_info.n),
static_cast<int>(_info.k),
&alpha,
a,
a_type,
static_cast<int>(_info.a_matrix.ld()),
_info.a_matrix.stride,
b,
b_type,
static_cast<int>(_info.b_matrix.ld()),
_info.b_matrix.stride,
&beta,
c,
c_type,
static_cast<int>(_info.c_matrix.ld()),
_info.c_matrix.stride,
static_cast<int>(_info.batch),
compute_type,
HCBLAS_GEMM_DEFAULT_TENSOR_OP));
return INFINI_STATUS_SUCCESS;
}));
return INFINI_STATUS_SUCCESS;
}
} // namespace op::matmul::maca
#ifndef __MATMUL_MACA_H__
#define __MATMUL_MACA_H__
#include "../matmul.h"
DESCRIPTOR(maca)
#endif // __MATMUL_MACA_H__
...@@ -14,6 +14,9 @@ ...@@ -14,6 +14,9 @@
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
#include "ascend/matmul_ascend.h" #include "ascend/matmul_ascend.h"
#endif #endif
#ifdef ENABLE_METAX_API
#include "maca/matmul_maca.h"
#endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
#include "kunlun/matmul_kunlun.h" #include "kunlun/matmul_kunlun.h"
#endif #endif
...@@ -48,6 +51,9 @@ __C infiniStatus_t infiniopCreateMatmulDescriptor( ...@@ -48,6 +51,9 @@ __C infiniStatus_t infiniopCreateMatmulDescriptor(
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
CREATE(INFINI_DEVICE_ASCEND, ascend); CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, maca);
#endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
CREATE(INFINI_DEVICE_KUNLUN, kunlun); CREATE(INFINI_DEVICE_KUNLUN, kunlun);
#endif #endif
...@@ -83,6 +89,9 @@ infiniopGetMatmulWorkspaceSize( ...@@ -83,6 +89,9 @@ infiniopGetMatmulWorkspaceSize(
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
GET(INFINI_DEVICE_ASCEND, ascend); GET(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, maca);
#endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
GET(INFINI_DEVICE_KUNLUN, kunlun); GET(INFINI_DEVICE_KUNLUN, kunlun);
#endif #endif
...@@ -126,6 +135,9 @@ __C infiniStatus_t infiniopMatmul( ...@@ -126,6 +135,9 @@ __C infiniStatus_t infiniopMatmul(
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
CALCULATE(INFINI_DEVICE_ASCEND, ascend); CALCULATE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, maca);
#endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
CALCULATE(INFINI_DEVICE_KUNLUN, kunlun); CALCULATE(INFINI_DEVICE_KUNLUN, kunlun);
#endif #endif
...@@ -159,6 +171,9 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) { ...@@ -159,6 +171,9 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) {
#ifdef ENABLE_ASCEND_API #ifdef ENABLE_ASCEND_API
DELETE(INFINI_DEVICE_ASCEND, ascend); DELETE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API
DELETE(INFINI_DEVICE_METAX, maca);
#endif
#ifdef ENABLE_KUNLUN_API #ifdef ENABLE_KUNLUN_API
DELETE(INFINI_DEVICE_KUNLUN, kunlun); DELETE(INFINI_DEVICE_KUNLUN, kunlun);
#endif #endif
......
#include "infinirt_maca.h"
#include "../../utils.h"
#include <hcr/hc_runtime.h>
#include <hcr/hc_runtime_api.h>
// Check helper for hc runtime calls: forwards the call expression to CHECK_INTERNAL
// with hcSuccess as the expected result.
// NOTE(review): CHECK_INTERNAL is defined outside this file — confirm what it does on mismatch.
#define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess)
namespace infinirt::maca {
// Stores the device count reported by hcGetDeviceCount in *count.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t getDeviceCount(int *count) {
CHECK_MACART(hcGetDeviceCount(count));
return INFINI_STATUS_SUCCESS;
}
// Selects the device with index device_id via hcSetDevice.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t setDevice(int device_id) {
CHECK_MACART(hcSetDevice(device_id));
return INFINI_STATUS_SUCCESS;
}
// Calls hcDeviceSynchronize.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t deviceSynchronize() {
CHECK_MACART(hcDeviceSynchronize());
return INFINI_STATUS_SUCCESS;
}
// Creates a stream with hcStreamCreate and stores it in *stream_ptr.
// *stream_ptr is written only after the CHECK_MACART-wrapped creation call.
infiniStatus_t streamCreate(infinirtStream_t *stream_ptr) {
hcStream_t stream;
CHECK_MACART(hcStreamCreate(&stream));
*stream_ptr = stream;
return INFINI_STATUS_SUCCESS;
}
// Destroys the given stream with hcStreamDestroy.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t streamDestroy(infinirtStream_t stream) {
CHECK_MACART(hcStreamDestroy((hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Calls hcStreamSynchronize on the given stream.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t streamSynchronize(infinirtStream_t stream) {
CHECK_MACART(hcStreamSynchronize((hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Calls hcStreamWaitEvent with the given stream and event.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
CHECK_MACART(hcStreamWaitEvent((hcStream_t)stream, (hcEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Creates an event with hcEventCreate and stores it in *event_ptr.
// *event_ptr is written only after the CHECK_MACART-wrapped creation call.
infiniStatus_t eventCreate(infinirtEvent_t *event_ptr) {
hcEvent_t event;
CHECK_MACART(hcEventCreate(&event));
*event_ptr = event;
return INFINI_STATUS_SUCCESS;
}
// Calls hcEventRecord with the given event and stream.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t eventRecord(infinirtEvent_t event, infinirtStream_t stream) {
CHECK_MACART(hcEventRecord((hcEvent_t)event, (hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Queries an event and reports the result through *status_ptr.
//
// The previous version never wrote *status_ptr and routed every result other
// than hcSuccess through CHECK_MACART, so a still-pending event would have been
// treated as a failure. Now:
//   hcSuccess        -> *status_ptr = INFINIRT_EVENT_COMPLETE
//   hcErrorNotReady  -> *status_ptr = INFINIRT_EVENT_NOT_READY
//   anything else    -> handled by CHECK_MACART as before
// NOTE(review): hcErrorNotReady and the INFINIRT_EVENT_* enumerators are taken
// from the hc runtime / infinirt headers, which are not visible here — confirm.
infiniStatus_t eventQuery(infinirtEvent_t event, infinirtEventStatus_t *status_ptr) {
    const auto query_result = hcEventQuery((hcEvent_t)event);
    if (query_result == hcSuccess) {
        *status_ptr = INFINIRT_EVENT_COMPLETE;
    } else if (query_result == hcErrorNotReady) {
        *status_ptr = INFINIRT_EVENT_NOT_READY;
    } else {
        CHECK_MACART(query_result);
    }
    return INFINI_STATUS_SUCCESS;
}
// Calls hcEventSynchronize on the given event.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t eventSynchronize(infinirtEvent_t event) {
CHECK_MACART(hcEventSynchronize((hcEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Destroys the given event with hcEventDestroy.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t eventDestroy(infinirtEvent_t event) {
CHECK_MACART(hcEventDestroy((hcEvent_t)event));
return INFINI_STATUS_SUCCESS;
}
// Allocates `size` bytes with hcMalloc and stores the pointer in *p_ptr.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t mallocDevice(void **p_ptr, size_t size) {
CHECK_MACART(hcMalloc(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
// Allocates `size` bytes with hcMallocHost and stores the pointer in *p_ptr.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t mallocHost(void **p_ptr, size_t size) {
CHECK_MACART(hcMallocHost(p_ptr, size));
return INFINI_STATUS_SUCCESS;
}
// Releases `ptr` with hcFree.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t freeDevice(void *ptr) {
CHECK_MACART(hcFree(ptr));
return INFINI_STATUS_SUCCESS;
}
// Releases `ptr` with hcFreeHost.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t freeHost(void *ptr) {
CHECK_MACART(hcFreeHost(ptr));
return INFINI_STATUS_SUCCESS;
}
// Converts an infinirt memcpy direction into the hc runtime's hcMemcpyKind.
// Directions without an explicit mapping yield hcMemcpyDefault.
hcMemcpyKind toMacaMemcpyKind(infinirtMemcpyKind_t kind) {
    if (kind == INFINIRT_MEMCPY_H2D) {
        return hcMemcpyHostToDevice;
    }
    if (kind == INFINIRT_MEMCPY_D2H) {
        return hcMemcpyDeviceToHost;
    }
    if (kind == INFINIRT_MEMCPY_D2D) {
        return hcMemcpyDeviceToDevice;
    }
    if (kind == INFINIRT_MEMCPY_H2H) {
        return hcMemcpyHostToHost;
    }
    return hcMemcpyDefault;
}
// Copies `size` bytes from src to dst with hcMemcpy; the direction is `kind`
// translated by toMacaMemcpyKind. Failure handling is delegated to CHECK_MACART.
infiniStatus_t memcpy(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind) {
CHECK_MACART(hcMemcpy(dst, src, size, toMacaMemcpyKind(kind)));
return INFINI_STATUS_SUCCESS;
}
// Issues hcMemcpyAsync for `size` bytes from src to dst on the given stream; the
// direction is `kind` translated by toMacaMemcpyKind.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t memcpyAsync(void *dst, const void *src, size_t size, infinirtMemcpyKind_t kind, infinirtStream_t stream) {
CHECK_MACART(hcMemcpyAsync(dst, src, size, toMacaMemcpyKind(kind), (hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Requests `size` bytes with hcMallocAsync on the given stream and stores the
// pointer in *p_ptr. Failure handling is delegated to CHECK_MACART.
infiniStatus_t mallocAsync(void **p_ptr, size_t size, infinirtStream_t stream) {
CHECK_MACART(hcMallocAsync(p_ptr, size, (hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
// Releases `ptr` with hcFreeAsync on the given stream.
// Failure handling is delegated to CHECK_MACART.
infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_MACART(hcFreeAsync(ptr, (hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
} // namespace infinirt::maca
#ifndef __INFINIRT_MACA_H__
#define __INFINIRT_MACA_H__
#include "../infinirt_impl.h"
namespace infinirt::maca {
#ifdef ENABLE_METAX_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::maca
#endif // __INFINIRT_MACA_H__
...@@ -166,6 +166,11 @@ def get_args(): ...@@ -166,6 +166,11 @@ def get_args():
action="store_true", action="store_true",
help="Run ASCEND NPU test", help="Run ASCEND NPU test",
) )
parser.add_argument(
"--metax",
action="store_true",
help="Run METAX GPU test",
)
parser.add_argument( parser.add_argument(
"--kunlun", "--kunlun",
action="store_true", action="store_true",
...@@ -434,6 +439,10 @@ def get_test_devices(args): ...@@ -434,6 +439,10 @@ def get_test_devices(args):
torch.npu.set_device(0) # Ascend NPU needs explicit device initialization torch.npu.set_device(0) # Ascend NPU needs explicit device initialization
devices_to_test.append(InfiniDeviceEnum.ASCEND) devices_to_test.append(InfiniDeviceEnum.ASCEND)
if args.metax:
import torch
devices_to_test.append(InfiniDeviceEnum.METAX)
if args.kunlun: if args.kunlun:
import torch_xmlir import torch_xmlir
......
...@@ -77,7 +77,8 @@ option("metax-gpu") ...@@ -77,7 +77,8 @@ option("metax-gpu")
option_end() option_end()
if has_config("metax-gpu") then if has_config("metax-gpu") then
add_defines("ENABLE_MACA_API") add_defines("ENABLE_METAX_API")
includes("xmake/maca.lua")
end end
-- 摩尔线程 -- 摩尔线程
...@@ -134,6 +135,9 @@ target("infinirt") ...@@ -134,6 +135,9 @@ target("infinirt")
if has_config("ascend-npu") then if has_config("ascend-npu") then
add_deps("infinirt-ascend") add_deps("infinirt-ascend")
end end
if has_config("metax-gpu") then
add_deps("infinirt-metax")
end
set_languages("cxx17") set_languages("cxx17")
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")) set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
add_files("src/infinirt/*.cc") add_files("src/infinirt/*.cc")
...@@ -172,7 +176,7 @@ target("infiniop") ...@@ -172,7 +176,7 @@ target("infiniop")
add_deps("infiniop-ascend") add_deps("infiniop-ascend")
end end
if has_config("metax-gpu") then if has_config("metax-gpu") then
add_deps("metax-gpu") add_deps("infiniop-metax")
end end
if has_config("kunlun-xpu") then if has_config("kunlun-xpu") then
add_deps("infiniop-kunlun") add_deps("infiniop-kunlun")
......
-- Locate the MACA SDK from the environment: the first of MACA_PATH, MACA_HOME
-- and MACA_ROOT that is set wins.
-- NOTE(review): when none of them is set MACA_ROOT is nil and the string
-- concatenations below raise an error — confirm this is the intended failure mode.
local MACA_ROOT = os.getenv("MACA_PATH") or os.getenv("MACA_HOME") or os.getenv("MACA_ROOT")
-- Make the SDK headers and libraries visible to the targets declared in this file.
add_includedirs(MACA_ROOT .. "/include")
add_linkdirs(MACA_ROOT .. "/lib")
-- Link against the hcDNN, hcBLAS and hc runtime shared libraries.
add_links("libhcdnn.so")
add_links("libhcblas.so")
add_links("libhcruntime.so")
-- Build rule for ".maca" sources: each file is compiled to an object file with
-- the htcc compiler found under MACA_ROOT and the object is added to the target.
rule("maca")
    set_extensions(".maca")
    on_load(function (target)
        -- Every target using this rule gets the project "include" directory.
        target:add("includedirs", "include")
    end)
    on_build_file(function (target, sourcefile)
        local objectfile = target:objectfile(sourcefile)
        os.mkdir(path.directory(objectfile))
        local htcc = path.join(MACA_ROOT, "htgpu_llvm/bin/htcc")
        -- The unused space-joined include string was removed: it was never read,
        -- and table.concat raises when target:get() does not return a table.
        local args = { "-x", "hpcc", "-c", sourcefile, "-o", objectfile, "-I" .. MACA_ROOT .. "/include", "-O3", "-fPIC", "-Werror", "-std=c++17"}
        -- Forward the target's include directories as individual -I flags.
        for _, includedir in ipairs(target:get("includedirs")) do
            table.insert(args, "-I" .. includedir)
        end
        os.execv(htcc, args)
        -- Register the produced object so it is part of the target's objects.
        table.insert(target:objectfiles(), objectfile)
    end)
rule_end()
-- Static library target containing the infiniop maca device code and the maca
-- implementations of the operators.
target("infiniop-metax")
set_kind("static")
-- Empty install hook: this target contributes no install step of its own.
on_install(function (target) end)
add_cxflags("-lstdc++ -Wall -fPIC")
set_languages("cxx17")
set_warnings("all")
-- Host-side C++ sources.
add_files("../src/infiniop/devices/maca/*.cc", "../src/infiniop/ops/*/maca/*.cc")
-- ".maca" sources are compiled through the "maca" rule.
add_files("../src/infiniop/ops/*/maca/*.maca", {rule = "maca"})
target_end()
-- Static library target containing the infinirt maca runtime wrappers.
target("infinirt-metax")
set_kind("static")
set_languages("cxx17")
-- Empty install hook: this target contributes no install step of its own.
on_install(function (target) end)
add_deps("infini-utils")
-- Add files
add_files("$(projectdir)/src/infinirt/maca/*.cc")
-- Unlike infiniop-metax, warnings are promoted to errors here (-Werror).
add_cxflags("-lstdc++ -Wall -Werror -fPIC")
target_end()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment