Unverified Commit 65bed07e authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #322 from YdrMaster/main

issue/291/style: 所有 maca 改为 metax
parents e4605f7c 507be07e
#include "../../../devices/maca/common_maca.h"
#include "../../../devices/metax/metax_common.h"
#include "rms_norm_metax.cuh"
#include "../../../devices/maca/maca_kernel_common.h"
#include "../../../devices/metax/metax_kernel_common.h"
#include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh"
......@@ -9,7 +9,7 @@
#include "../cuda/kernel.cuh"
template <unsigned int BLOCK_SIZE, typename Tcompute, typename Tdata, typename Tweight>
INFINIOP_MACA_KERNEL rmsnormKernel(
INFINIOP_METAX_KERNEL rmsnormKernel(
Tdata *__restrict__ y,
ptrdiff_t stride_y,
const Tdata *__restrict__ x,
......@@ -20,10 +20,10 @@ INFINIOP_MACA_KERNEL rmsnormKernel(
rmsnormBlock<BLOCK_SIZE, Tcompute>(y, stride_y, x, stride_x, w, dim, epsilon);
}
namespace op::rms_norm::maca {
namespace op::rms_norm::metax {
struct Descriptor::Opaque {
std::shared_ptr<device::maca::Handle::Internal> internal;
std::shared_ptr<device::metax::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create(
}
*desc_ptr = new Descriptor(
new Opaque{reinterpret_cast<device::maca::Handle *>(handle)->internal()},
new Opaque{reinterpret_cast<device::metax::Handle *>(handle)->internal()},
std::move(info),
0,
handle->device, handle->device_id);
......@@ -62,10 +62,10 @@ infiniStatus_t launchKernel(
const void *x, ptrdiff_t stride_x,
const void *w, infiniDtype_t wtype,
float epsilon,
hcStream_t maca_stream) {
hcStream_t stream) {
#define LAUNCH_KERNEL(Tdata, Tweight, Tcompute) \
rmsnormKernel<BLOCK_SIZE, Tcompute, Tdata, Tweight><<<batch_size, BLOCK_SIZE, 0, maca_stream>>>( \
rmsnormKernel<BLOCK_SIZE, Tcompute, Tdata, Tweight><<<batch_size, BLOCK_SIZE, 0, stream>>>( \
reinterpret_cast<Tdata *>(y), \
stride_y, \
reinterpret_cast<const Tdata *>(x), \
......@@ -96,7 +96,7 @@ infiniStatus_t launchKernel(
infiniStatus_t Descriptor::calculate(
void *workspace, size_t workspace_size,
void *y, const void *x, const void *w,
void *stream) const {
void *stream_) const {
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
......@@ -106,14 +106,14 @@ infiniStatus_t Descriptor::calculate(
auto stride_y = _info.y_strides[0];
auto dim = _info.dim();
uint32_t batch_size = static_cast<uint32_t>(_info.shape[0]);
auto maca_stream = reinterpret_cast<hcStream_t>(stream);
auto stream = reinterpret_cast<hcStream_t>(stream_);
// launch kernel with different block sizes
if (_opaque->internal->maxThreadsPerBlock() == MACA_BLOCK_SIZE_1024) {
CHECK_STATUS(launchKernel<MACA_BLOCK_SIZE_1024>(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, maca_stream));
if (_opaque->internal->maxThreadsPerBlock() == METAX_BLOCK_SIZE_1024) {
CHECK_STATUS(launchKernel<METAX_BLOCK_SIZE_1024>(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, stream));
} else {
return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED;
}
return INFINI_STATUS_SUCCESS;
}
} // namespace op::rms_norm::maca
} // namespace op::rms_norm::metax
......@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor(
CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, maca);
CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
CREATE(INFINI_DEVICE_MOORE, musa);
......@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d
GET(INFINI_DEVICE_ASCEND, ascend);
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, maca);
GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, musa);
......@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works
CALCULATE(INFINI_DEVICE_ASCEND, ascend);
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, maca);
CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
CALCULATE(INFINI_DEVICE_MOORE, musa);
......@@ -173,7 +173,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t
DESTROY(INFINI_DEVICE_ASCEND, ascend);
#endif
#ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, maca);
DESTROY(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
DESTROY(INFINI_DEVICE_MOORE, musa);
......
#ifndef __INFINIOP_ROPE_MACA_H__
#define __INFINIOP_ROPE_MACA_H__
#ifndef __INFINIOP_ROPE_METAX_H__
#define __INFINIOP_ROPE_METAX_H__
#include "../rope.h"
DESCRIPTOR(metax)
#endif // __INFINIOP_ROPE_MACA_H__
#endif // __INFINIOP_ROPE_METAX_H__
#include "../../../devices/maca/common_maca.h"
#include "../../../devices/metax/metax_common.h"
#include "rope_metax.h"
#include "../../../devices/maca/maca_kernel_common.h"
#include "../../../devices/metax/metax_kernel_common.h"
#include "../cuda/kernel.cuh"
template <typename Tdata, typename Tindex, typename Tangle>
INFINIOP_MACA_KERNEL ropeThreadPerItemKernel(
INFINIOP_METAX_KERNEL ropeThreadPerItemKernel(
Tdata *y_,
const Tdata *x_,
const Tindex *__restrict__ pos_ids,
......@@ -28,7 +28,7 @@ INFINIOP_MACA_KERNEL ropeThreadPerItemKernel(
namespace op::rope::metax {
struct Descriptor::Opaque {
std::shared_ptr<device::maca::Handle::Internal> internal;
std::shared_ptr<device::metax::Handle::Internal> internal;
};
Descriptor::~Descriptor() {
......@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t sin_desc,
infiniopTensorDescriptor_t cos_desc) {
auto handle = reinterpret_cast<device::maca::Handle *>(handle_);
auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc);
CHECK_RESULT(info);
......@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor(
info.take(),
0,
new Opaque{reinterpret_cast<device::maca::Handle *>(handle)->internal()},
new Opaque{reinterpret_cast<device::metax::Handle *>(handle)->internal()},
handle->device,
handle->device_id);
......@@ -141,4 +141,4 @@ infiniStatus_t Descriptor::calculate(
#undef ROPE_TYPE
#undef CALCULATE_ROPE
} // namespace op::rope::maca
} // namespace op::rope::metax
#ifndef __SWIGLU_MACA_API_H__
#define __SWIGLU_MACA_API_H__
#ifndef __SWIGLU_METAX_API_H__
#define __SWIGLU_METAX_API_H__
#include "../../../elementwise/maca/elementwise_maca_api.h"
#include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR(swiglu, metax, maca)
ELEMENTWISE_DESCRIPTOR(swiglu, metax, metax)
#endif // __SWIGLU_MACA_API_H__
#endif // __SWIGLU_METAX_API_H__
#include "swiglu_metax.h"
#include "../../../elementwise/maca/elementwise_maca.h"
#include "../../../elementwise/metax/elementwise_metax.h"
#include "../cuda/kernel.cuh"
......@@ -14,7 +14,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::maca::Handle *>(handle_);
auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &up_desc = input_desc_vec.at(0);
......@@ -26,8 +26,8 @@ infiniStatus_t Descriptor::create(
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_BF16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create MACA elementwise descriptor
CREATE_ELEMENTWISE_MACA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
// create METAX elementwise descriptor
CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
......
......@@ -5,7 +5,7 @@
#include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh"
#include "kunlun/infinirt_kunlun.h"
#include "maca/infinirt_maca.h"
#include "metax/infinirt_metax.h"
#include "musa/infinirt_musa.h"
thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU;
......@@ -62,7 +62,7 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
_status = infinirt::ascend::API PARAMS; \
break; \
case INFINI_DEVICE_METAX: \
_status = infinirt::maca::API PARAMS; \
_status = infinirt::metax::API PARAMS; \
break; \
case INFINI_DEVICE_MOORE: \
_status = infinirt::musa::API PARAMS; \
......
#include "infinirt_maca.h"
#include "infinirt_metax.h"
#include "../../utils.h"
#include <hcr/hc_runtime.h>
#include <hcr/hc_runtime_api.h>
#define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess)
namespace infinirt::maca {
namespace infinirt::metax {
infiniStatus_t getDeviceCount(int *count) {
CHECK_MACART(hcGetDeviceCount(count));
return INFINI_STATUS_SUCCESS;
......@@ -124,4 +124,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_MACART(hcFreeAsync(ptr, (hcStream_t)stream));
return INFINI_STATUS_SUCCESS;
}
} // namespace infinirt::maca
} // namespace infinirt::metax
......@@ -2,12 +2,12 @@
#define __INFINIRT_MACA_H__
#include "../infinirt_impl.h"
namespace infinirt::maca {
namespace infinirt::metax {
#ifdef ENABLE_METAX_API
INFINIRT_DEVICE_API_IMPL
#else
INFINIRT_DEVICE_API_NOOP
#endif
} // namespace infinirt::maca
} // namespace infinirt::metax
#endif // __INFINIRT_MACA_H__
......@@ -34,7 +34,7 @@ target("infiniop-metax")
set_languages("cxx17")
set_warnings("all", "error")
add_cxflags("-lstdc++", "-fPIC", "-Wno-defaulted-function-deleted", "-Wno-strict-aliasing")
add_files("../src/infiniop/devices/maca/*.cc", "../src/infiniop/ops/*/metax/*.cc")
add_files("../src/infiniop/devices/metax/*.cc", "../src/infiniop/ops/*/metax/*.cc")
add_files("../src/infiniop/ops/*/metax/*.maca", {rule = "maca"})
target_end()
......@@ -45,7 +45,7 @@ target("infinirt-metax")
add_deps("infini-utils")
set_warnings("all", "error")
add_cxflags("-lstdc++ -fPIC")
add_files("../src/infinirt/maca/*.cc")
add_files("../src/infinirt/metax/*.cc")
target_end()
target("infiniccl-metax")
......@@ -58,7 +58,7 @@ target("infiniccl-metax")
end
if has_config("ccl") then
add_links("libhccl.so")
add_files("../src/infiniccl/maca/*.cc")
add_files("../src/infiniccl/metax/*.cc")
end
set_languages("cxx17")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment