Unverified Commit 65bed07e authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #322 from YdrMaster/main

issue/291/style: 所有 maca 改为 metax
parents e4605f7c 507be07e
#include "../../../devices/maca/common_maca.h" #include "../../../devices/metax/metax_common.h"
#include "rms_norm_metax.cuh" #include "rms_norm_metax.cuh"
#include "../../../devices/maca/maca_kernel_common.h" #include "../../../devices/metax/metax_kernel_common.h"
#include <cub/block/block_reduce.cuh> #include <cub/block/block_reduce.cuh>
#include "../../../reduce/cuda/reduce.cuh" #include "../../../reduce/cuda/reduce.cuh"
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#include "../cuda/kernel.cuh" #include "../cuda/kernel.cuh"
template <unsigned int BLOCK_SIZE, typename Tcompute, typename Tdata, typename Tweight> template <unsigned int BLOCK_SIZE, typename Tcompute, typename Tdata, typename Tweight>
INFINIOP_MACA_KERNEL rmsnormKernel( INFINIOP_METAX_KERNEL rmsnormKernel(
Tdata *__restrict__ y, Tdata *__restrict__ y,
ptrdiff_t stride_y, ptrdiff_t stride_y,
const Tdata *__restrict__ x, const Tdata *__restrict__ x,
...@@ -20,10 +20,10 @@ INFINIOP_MACA_KERNEL rmsnormKernel( ...@@ -20,10 +20,10 @@ INFINIOP_MACA_KERNEL rmsnormKernel(
rmsnormBlock<BLOCK_SIZE, Tcompute>(y, stride_y, x, stride_x, w, dim, epsilon); rmsnormBlock<BLOCK_SIZE, Tcompute>(y, stride_y, x, stride_x, w, dim, epsilon);
} }
namespace op::rms_norm::maca { namespace op::rms_norm::metax {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::maca::Handle::Internal> internal; std::shared_ptr<device::metax::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create( ...@@ -47,7 +47,7 @@ infiniStatus_t Descriptor::create(
} }
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
new Opaque{reinterpret_cast<device::maca::Handle *>(handle)->internal()}, new Opaque{reinterpret_cast<device::metax::Handle *>(handle)->internal()},
std::move(info), std::move(info),
0, 0,
handle->device, handle->device_id); handle->device, handle->device_id);
...@@ -62,10 +62,10 @@ infiniStatus_t launchKernel( ...@@ -62,10 +62,10 @@ infiniStatus_t launchKernel(
const void *x, ptrdiff_t stride_x, const void *x, ptrdiff_t stride_x,
const void *w, infiniDtype_t wtype, const void *w, infiniDtype_t wtype,
float epsilon, float epsilon,
hcStream_t maca_stream) { hcStream_t stream) {
#define LAUNCH_KERNEL(Tdata, Tweight, Tcompute) \ #define LAUNCH_KERNEL(Tdata, Tweight, Tcompute) \
rmsnormKernel<BLOCK_SIZE, Tcompute, Tdata, Tweight><<<batch_size, BLOCK_SIZE, 0, maca_stream>>>( \ rmsnormKernel<BLOCK_SIZE, Tcompute, Tdata, Tweight><<<batch_size, BLOCK_SIZE, 0, stream>>>( \
reinterpret_cast<Tdata *>(y), \ reinterpret_cast<Tdata *>(y), \
stride_y, \ stride_y, \
reinterpret_cast<const Tdata *>(x), \ reinterpret_cast<const Tdata *>(x), \
...@@ -96,7 +96,7 @@ infiniStatus_t launchKernel( ...@@ -96,7 +96,7 @@ infiniStatus_t launchKernel(
infiniStatus_t Descriptor::calculate( infiniStatus_t Descriptor::calculate(
void *workspace, size_t workspace_size, void *workspace, size_t workspace_size,
void *y, const void *x, const void *w, void *y, const void *x, const void *w,
void *stream) const { void *stream_) const {
if (workspace_size < _workspace_size) { if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE; return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
...@@ -106,14 +106,14 @@ infiniStatus_t Descriptor::calculate( ...@@ -106,14 +106,14 @@ infiniStatus_t Descriptor::calculate(
auto stride_y = _info.y_strides[0]; auto stride_y = _info.y_strides[0];
auto dim = _info.dim(); auto dim = _info.dim();
uint32_t batch_size = static_cast<uint32_t>(_info.shape[0]); uint32_t batch_size = static_cast<uint32_t>(_info.shape[0]);
auto maca_stream = reinterpret_cast<hcStream_t>(stream); auto stream = reinterpret_cast<hcStream_t>(stream_);
// launch kernel with different block sizes // launch kernel with different block sizes
if (_opaque->internal->maxThreadsPerBlock() == MACA_BLOCK_SIZE_1024) { if (_opaque->internal->maxThreadsPerBlock() == METAX_BLOCK_SIZE_1024) {
CHECK_STATUS(launchKernel<MACA_BLOCK_SIZE_1024>(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, maca_stream)); CHECK_STATUS(launchKernel<METAX_BLOCK_SIZE_1024>(batch_size, dim, y, _info.atype, stride_y, x, stride_x, w, _info.wtype, _info.epsilon, stream));
} else { } else {
return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED;
} }
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace op::rms_norm::maca } // namespace op::rms_norm::metax
...@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor( ...@@ -58,7 +58,7 @@ __C infiniStatus_t infiniopCreateRMSNormDescriptor(
CREATE(INFINI_DEVICE_ASCEND, ascend); CREATE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, maca); CREATE(INFINI_DEVICE_METAX, metax);
#endif #endif
#ifdef ENABLE_MOORE_API #ifdef ENABLE_MOORE_API
CREATE(INFINI_DEVICE_MOORE, musa); CREATE(INFINI_DEVICE_MOORE, musa);
...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d ...@@ -96,7 +96,7 @@ __C infiniStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t d
GET(INFINI_DEVICE_ASCEND, ascend); GET(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, maca); GET(INFINI_DEVICE_METAX, metax);
#endif #endif
#ifdef ENABLE_MOORE_API #ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, musa); GET(INFINI_DEVICE_MOORE, musa);
...@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works ...@@ -135,7 +135,7 @@ __C infiniStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *works
CALCULATE(INFINI_DEVICE_ASCEND, ascend); CALCULATE(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, maca); CALCULATE(INFINI_DEVICE_METAX, metax);
#endif #endif
#ifdef ENABLE_MOORE_API #ifdef ENABLE_MOORE_API
CALCULATE(INFINI_DEVICE_MOORE, musa); CALCULATE(INFINI_DEVICE_MOORE, musa);
...@@ -173,7 +173,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t ...@@ -173,7 +173,7 @@ __C infiniStatus_t infiniopDestroyRMSNormDescriptor(infiniopRMSNormDescriptor_t
DESTROY(INFINI_DEVICE_ASCEND, ascend); DESTROY(INFINI_DEVICE_ASCEND, ascend);
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, maca); DESTROY(INFINI_DEVICE_METAX, metax);
#endif #endif
#ifdef ENABLE_MOORE_API #ifdef ENABLE_MOORE_API
DESTROY(INFINI_DEVICE_MOORE, musa); DESTROY(INFINI_DEVICE_MOORE, musa);
......
#ifndef __INFINIOP_ROPE_MACA_H__ #ifndef __INFINIOP_ROPE_METAX_H__
#define __INFINIOP_ROPE_MACA_H__ #define __INFINIOP_ROPE_METAX_H__
#include "../rope.h" #include "../rope.h"
DESCRIPTOR(metax) DESCRIPTOR(metax)
#endif // __INFINIOP_ROPE_MACA_H__ #endif // __INFINIOP_ROPE_METAX_H__
#include "../../../devices/maca/common_maca.h" #include "../../../devices/metax/metax_common.h"
#include "rope_metax.h" #include "rope_metax.h"
#include "../../../devices/maca/maca_kernel_common.h" #include "../../../devices/metax/metax_kernel_common.h"
#include "../cuda/kernel.cuh" #include "../cuda/kernel.cuh"
template <typename Tdata, typename Tindex, typename Tangle> template <typename Tdata, typename Tindex, typename Tangle>
INFINIOP_MACA_KERNEL ropeThreadPerItemKernel( INFINIOP_METAX_KERNEL ropeThreadPerItemKernel(
Tdata *y_, Tdata *y_,
const Tdata *x_, const Tdata *x_,
const Tindex *__restrict__ pos_ids, const Tindex *__restrict__ pos_ids,
...@@ -28,7 +28,7 @@ INFINIOP_MACA_KERNEL ropeThreadPerItemKernel( ...@@ -28,7 +28,7 @@ INFINIOP_MACA_KERNEL ropeThreadPerItemKernel(
namespace op::rope::metax { namespace op::rope::metax {
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::maca::Handle::Internal> internal; std::shared_ptr<device::metax::Handle::Internal> internal;
}; };
Descriptor::~Descriptor() { Descriptor::~Descriptor() {
...@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create( ...@@ -44,7 +44,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t sin_desc, infiniopTensorDescriptor_t sin_desc,
infiniopTensorDescriptor_t cos_desc) { infiniopTensorDescriptor_t cos_desc) {
auto handle = reinterpret_cast<device::maca::Handle *>(handle_); auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc); auto info = RoPEInfo::createRoPEInfo(y_desc, x_desc, pos_desc, sin_desc, cos_desc);
CHECK_RESULT(info); CHECK_RESULT(info);
...@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create( ...@@ -53,7 +53,7 @@ infiniStatus_t Descriptor::create(
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
info.take(), info.take(),
0, 0,
new Opaque{reinterpret_cast<device::maca::Handle *>(handle)->internal()}, new Opaque{reinterpret_cast<device::metax::Handle *>(handle)->internal()},
handle->device, handle->device,
handle->device_id); handle->device_id);
...@@ -141,4 +141,4 @@ infiniStatus_t Descriptor::calculate( ...@@ -141,4 +141,4 @@ infiniStatus_t Descriptor::calculate(
#undef ROPE_TYPE #undef ROPE_TYPE
#undef CALCULATE_ROPE #undef CALCULATE_ROPE
} // namespace op::rope::maca } // namespace op::rope::metax
#ifndef __SWIGLU_MACA_API_H__ #ifndef __SWIGLU_METAX_API_H__
#define __SWIGLU_MACA_API_H__ #define __SWIGLU_METAX_API_H__
#include "../../../elementwise/maca/elementwise_maca_api.h" #include "../../../elementwise/metax/elementwise_metax_api.h"
ELEMENTWISE_DESCRIPTOR(swiglu, metax, maca) ELEMENTWISE_DESCRIPTOR(swiglu, metax, metax)
#endif // __SWIGLU_MACA_API_H__ #endif // __SWIGLU_METAX_API_H__
#include "swiglu_metax.h" #include "swiglu_metax.h"
#include "../../../elementwise/maca/elementwise_maca.h" #include "../../../elementwise/metax/elementwise_metax.h"
#include "../cuda/kernel.cuh" #include "../cuda/kernel.cuh"
...@@ -14,7 +14,7 @@ infiniStatus_t Descriptor::create( ...@@ -14,7 +14,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t out_desc, infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) { std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::maca::Handle *>(handle_); auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
auto dtype = out_desc->dtype(); auto dtype = out_desc->dtype();
const auto &up_desc = input_desc_vec.at(0); const auto &up_desc = input_desc_vec.at(0);
...@@ -26,8 +26,8 @@ infiniStatus_t Descriptor::create( ...@@ -26,8 +26,8 @@ infiniStatus_t Descriptor::create(
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_BF16, INFINI_DTYPE_F32, INFINI_DTYPE_F64); CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_BF16, INFINI_DTYPE_F32, INFINI_DTYPE_F64);
CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape); CHECK_SAME_SHAPE(out_shape, up_shape, gate_shape);
// create MACA elementwise descriptor // create METAX elementwise descriptor
CREATE_ELEMENTWISE_MACA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec) CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "cpu/infinirt_cpu.h" #include "cpu/infinirt_cpu.h"
#include "cuda/infinirt_cuda.cuh" #include "cuda/infinirt_cuda.cuh"
#include "kunlun/infinirt_kunlun.h" #include "kunlun/infinirt_kunlun.h"
#include "maca/infinirt_maca.h" #include "metax/infinirt_metax.h"
#include "musa/infinirt_musa.h" #include "musa/infinirt_musa.h"
thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU; thread_local infiniDevice_t CURRENT_DEVICE_TYPE = INFINI_DEVICE_CPU;
...@@ -62,7 +62,7 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_ ...@@ -62,7 +62,7 @@ __C infiniStatus_t infinirtGetDevice(infiniDevice_t *device_ptr, int *device_id_
_status = infinirt::ascend::API PARAMS; \ _status = infinirt::ascend::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_METAX: \ case INFINI_DEVICE_METAX: \
_status = infinirt::maca::API PARAMS; \ _status = infinirt::metax::API PARAMS; \
break; \ break; \
case INFINI_DEVICE_MOORE: \ case INFINI_DEVICE_MOORE: \
_status = infinirt::musa::API PARAMS; \ _status = infinirt::musa::API PARAMS; \
......
#include "infinirt_maca.h" #include "infinirt_metax.h"
#include "../../utils.h" #include "../../utils.h"
#include <hcr/hc_runtime.h> #include <hcr/hc_runtime.h>
#include <hcr/hc_runtime_api.h> #include <hcr/hc_runtime_api.h>
#define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess) #define CHECK_MACART(RT_API) CHECK_INTERNAL(RT_API, hcSuccess)
namespace infinirt::maca { namespace infinirt::metax {
infiniStatus_t getDeviceCount(int *count) { infiniStatus_t getDeviceCount(int *count) {
CHECK_MACART(hcGetDeviceCount(count)); CHECK_MACART(hcGetDeviceCount(count));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
...@@ -124,4 +124,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) { ...@@ -124,4 +124,4 @@ infiniStatus_t freeAsync(void *ptr, infinirtStream_t stream) {
CHECK_MACART(hcFreeAsync(ptr, (hcStream_t)stream)); CHECK_MACART(hcFreeAsync(ptr, (hcStream_t)stream));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace infinirt::maca } // namespace infinirt::metax
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
#define __INFINIRT_MACA_H__ #define __INFINIRT_MACA_H__
#include "../infinirt_impl.h" #include "../infinirt_impl.h"
namespace infinirt::maca { namespace infinirt::metax {
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
INFINIRT_DEVICE_API_IMPL INFINIRT_DEVICE_API_IMPL
#else #else
INFINIRT_DEVICE_API_NOOP INFINIRT_DEVICE_API_NOOP
#endif #endif
} // namespace infinirt::maca } // namespace infinirt::metax
#endif // __INFINIRT_MACA_H__ #endif // __INFINIRT_MACA_H__
...@@ -34,7 +34,7 @@ target("infiniop-metax") ...@@ -34,7 +34,7 @@ target("infiniop-metax")
set_languages("cxx17") set_languages("cxx17")
set_warnings("all", "error") set_warnings("all", "error")
add_cxflags("-lstdc++", "-fPIC", "-Wno-defaulted-function-deleted", "-Wno-strict-aliasing") add_cxflags("-lstdc++", "-fPIC", "-Wno-defaulted-function-deleted", "-Wno-strict-aliasing")
add_files("../src/infiniop/devices/maca/*.cc", "../src/infiniop/ops/*/metax/*.cc") add_files("../src/infiniop/devices/metax/*.cc", "../src/infiniop/ops/*/metax/*.cc")
add_files("../src/infiniop/ops/*/metax/*.maca", {rule = "maca"}) add_files("../src/infiniop/ops/*/metax/*.maca", {rule = "maca"})
target_end() target_end()
...@@ -45,7 +45,7 @@ target("infinirt-metax") ...@@ -45,7 +45,7 @@ target("infinirt-metax")
add_deps("infini-utils") add_deps("infini-utils")
set_warnings("all", "error") set_warnings("all", "error")
add_cxflags("-lstdc++ -fPIC") add_cxflags("-lstdc++ -fPIC")
add_files("../src/infinirt/maca/*.cc") add_files("../src/infinirt/metax/*.cc")
target_end() target_end()
target("infiniccl-metax") target("infiniccl-metax")
...@@ -58,7 +58,7 @@ target("infiniccl-metax") ...@@ -58,7 +58,7 @@ target("infiniccl-metax")
end end
if has_config("ccl") then if has_config("ccl") then
add_links("libhccl.so") add_links("libhccl.so")
add_files("../src/infiniccl/maca/*.cc") add_files("../src/infiniccl/metax/*.cc")
end end
set_languages("cxx17") set_languages("cxx17")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment