Commit c235afb1 authored by YdrMaster's avatar YdrMaster
Browse files

issue/291/refactor: 改造 clip 和 element-wise


Signed-off-by: default avatarYdrMaster <ydrml@hotmail.com>
parent 27a13460
......@@ -12,45 +12,45 @@
#include <numeric>
#include <vector>
#define ELEMENTWISE_DESCRIPTOR(OP, NAMESPACE) \
\
namespace op::OP::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
infiniDtype_t _dtype; \
op::elementwise::ElementwiseInfo _info; \
std::unique_ptr<op::elementwise::NAMESPACE::DeviceImpl> _device_info; \
size_t _workspace_size; \
\
Descriptor( \
infiniDtype_t dtype, \
op::elementwise::ElementwiseInfo info, \
op::elementwise::NAMESPACE::DeviceImpl *device_info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \
_info(std::move(info)), \
_device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t output_desc, \
std::vector<infiniopTensorDescriptor_t> input_descs); \
\
infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \
void *output, \
std::vector<const void *> inputs, \
void *stream) const; \
}; \
#define ELEMENTWISE_DESCRIPTOR(OP, NAMESPACE, KERNEL_COMMON) \
\
namespace op::OP::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
infiniDtype_t _dtype; \
op::elementwise::ElementwiseInfo _info; \
std::unique_ptr<op::elementwise::KERNEL_COMMON::DeviceImpl> _device_info; \
size_t _workspace_size; \
\
Descriptor( \
infiniDtype_t dtype, \
op::elementwise::ElementwiseInfo info, \
op::elementwise::KERNEL_COMMON::DeviceImpl *device_info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \
_info(std::move(info)), \
_device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \
\
public: \
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
static infiniStatus_t create( \
infiniopHandle_t handle, \
Descriptor **desc_ptr, \
infiniopTensorDescriptor_t output_desc, \
std::vector<infiniopTensorDescriptor_t> input_descs); \
\
infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \
void *output, \
std::vector<const void *> inputs, \
void *stream) const; \
}; \
}
namespace op::elementwise {
......
......@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(add, cpu)
ELEMENTWISE_DESCRIPTOR(add, cpu, cpu)
namespace op::add::cpu {
typedef struct AddOp {
......
......@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(add, cuda)
ELEMENTWISE_DESCRIPTOR(add, cuda, cuda)
#endif // __ADD_CUDA_API_H__
......@@ -4,7 +4,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
#include "infiniop/ops/clip.h"
ELEMENTWISE_DESCRIPTOR(clip, cpu)
ELEMENTWISE_DESCRIPTOR(clip, cpu, cpu)
namespace op::clip::cpu {
......
#include "clip_cuda.cuh"
#include "clip_cuda_internal.cuh"
#include "../cuda/kernel.cuh"
#include "clip_nvidia.cuh"
namespace op::clip::cuda {
namespace op::clip::nvidia {
Descriptor::~Descriptor() = default;
......@@ -45,17 +45,17 @@ infiniStatus_t Descriptor::calculate(
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, ClipOp, half>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256, cuda::ClipOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, ClipOp, float>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256, cuda::ClipOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, ClipOp, double>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256, cuda::ClipOp, double>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, ClipOp, __nv_bfloat16>(_info, workspace, output, inputs, stream);
return _device_info->calculate<256, cuda::ClipOp, __nv_bfloat16>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
return INFINI_STATUS_SUCCESS;
}
} // namespace op::clip::cuda
} // namespace op::clip::nvidia
......@@ -4,6 +4,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
#include "infiniop/ops/clip.h"
ELEMENTWISE_DESCRIPTOR(clip, cuda)
ELEMENTWISE_DESCRIPTOR(clip, nvidia, cuda)
#endif // __CLIP_CUDA_API_H__
......@@ -6,7 +6,7 @@
#include "cpu/clip_cpu.h"
#endif
#ifdef ENABLE_NVIDIA_API
#include "cuda/clip_cuda.cuh"
#include "nvidia/clip_nvidia.cuh"
#endif
__C infiniStatus_t infiniopCreateClipDescriptor(
......@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda);
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
......@@ -53,7 +53,7 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s
GET(INFINI_DEVICE_CPU, cpu)
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda)
GET(INFINI_DEVICE_NVIDIA, nvidia)
#endif
}
......@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopClip(
CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda);
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
......@@ -107,7 +107,7 @@ infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda);
DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
default:
......
......@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(mul, cpu)
ELEMENTWISE_DESCRIPTOR(mul, cpu, cpu)
namespace op::mul::cpu {
typedef struct MulOp {
......
......@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(mul, cuda)
ELEMENTWISE_DESCRIPTOR(mul, cuda, cuda)
#endif // __MUL_CUDA_API_H__
......@@ -5,7 +5,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(relu, cpu)
ELEMENTWISE_DESCRIPTOR(relu, cpu, cpu)
namespace op::relu::cpu {
typedef struct ReluOp {
......
......@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(sub, cpu)
ELEMENTWISE_DESCRIPTOR(sub, cpu, cpu)
namespace op::sub::cpu {
typedef struct SubOp {
......
......@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(sub, cuda)
ELEMENTWISE_DESCRIPTOR(sub, cuda, cuda)
#endif // __SUB_CUDA_API_H__
......@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(swiglu, cpu)
ELEMENTWISE_DESCRIPTOR(swiglu, cpu, cpu)
namespace op::swiglu::cpu {
typedef struct SwiGLUOp {
......
......@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(swiglu, cuda)
ELEMENTWISE_DESCRIPTOR(swiglu, cuda, cuda)
#endif // __SWIGLU_CUDA_API_H__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment