"vscode:/vscode.git/clone" did not exist on "71c1617b2ece44e7db0b0bc0a6b8e00cae31859c"
Commit c235afb1 authored by YdrMaster
Browse files

issue/291/refactor: 改造 clip 和 element-wise


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent 27a13460
...@@ -12,45 +12,45 @@ ...@@ -12,45 +12,45 @@
#include <numeric> #include <numeric>
#include <vector> #include <vector>
#define ELEMENTWISE_DESCRIPTOR(OP, NAMESPACE) \ #define ELEMENTWISE_DESCRIPTOR(OP, NAMESPACE, KERNEL_COMMON) \
\ \
namespace op::OP::NAMESPACE { \ namespace op::OP::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \ class Descriptor final : public InfiniopDescriptor { \
infiniDtype_t _dtype; \ infiniDtype_t _dtype; \
op::elementwise::ElementwiseInfo _info; \ op::elementwise::ElementwiseInfo _info; \
std::unique_ptr<op::elementwise::NAMESPACE::DeviceImpl> _device_info; \ std::unique_ptr<op::elementwise::KERNEL_COMMON::DeviceImpl> _device_info; \
size_t _workspace_size; \ size_t _workspace_size; \
\ \
Descriptor( \ Descriptor( \
infiniDtype_t dtype, \ infiniDtype_t dtype, \
op::elementwise::ElementwiseInfo info, \ op::elementwise::ElementwiseInfo info, \
op::elementwise::NAMESPACE::DeviceImpl *device_info, \ op::elementwise::KERNEL_COMMON::DeviceImpl *device_info, \
size_t workspace_size, \ size_t workspace_size, \
infiniDevice_t device_type, \ infiniDevice_t device_type, \
int device_id) \ int device_id) \
: InfiniopDescriptor{device_type, device_id}, \ : InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \ _dtype(dtype), \
_info(std::move(info)), \ _info(std::move(info)), \
_device_info(std::move(device_info)), \ _device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \ _workspace_size(workspace_size) {} \
\ \
public: \ public: \
~Descriptor(); \ ~Descriptor(); \
\ \
size_t workspaceSize() const { return _workspace_size; } \ size_t workspaceSize() const { return _workspace_size; } \
\ \
static infiniStatus_t create( \ static infiniStatus_t create( \
infiniopHandle_t handle, \ infiniopHandle_t handle, \
Descriptor **desc_ptr, \ Descriptor **desc_ptr, \
infiniopTensorDescriptor_t output_desc, \ infiniopTensorDescriptor_t output_desc, \
std::vector<infiniopTensorDescriptor_t> input_descs); \ std::vector<infiniopTensorDescriptor_t> input_descs); \
\ \
infiniStatus_t calculate( \ infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \ void *workspace, size_t workspace_size, \
void *output, \ void *output, \
std::vector<const void *> inputs, \ std::vector<const void *> inputs, \
void *stream) const; \ void *stream) const; \
}; \ }; \
} }
namespace op::elementwise { namespace op::elementwise {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(add, cpu) ELEMENTWISE_DESCRIPTOR(add, cpu, cpu)
namespace op::add::cpu { namespace op::add::cpu {
typedef struct AddOp { typedef struct AddOp {
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(add, cuda) ELEMENTWISE_DESCRIPTOR(add, cuda, cuda)
#endif // __ADD_CUDA_API_H__ #endif // __ADD_CUDA_API_H__
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
#include "infiniop/ops/clip.h" #include "infiniop/ops/clip.h"
ELEMENTWISE_DESCRIPTOR(clip, cpu) ELEMENTWISE_DESCRIPTOR(clip, cpu, cpu)
namespace op::clip::cpu { namespace op::clip::cpu {
......
#include "clip_cuda.cuh" #include "../cuda/kernel.cuh"
#include "clip_cuda_internal.cuh" #include "clip_nvidia.cuh"
namespace op::clip::cuda { namespace op::clip::nvidia {
Descriptor::~Descriptor() = default; Descriptor::~Descriptor() = default;
...@@ -45,17 +45,17 @@ infiniStatus_t Descriptor::calculate( ...@@ -45,17 +45,17 @@ infiniStatus_t Descriptor::calculate(
switch (_dtype) { switch (_dtype) {
case INFINI_DTYPE_F16: case INFINI_DTYPE_F16:
return _device_info->calculate<256, ClipOp, half>(_info, workspace, output, inputs, stream); return _device_info->calculate<256, cuda::ClipOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32: case INFINI_DTYPE_F32:
return _device_info->calculate<256, ClipOp, float>(_info, workspace, output, inputs, stream); return _device_info->calculate<256, cuda::ClipOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64: case INFINI_DTYPE_F64:
return _device_info->calculate<256, ClipOp, double>(_info, workspace, output, inputs, stream); return _device_info->calculate<256, cuda::ClipOp, double>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16: case INFINI_DTYPE_BF16:
return _device_info->calculate<256, ClipOp, __nv_bfloat16>(_info, workspace, output, inputs, stream); return _device_info->calculate<256, cuda::ClipOp, __nv_bfloat16>(_info, workspace, output, inputs, stream);
default: default:
return INFINI_STATUS_BAD_TENSOR_DTYPE; return INFINI_STATUS_BAD_TENSOR_DTYPE;
} }
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
} }
} // namespace op::clip::cuda } // namespace op::clip::nvidia
...@@ -4,6 +4,6 @@ ...@@ -4,6 +4,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
#include "infiniop/ops/clip.h" #include "infiniop/ops/clip.h"
ELEMENTWISE_DESCRIPTOR(clip, cuda) ELEMENTWISE_DESCRIPTOR(clip, nvidia, cuda)
#endif // __CLIP_CUDA_API_H__ #endif // __CLIP_CUDA_API_H__
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
#include "cpu/clip_cpu.h" #include "cpu/clip_cpu.h"
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
#include "cuda/clip_cuda.cuh" #include "nvidia/clip_nvidia.cuh"
#endif #endif
__C infiniStatus_t infiniopCreateClipDescriptor( __C infiniStatus_t infiniopCreateClipDescriptor(
...@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor( ...@@ -31,7 +31,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor(
CREATE(INFINI_DEVICE_CPU, cpu); CREATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, cuda); CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
...@@ -53,7 +53,7 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s ...@@ -53,7 +53,7 @@ __C infiniStatus_t infiniopGetClipWorkspaceSize(infiniopClipDescriptor_t desc, s
GET(INFINI_DEVICE_CPU, cpu) GET(INFINI_DEVICE_CPU, cpu)
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, cuda) GET(INFINI_DEVICE_NVIDIA, nvidia)
#endif #endif
} }
...@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopClip( ...@@ -83,7 +83,7 @@ __C infiniStatus_t infiniopClip(
CALCULATE(INFINI_DEVICE_CPU, cpu); CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, cuda); CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
...@@ -107,7 +107,7 @@ infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) { ...@@ -107,7 +107,7 @@ infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc) {
DELETE(INFINI_DEVICE_CPU, cpu); DELETE(INFINI_DEVICE_CPU, cpu);
#endif #endif
#ifdef ENABLE_NVIDIA_API #ifdef ENABLE_NVIDIA_API
DELETE(INFINI_DEVICE_NVIDIA, cuda); DELETE(INFINI_DEVICE_NVIDIA, nvidia);
#endif #endif
default: default:
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(mul, cpu) ELEMENTWISE_DESCRIPTOR(mul, cpu, cpu)
namespace op::mul::cpu { namespace op::mul::cpu {
typedef struct MulOp { typedef struct MulOp {
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(mul, cuda) ELEMENTWISE_DESCRIPTOR(mul, cuda, cuda)
#endif // __MUL_CUDA_API_H__ #endif // __MUL_CUDA_API_H__
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(relu, cpu) ELEMENTWISE_DESCRIPTOR(relu, cpu, cpu)
namespace op::relu::cpu { namespace op::relu::cpu {
typedef struct ReluOp { typedef struct ReluOp {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(sub, cpu) ELEMENTWISE_DESCRIPTOR(sub, cpu, cpu)
namespace op::sub::cpu { namespace op::sub::cpu {
typedef struct SubOp { typedef struct SubOp {
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(sub, cuda) ELEMENTWISE_DESCRIPTOR(sub, cuda, cuda)
#endif // __SUB_CUDA_API_H__ #endif // __SUB_CUDA_API_H__
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
#include "../../../elementwise/cpu/elementwise_cpu.h" #include "../../../elementwise/cpu/elementwise_cpu.h"
ELEMENTWISE_DESCRIPTOR(swiglu, cpu) ELEMENTWISE_DESCRIPTOR(swiglu, cpu, cpu)
namespace op::swiglu::cpu { namespace op::swiglu::cpu {
typedef struct SwiGLUOp { typedef struct SwiGLUOp {
......
...@@ -3,6 +3,6 @@ ...@@ -3,6 +3,6 @@
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh" #include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
ELEMENTWISE_DESCRIPTOR(swiglu, cuda) ELEMENTWISE_DESCRIPTOR(swiglu, cuda, cuda)
#endif // __SWIGLU_CUDA_API_H__ #endif // __SWIGLU_CUDA_API_H__
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment