Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
bfead271
Unverified
Commit
bfead271
authored
Mar 13, 2026
by
thatPepe
Committed by
GitHub
Mar 13, 2026
Browse files
Merge pull request #1073 from InfiniTensor/revert-1071-issue/1031_T1-1-9
Revert "【算子比赛2025秋】T1-1-9"
parents
09d4b2ae
908c3cc5
Changes
69
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
0 additions
and
998 deletions
+0
-998
src/infiniop/ops/adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cu
.../adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cu
+0
-144
src/infiniop/ops/adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cuh
...adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cuh
+0
-8
src/infiniop/ops/adaptive_max_pool1d/operator.cc
src/infiniop/ops/adaptive_max_pool1d/operator.cc
+0
-147
src/infiniop/ops/asinh/cpu/asinh_cpu.cc
src/infiniop/ops/asinh/cpu/asinh_cpu.cc
+0
-50
src/infiniop/ops/asinh/cpu/asinh_cpu.h
src/infiniop/ops/asinh/cpu/asinh_cpu.h
+0
-22
src/infiniop/ops/asinh/cuda/kernel.cuh
src/infiniop/ops/asinh/cuda/kernel.cuh
+0
-29
src/infiniop/ops/asinh/metax/asinh.maca
src/infiniop/ops/asinh/metax/asinh.maca
+0
-58
src/infiniop/ops/asinh/metax/asinh_metax.h
src/infiniop/ops/asinh/metax/asinh_metax.h
+0
-8
src/infiniop/ops/asinh/moore/asinh_moore.h
src/infiniop/ops/asinh/moore/asinh_moore.h
+0
-8
src/infiniop/ops/asinh/moore/asinh_moore.mu
src/infiniop/ops/asinh/moore/asinh_moore.mu
+0
-59
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cu
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cu
+0
-56
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cuh
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cuh
+0
-8
src/infiniop/ops/asinh/operator.cc
src/infiniop/ops/asinh/operator.cc
+0
-141
src/infiniop/ops/fmod/cpu/fmod_cpu.cc
src/infiniop/ops/fmod/cpu/fmod_cpu.cc
+0
-53
src/infiniop/ops/fmod/cpu/fmod_cpu.h
src/infiniop/ops/fmod/cpu/fmod_cpu.h
+0
-19
src/infiniop/ops/fmod/cuda/kernel.cuh
src/infiniop/ops/fmod/cuda/kernel.cuh
+0
-48
src/infiniop/ops/fmod/metax/fmod_metax.h
src/infiniop/ops/fmod/metax/fmod_metax.h
+0
-8
src/infiniop/ops/fmod/metax/mul_metax.maca
src/infiniop/ops/fmod/metax/mul_metax.maca
+0
-61
src/infiniop/ops/fmod/moore/fmod_moore.h
src/infiniop/ops/fmod/moore/fmod_moore.h
+0
-8
src/infiniop/ops/fmod/moore/fmod_moore.mu
src/infiniop/ops/fmod/moore/fmod_moore.mu
+0
-63
No files found.
src/infiniop/ops/adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cu
deleted
100644 → 0
View file @
09d4b2ae
#include "../../../devices/nvidia/nvidia_common.cuh"
#include "adaptive_max_pool1d_nvidia.cuh"
#include "../../../devices/nvidia/nvidia_kernel_common.cuh"
#include "../cuda/kernel.cuh"
// Global-kernel wrapper: forwards all launch arguments to the shared
// per-block implementation adaptiveMaxPool1dBlock (declared in ../cuda/kernel.cuh).
// Tdata is the storage type; Tcompute is the accumulation type.
template <unsigned int BLOCK_SIZE, typename Tdata, typename Tcompute>
INFINIOP_CUDA_KERNEL adaptiveMaxPool1dKernel(
    Tdata *__restrict__ y,          // output tensor data
    ptrdiff_t stride_y_batch,       // y stride between batches (in elements)
    ptrdiff_t stride_y_channel,     // y stride between channels
    const Tdata *__restrict__ x,    // input tensor data
    ptrdiff_t stride_x_batch,
    ptrdiff_t stride_x_channel,
    ptrdiff_t stride_x_length,      // x stride along the pooled dimension
    size_t channels,
    size_t input_length,
    size_t output_length,
    size_t ndim) {
    adaptiveMaxPool1dBlock<BLOCK_SIZE, Tdata, Tcompute>(
        y, stride_y_batch, stride_y_channel,
        x, stride_x_batch, stride_x_channel, stride_x_length,
        channels, input_length, output_length, ndim);
}
namespace op::adaptive_max_pool1d::nvidia {

/// Device handle internals kept alive for the descriptor's lifetime
/// (provides device properties such as maxThreadsPerBlock).
struct Descriptor::Opaque {
    std::shared_ptr<device::nvidia::Handle::Internal> internal;
};

Descriptor::~Descriptor() {
    delete _opaque;
}

/// Validates the tensors via AdaptiveMaxPool1dInfo and allocates the
/// NVIDIA descriptor. This op needs no scratch memory (workspace size 0).
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    size_t output_size) {
    auto result = AdaptiveMaxPool1dInfo::create(y_desc, x_desc, output_size);
    CHECK_RESULT(result);
    auto info = result.take();

    *desc_ptr = new Descriptor(
        new Opaque{reinterpret_cast<device::nvidia::Handle *>(handle)->internal()},
        std::move(info),
        0, // workspace size
        handle->device,
        handle->device_id);
    return INFINI_STATUS_SUCCESS;
}

/// Launches the pooling kernel with the compile-time BLOCK_SIZE, selecting
/// the (Tdata, Tcompute) instantiation from the runtime dtype.
/// F16/BF16 accumulate in float to avoid precision loss.
template <unsigned int BLOCK_SIZE>
infiniStatus_t launchKernel(
    uint32_t num_blocks,
    void *y,
    infiniDtype_t dtype,
    ptrdiff_t stride_y_batch,
    ptrdiff_t stride_y_channel,
    const void *x,
    ptrdiff_t stride_x_batch,
    ptrdiff_t stride_x_channel,
    ptrdiff_t stride_x_length,
    size_t channels,
    size_t input_length,
    size_t output_length,
    size_t ndim,
    cudaStream_t cuda_stream) {

#define LAUNCH_KERNEL(Tdata, Tcompute)                                                                \
    adaptiveMaxPool1dKernel<BLOCK_SIZE, Tdata, Tcompute><<<num_blocks, BLOCK_SIZE, 0, cuda_stream>>>( \
        reinterpret_cast<Tdata *>(y),                                                                 \
        stride_y_batch, stride_y_channel,                                                             \
        reinterpret_cast<const Tdata *>(x),                                                           \
        stride_x_batch, stride_x_channel, stride_x_length,                                            \
        channels, input_length, output_length, ndim)

    if (dtype == INFINI_DTYPE_F16) {
        LAUNCH_KERNEL(half, float);
    } else if (dtype == INFINI_DTYPE_BF16) {
        LAUNCH_KERNEL(__nv_bfloat16, float);
    } else if (dtype == INFINI_DTYPE_F32) {
        LAUNCH_KERNEL(float, float);
    } else if (dtype == INFINI_DTYPE_F64) {
        LAUNCH_KERNEL(double, double);
    } else {
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }

#undef LAUNCH_KERNEL
    return INFINI_STATUS_SUCCESS;
}

/// Runs adaptive max pooling: one CUDA block per (batch, channel) pair.
/// Chooses the largest block size the device supports.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }

    const size_t ndim = _info.ndim();
    const size_t batch_size = _info.shape[0];
    // 2-D input (batch, length) has no channel dimension: treat as 1 channel.
    const size_t channels = ndim > 2 ? _info.shape[1] : 1;
    const size_t input_length = _info.input_length();
    const size_t output_length = _info.output_length();

    ptrdiff_t stride_x_batch = _info.x_strides[0];
    ptrdiff_t stride_x_channel = ndim > 2 ? _info.x_strides[1] : 0;
    ptrdiff_t stride_x_length = _info.x_strides.back();
    ptrdiff_t stride_y_batch = _info.y_strides[0];
    ptrdiff_t stride_y_channel = ndim > 2 ? _info.y_strides[1] : 0;

    uint32_t num_blocks = static_cast<uint32_t>(batch_size * channels);
    auto cuda_stream = reinterpret_cast<cudaStream_t>(stream);

    // BUGFIX: test the largest block size first. Previously the
    // `== CUDA_BLOCK_SIZE_4096` branch came after `>= CUDA_BLOCK_SIZE_1024`,
    // so it could never be taken (4096-capable devices also satisfy >= 1024).
    if (_opaque->internal->maxThreadsPerBlock() >= CUDA_BLOCK_SIZE_4096) {
        CHECK_STATUS(launchKernel<CUDA_BLOCK_SIZE_4096>(
            num_blocks, y, _info.atype,
            stride_y_batch, stride_y_channel,
            x, stride_x_batch, stride_x_channel, stride_x_length,
            channels, input_length, output_length, ndim, cuda_stream));
    } else if (_opaque->internal->maxThreadsPerBlock() >= CUDA_BLOCK_SIZE_1024) {
        CHECK_STATUS(launchKernel<CUDA_BLOCK_SIZE_1024>(
            num_blocks, y, _info.atype,
            stride_y_batch, stride_y_channel,
            x, stride_x_batch, stride_x_channel, stride_x_length,
            channels, input_length, output_length, ndim, cuda_stream));
    } else if (_opaque->internal->maxThreadsPerBlock() >= CUDA_BLOCK_SIZE_512) {
        CHECK_STATUS(launchKernel<CUDA_BLOCK_SIZE_512>(
            num_blocks, y, _info.atype,
            stride_y_batch, stride_y_channel,
            x, stride_x_batch, stride_x_channel, stride_x_length,
            channels, input_length, output_length, ndim, cuda_stream));
    } else {
        return INFINI_STATUS_DEVICE_ARCHITECTURE_NOT_SUPPORTED;
    }
    return INFINI_STATUS_SUCCESS;
}

} // namespace op::adaptive_max_pool1d::nvidia
src/infiniop/ops/adaptive_max_pool1d/nvidia/adaptive_max_pool1d_nvidia.cuh
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ADAPTIVE_MAX_POOL1D_CUDA_H__
#define __ADAPTIVE_MAX_POOL1D_CUDA_H__

#include "../adaptive_max_pool1d.h"

// Declares op::adaptive_max_pool1d::nvidia::Descriptor via the shared macro.
DESCRIPTOR(nvidia)

#endif
src/infiniop/ops/adaptive_max_pool1d/operator.cc
deleted
100644 → 0
View file @
09d4b2ae
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/adaptive_max_pool1d.h"
#ifdef ENABLE_CPU_API
#include "cpu/adaptive_max_pool1d_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/adaptive_max_pool1d_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/adaptive_max_pool1d_metax.cuh"
#endif
#ifdef ENABLE_MOORE_API
#include "moore/adaptive_max_pool1d_moore.h"
#endif
// Creates an adaptive-max-pool1d descriptor for whichever compiled-in
// backend matches the handle's device type.
__INFINI_C infiniStatus_t infiniopCreateAdaptiveMaxPool1dDescriptor(
    infiniopHandle_t handle,
    infiniopAdaptiveMaxPool1dDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    size_t output_size) {

// Forwards to the backend-specific Descriptor::create.
#define CREATE(CASE, NAMESPACE)                                                            \
    case CASE:                                                                             \
        return op::adaptive_max_pool1d::NAMESPACE::Descriptor::create(                     \
            handle,                                                                        \
            reinterpret_cast<op::adaptive_max_pool1d::NAMESPACE::Descriptor **>(desc_ptr), \
            y_desc,                                                                        \
            x_desc,                                                                        \
            output_size)

    switch (handle->device) {
#ifdef ENABLE_CPU_API
        CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CREATE(INFINI_DEVICE_MOORE, moore);
#endif
    }
#undef CREATE
    // Reached when no enabled backend matches the device.
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
// Reports the workspace requirement recorded in the backend descriptor.
__INFINI_C infiniStatus_t infiniopGetAdaptiveMaxPool1dWorkspaceSize(
    infiniopAdaptiveMaxPool1dDescriptor_t desc,
    size_t *size) {

#define GET(CASE, NAMESPACE)                                                                             \
    case CASE:                                                                                           \
        *size = reinterpret_cast<op::adaptive_max_pool1d::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        GET(INFINI_DEVICE_MOORE, moore);
#endif
    }
#undef GET
    // Reached when no enabled backend matches the device.
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
// Executes adaptive max pooling on the backend that created `desc`.
__INFINI_C infiniStatus_t infiniopAdaptiveMaxPool1d(
    infiniopAdaptiveMaxPool1dDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) {

#define CALCULATE(CASE, NAMESPACE)                                                            \
    case CASE:                                                                                \
        return reinterpret_cast<op::adaptive_max_pool1d::NAMESPACE::Descriptor *>(desc)->calculate( \
            workspace, workspace_size, y, x, stream);

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
    }
#undef CALCULATE
    // Reached when no enabled backend matches the device.
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
// Destroys a descriptor by deleting it through its concrete backend type.
__INFINI_C infiniStatus_t infiniopDestroyAdaptiveMaxPool1dDescriptor(
    infiniopAdaptiveMaxPool1dDescriptor_t desc) {

#define DESTROY(CASE, NAMESPACE)                                                    \
    case CASE:                                                                      \
        delete reinterpret_cast<op::adaptive_max_pool1d::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        DESTROY(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        DESTROY(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        DESTROY(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        DESTROY(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        DESTROY(INFINI_DEVICE_MOORE, moore);
#endif
    }
#undef DESTROY
    // Reached when no enabled backend matches the device.
    return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
src/infiniop/ops/asinh/cpu/asinh_cpu.cc
deleted
100644 → 0
View file @
09d4b2ae
#include "asinh_cpu.h"
namespace op::asinh::cpu {

Descriptor::~Descriptor() = default;

// Validates dtype/shape and builds the CPU elementwise descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();

    // Floating-point dtypes only; output shape must match input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);

    CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);

    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise asinh computation on the dtype captured at
// descriptor-creation time.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<AsinhOp, fp16_t>(_info, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<AsinhOp, float>(_info, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<AsinhOp, double>(_info, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<AsinhOp, bf16_t>(_info, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
}
} // namespace op::asinh::cpu
src/infiniop/ops/asinh/cpu/asinh_cpu.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ASINH_CPU_H__
#define __ASINH_CPU_H__

#include <cmath>

#include "../../../elementwise/cpu/elementwise_cpu.h"

// Declares op::asinh::cpu::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(asinh, cpu)
namespace
op
::
asinh
::
cpu
{
typedef
struct
AsinhOp
{
public:
static
constexpr
size_t
num_inputs
=
1
;
template
<
typename
T
>
T
operator
()(
const
T
&
x
)
const
{
return
std
::
asinh
(
x
);
}
}
AsinhOp
;
}
// namespace op::asinh::cpu
#endif // __ASINH_CPU_H__
src/infiniop/ops/asinh/cuda/kernel.cuh
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ASINH_CUDA_KERNEL_H__
#define __ASINH_CUDA_KERNEL_H__
namespace op::asinh::cuda {
// Element-wise inverse hyperbolic sine functor for CUDA-like backends.
typedef struct AsinhOp {
public:
    // Unary operation: consumes exactly one input tensor.
    static constexpr size_t num_inputs = 1;
    template <typename T>
    __device__ __forceinline__ T operator()(const T &x) const {
        if constexpr (std::is_same_v<T, half>) {
            // No native half asinh: widen to float, compute, narrow back.
            float x_f = __half2float(x);
            return __float2half(asinhf(x_f));
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // Same widening strategy for bfloat16.
            float x_f = __bfloat162float(x);
            return __float2bfloat16(asinhf(x_f));
        } else if constexpr (std::is_same_v<T, float>) {
            return asinhf(x);
        } else {
            // Remaining case is double: use the global double overload.
            return ::asinh(x);
        }
    }
} AsinhOp;
} // namespace op::asinh::cuda
#endif // __ASINH_CUDA_KERNEL_H__
src/infiniop/ops/asinh/metax/asinh.maca
deleted
100644 → 0
View file @
09d4b2ae
#include "../../../elementwise/metax/elementwise_metax.h"
#include "asinh_metax.h"
#include "../cuda/kernel.cuh"
namespace op::asinh::metax {

Descriptor::~Descriptor() = default;

// Validates dtype/shape and builds the METAX elementwise descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
    auto dtype = out_desc->dtype();
    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();
    // Floating-point dtypes only; output shape must match input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);
    // create CUDA elementwise descriptor
    CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise kernel (block size 256) on the stored dtype.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::AsinhOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::AsinhOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::AsinhOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::AsinhOp, double>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
}
} // namespace op::asinh::metax
src/infiniop/ops/asinh/metax/asinh_metax.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ASINH_METAX_API_H__
#define __ASINH_METAX_API_H__

#include "../../../elementwise/metax/elementwise_metax_api.h"

// Declares op::asinh::metax::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(asinh, metax)

#endif // __ASINH_METAX_API_H__
src/infiniop/ops/asinh/moore/asinh_moore.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ASINH_MOORE_API_H__
#define __ASINH_MOORE_API_H__

#include "../../../elementwise/moore/elementwise_moore_api.h"

// Declares op::asinh::moore::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(asinh, moore)

#endif // __ASINH_MOORE_API_H__
src/infiniop/ops/asinh/moore/asinh_moore.mu
deleted
100644 → 0
View file @
09d4b2ae
#include "asinh_moore.h"
#include "../../../elementwise/moore/elementwise_moore.h"
#include "../cuda/kernel.cuh"
namespace op::asinh::moore {

Descriptor::~Descriptor() = default;

// Validates dtype/shape and builds the MOORE elementwise descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::moore::Handle *>(handle_);
    auto dtype = out_desc->dtype();
    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();
    // Floating-point dtypes only; output shape must match input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);
    // create MOORE elementwise descriptor
    CREATE_ELEMENTWISE_MOORE_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise kernel (block size 256) on the stored dtype.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::AsinhOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::AsinhOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::AsinhOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::AsinhOp, double>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    // Removed: a trailing `return INFINI_STATUS_SUCCESS;` that was
    // unreachable because every switch path (including default) returns.
}
} // namespace op::asinh::moore
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cu
deleted
100644 → 0
View file @
09d4b2ae
#include "../../../devices/nvidia/nvidia_common.cuh"
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
#include "asinh_nvidia.cuh"
namespace op::asinh::nvidia {

Descriptor::~Descriptor() = default;

// Validates dtype/shape and builds the CUDA elementwise descriptor.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &x_desc = input_desc_vec.at(0);
    const auto &y_shape = out_desc->shape();
    const auto &x_shape = x_desc->shape();

    // Floating-point dtypes only; output shape must match input shape.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(y_shape, x_shape);

    CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)

    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise kernel (block size 256) on the dtype captured
// at descriptor-creation time.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::AsinhOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::AsinhOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::AsinhOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::AsinhOp, double>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
}
} // namespace op::asinh::nvidia
src/infiniop/ops/asinh/nvidia/asinh_nvidia.cuh
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __ASINH_NVIDIA_API_H__
#define __ASINH_NVIDIA_API_H__

#include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"

// Declares op::asinh::nvidia::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(asinh, nvidia)

#endif // __ASINH_NVIDIA_API_H__
src/infiniop/ops/asinh/operator.cc
deleted
100644 → 0
View file @
09d4b2ae
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/asinh.h"
#ifdef ENABLE_CPU_API
#include "cpu/asinh_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/asinh_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/asinh_metax.h"
#endif
#ifdef ENABLE_MOORE_API
#include "moore/asinh_moore.h"
#endif
// Creates an asinh descriptor for whichever compiled-in backend matches
// the handle's device type.
__INFINI_C infiniStatus_t infiniopCreateAsinhDescriptor(
    infiniopHandle_t handle,
    infiniopAsinhDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc) {

// Forwards to the backend-specific Descriptor::create; the single input
// descriptor is wrapped into the vector the elementwise API expects.
#define CREATE(CASE, NAMESPACE)                                              \
    case CASE:                                                               \
        return op::asinh::NAMESPACE::Descriptor::create(                     \
            handle,                                                          \
            reinterpret_cast<op::asinh::NAMESPACE::Descriptor **>(desc_ptr), \
            y_desc,                                                          \
            {x_desc})

    switch (handle->device) {
#ifdef ENABLE_CPU_API
        CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CREATE(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
#undef CREATE
}
// Reports the workspace requirement recorded in the backend descriptor.
__INFINI_C infiniStatus_t infiniopGetAsinhWorkspaceSize(
    infiniopAsinhDescriptor_t desc,
    size_t *size) {

#define GET(CASE, NAMESPACE)                                                                       \
    case CASE:                                                                                     \
        *size = reinterpret_cast<const op::asinh::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
        return INFINI_STATUS_SUCCESS

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        GET(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
#undef GET
    // Removed: a second `return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;`
    // after the switch — unreachable because every path (including the
    // default case) already returns.
}
// Executes elementwise asinh on the backend that created `desc`.
__INFINI_C infiniStatus_t infiniopAsinh(
    infiniopAsinhDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x,
    void *stream) {

#define CALCULATE(CASE, NAMESPACE)                                         \
    case CASE:                                                             \
        return reinterpret_cast<const op::asinh::NAMESPACE::Descriptor *>(desc) \
            ->calculate(workspace, workspace_size, y, {x}, stream);

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
#undef CALCULATE
}
// Destroys a descriptor by deleting it through its concrete backend type.
__INFINI_C infiniStatus_t infiniopDestroyAsinhDescriptor(
    infiniopAsinhDescriptor_t desc) {

#define DESTROY(CASE, NAMESPACE)                                       \
    case CASE:                                                         \
        delete reinterpret_cast<op::asinh::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        DESTROY(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        DESTROY(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the NVIDIA (CUDA) implementation.
        DESTROY(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        DESTROY(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        DESTROY(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }
#undef DESTROY
}
src/infiniop/ops/fmod/cpu/fmod_cpu.cc
deleted
100644 → 0
View file @
09d4b2ae
#include "fmod_cpu.h"
namespace op::fmod__::cpu {

Descriptor::~Descriptor() = default;

// Validates dtype/shapes and builds the CPU elementwise descriptor for
// the binary fmod operation (inputs a and b, output out).
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::cpu::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &a_desc = input_desc_vec.at(0);
    const auto &b_desc = input_desc_vec.at(1);
    const auto &out_shape = out_desc->shape();
    const auto &a_shape = a_desc->shape();
    const auto &b_shape = b_desc->shape();

    // Floating-point dtypes only; all three shapes must match exactly.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(out_shape, a_shape, b_shape);

    // create CPU elementwise descriptor
    CREATE_ELEMENTWISE_CPU_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec);

    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise fmod computation on the stored dtype.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<FmodOp, fp16_t>(_info, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<FmodOp, float>(_info, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<FmodOp, double>(_info, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<FmodOp, bf16_t>(_info, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    // Removed: a trailing `return INFINI_STATUS_SUCCESS;` that was
    // unreachable because every switch path (including default) returns.
}
} // namespace op::fmod__::cpu
src/infiniop/ops/fmod/cpu/fmod_cpu.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef _FMOD_CPU_H__
#define _FMOD_CPU_H__

#include "../../../elementwise/cpu/elementwise_cpu.h"

// Declares op::fmod__::cpu::Descriptor via the shared elementwise macro.
// NOTE(review): the namespace token `fmod__` contains a double underscore,
// which is reserved in C++ — consider renaming project-wide.
ELEMENTWISE_DESCRIPTOR(fmod__, cpu)
namespace op::fmod__::cpu {

// Element-wise floating-point remainder functor for the CPU backend.
// Satisfies the elementwise-op contract: a `num_inputs` constant plus a
// call operator applied per element pair.
struct FmodOp {
public:
    // Binary operation: consumes exactly two input tensors.
    static constexpr size_t num_inputs = 2;

    // Computes std::fmod(lhs, rhs) for a pair of scalars.
    template <typename Scalar>
    Scalar operator()(const Scalar &lhs, const Scalar &rhs) const {
        return std::fmod(lhs, rhs);
    }
};

} // namespace op::fmod__::cpu
#endif // _FMOD_CPU_H__
src/infiniop/ops/fmod/cuda/kernel.cuh
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __FMOD_CUDA_H__
#define __FMOD_CUDA_H__
namespace op::fmod__::cuda {
// Element-wise floating-point remainder functor for CUDA-like backends.
typedef struct FmodOp {
    // Binary operation: consumes exactly two input tensors.
    static constexpr size_t num_inputs = 2;
    template <typename T>
    __device__ __forceinline__ T operator()(const T &a, const T &b) const {
        // fmod(a, b) = a - b * trunc(a / b)
        if constexpr (std::is_same_v<T, half2>) {
            // half2: widen both lanes to float, compute, narrow back.
            float2 af = __half22float2(a);
            float2 bf = __half22float2(b);
            float2 result;
            result.x = fmodf(af.x, bf.x);
            result.y = fmodf(af.y, bf.y);
            return __float22half2_rn(result);
        } else if constexpr (std::is_same_v<T, cuda_bfloat162>) {
            // bfloat162: widen both lanes to float, compute, narrow back.
            float af_low = __bfloat162float(__low2bfloat16(a));
            float af_high = __bfloat162float(__high2bfloat16(a));
            float bf_low = __bfloat162float(__low2bfloat16(b));
            float bf_high = __bfloat162float(__high2bfloat16(b));
            return __floats2bfloat162_rn(fmodf(af_low, bf_low), fmodf(af_high, bf_high));
        } else if constexpr (std::is_same_v<T, half>) {
            // half: widen to float, compute, narrow back.
            float af = __half2float(a);
            float bf = __half2float(b);
            return __float2half(fmodf(af, bf));
        } else if constexpr (std::is_same_v<T, cuda_bfloat16>) {
            // bfloat16: widen to float, compute, narrow back.
            float af = __bfloat162float(a);
            float bf = __bfloat162float(b);
            return __float2bfloat16(fmodf(af, bf));
        } else if constexpr (std::is_same_v<T, float>) {
            return fmodf(a, b);
        } else if constexpr (std::is_same_v<T, double>) {
            return fmod(a, b);
        } else {
            // Integer types use the % operator.
            return a % b;
        }
    }
} FmodOp;
} // namespace op::fmod__::cuda
#endif // __FMOD_CUDA_H__
src/infiniop/ops/fmod/metax/fmod_metax.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __FMOD_METAX_API_H__
#define __FMOD_METAX_API_H__

#include "../../../elementwise/metax/elementwise_metax_api.h"

// Declares op::fmod__::metax::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(fmod__, metax)

#endif // __FMOD_METAX_API_H__
src/infiniop/ops/fmod/metax/mul_metax.maca
deleted
100644 → 0
View file @
09d4b2ae
#include "../../../elementwise/metax/elementwise_metax.h"
#include "../cuda/kernel.cuh"
#include "fmod_metax.h"
namespace op::fmod__::metax {

Descriptor::~Descriptor() = default;

// Validates dtype/shapes and builds the METAX elementwise descriptor for
// the binary fmod operation.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::metax::Handle *>(handle_);
    auto dtype = out_desc->dtype();
    const auto &a_desc = input_desc_vec.at(0);
    const auto &b_desc = input_desc_vec.at(1);
    const auto &c_shape = out_desc->shape();
    const auto &a_shape = a_desc->shape();
    const auto &b_shape = b_desc->shape();
    // Floating-point dtypes only; all three shapes must match exactly.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
    CREATE_ELEMENTWISE_METAX_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise kernel (block size 256) on the stored dtype.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::FmodOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::FmodOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::FmodOp, double>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::FmodOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    // Removed: a trailing `return INFINI_STATUS_SUCCESS;` that was
    // unreachable because every switch path (including default) returns.
}
} // namespace op::fmod__::metax
src/infiniop/ops/fmod/moore/fmod_moore.h
deleted
100644 → 0
View file @
09d4b2ae
#ifndef __FMOD_MOORE_API_H__
#define __FMOD_MOORE_API_H__

#include "../../../elementwise/moore/elementwise_moore_api.h"

// Declares op::fmod__::moore::Descriptor via the shared elementwise macro.
ELEMENTWISE_DESCRIPTOR(fmod__, moore)

#endif // __FMOD_MOORE_API_H__
src/infiniop/ops/fmod/moore/fmod_moore.mu
deleted
100644 → 0
View file @
09d4b2ae
#include "fmod_moore.h"
#include "../../../elementwise/moore/elementwise_moore.h"
#include "../cuda/kernel.cuh"
namespace op::fmod__::moore {

Descriptor::~Descriptor() = default;

// Validates dtype/shapes and builds the MOORE elementwise descriptor for
// the binary fmod operation.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::moore::Handle *>(handle_);
    auto dtype = out_desc->dtype();
    const auto &a_desc = input_desc_vec.at(0);
    const auto &b_desc = input_desc_vec.at(1);
    const auto &c_shape = out_desc->shape();
    const auto &a_shape = a_desc->shape();
    const auto &b_shape = b_desc->shape();
    // Floating-point dtypes only; all three shapes must match exactly.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
    // create MOORE elementwise descriptor
    CREATE_ELEMENTWISE_MOORE_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
    return INFINI_STATUS_SUCCESS;
}

// Dispatches the elementwise kernel (block size 256) on the stored dtype.
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::FmodOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::FmodOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::FmodOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::FmodOp, double>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    // Removed: a trailing `return INFINI_STATUS_SUCCESS;` that was
    // unreachable because every switch path (including default) returns.
}
} // namespace op::fmod__::moore
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment