Unverified Commit 09d4b2ae authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1071 from InfiniTensor/issue/1031_T1-1-9

【算子比赛2025秋】T1-1-9
parents 5fc85c8b 85f8987c
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
#include "fmod_nvidia.cuh"
namespace op::fmod__::nvidia {
Descriptor::~Descriptor() = default;
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
auto dtype = out_desc->dtype();
const auto &a_desc = input_desc_vec.at(0);
const auto &b_desc = input_desc_vec.at(1);
const auto &c_shape = out_desc->shape();
const auto &a_shape = a_desc->shape();
const auto &b_shape = b_desc->shape();
CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);
CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)
return INFINI_STATUS_SUCCESS;
}
infiniStatus_t Descriptor::calculate(
void *workspace,
size_t workspace_size,
void *output,
std::vector<const void *> inputs,
void *stream) const {
if (workspace_size < _workspace_size) {
return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
}
switch (_dtype) {
case INFINI_DTYPE_F16:
return _device_info->calculate<256, cuda::FmodOp, half>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F32:
return _device_info->calculate<256, cuda::FmodOp, float>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_F64:
return _device_info->calculate<256, cuda::FmodOp, double>(_info, workspace, output, inputs, stream);
case INFINI_DTYPE_BF16:
return _device_info->calculate<256, cuda::FmodOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
default:
return INFINI_STATUS_BAD_TENSOR_DTYPE;
}
return INFINI_STATUS_SUCCESS;
}
} // namespace op::fmod__::nvidia
#ifndef __FMOD_CUDA_API_H__
#define __FMOD_CUDA_API_H__
#include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"
ELEMENTWISE_DESCRIPTOR(fmod__, nvidia)
#endif // __FMOD_CUDA_API_H__
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/fmod.h"
#ifdef ENABLE_CPU_API
#include "cpu/fmod_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/fmod_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/fmod_metax.h"
#endif
#ifdef ENABLE_MOORE_API
#include "moore/fmod_moore.h"
#endif
__INFINI_C infiniStatus_t infiniopCreateFmodDescriptor(
infiniopHandle_t handle,
infiniopFmodDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c_desc,
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return op::fmod__::NAMESPACE::Descriptor::create( \
handle, \
reinterpret_cast<op::fmod__::NAMESPACE::Descriptor **>(desc_ptr), \
c_desc, \
{a_desc, \
b_desc})
switch (handle->device) {
#ifdef ENABLE_CPU_API
CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
CREATE(INFINI_DEVICE_MOORE, moore);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
#undef CREATE
}
__INFINI_C infiniStatus_t infiniopGetFmodWorkspaceSize(infiniopFmodDescriptor_t desc, size_t *size) {
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<const op::fmod__::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
return INFINI_STATUS_SUCCESS;
switch (desc->device_type) {
#ifdef ENABLE_CPU_API
GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, moore);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
#undef GET
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__INFINI_C infiniStatus_t infiniopFmod(
infiniopFmodDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream) {
#define CALCULATE(CASE, NAMESPACE) \
case CASE: \
return reinterpret_cast<const op::fmod__::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, c, {a, b}, stream)
switch (desc->device_type) {
#ifdef ENABLE_CPU_API
CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
#undef CALCULATE
}
__INFINI_C infiniStatus_t infiniopDestroyFmodDescriptor(infiniopFmodDescriptor_t desc) {
#define GET(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<op::fmod__::NAMESPACE::Descriptor *>(desc); \
return INFINI_STATUS_SUCCESS;
switch (desc->device_type) {
#ifdef ENABLE_CPU_API
GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_QY_API
GET(INFINI_DEVICE_QY, nvidia);
#endif
#ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
GET(INFINI_DEVICE_MOORE, moore);
#endif
default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
#undef DELETE
}
......@@ -64,7 +64,11 @@ void calculate(
*c_ = utils::cast<Tdata>(beta * utils::cast<float>(*c_) + alpha * sum);
}
} else {
*c_ = beta * (*c_) + alpha * sum;
if (beta == 0) {
*c_ = alpha * sum;
} else {
*c_ = beta * (*c_) + alpha * sum;
}
}
}
}
......
......@@ -67,9 +67,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.nn.functional.adaptive_max_pool1d(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.nn.functional.adaptive_max_pool1d(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available)."""
return infinicore.nn.functional.adaptive_max_pool1d(*args, **kwargs)
def main():
......
......@@ -97,9 +97,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.asinh(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.asinh(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available)."""
return infinicore.asinh(*args, **kwargs)
def main():
......
......@@ -18,8 +18,8 @@ from framework import (
_TEST_CASES_DATA = [
((3, 5), (2, 3, 4), (2, 4, 5), None, None, None, None, None),
((8, 8), (4, 8, 8), (4, 8, 8), None, None, None, 0.5, 2.0),
((5, 7), (2, 5, 6), (2, 6, 7), (30, 1), (0, 5, 1), None, None, None),
((16, 16), (2, 16, 16), (2, 16, 16), None, None, (512, 1, 1), 1.0, None),
((5, 7), (2, 5, 6), (2, 6, 7), (30, 1), None, None, None, None),
((16, 16), (2, 16, 16), (2, 16, 16), None, None, None, 1.0, None),
((1, 1), (1, 1, 1), (1, 1, 1), None, None, None, None, None),
((6, 8), (3, 6, 7), (3, 7, 8), None, None, None, None, 0.2),
]
......@@ -99,9 +99,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.baddbmm(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.baddbmm(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available)."""
return infinicore.baddbmm(*args, **kwargs)
def main():
......
......@@ -14,7 +14,7 @@ _TEST_CASES_DATA = [
((1, 6), (1, 7), (3, 6, 7), None, None, None, True),
((8, 2), (8, 4), (5, 2, 4), (16, 2), None, None, False),
((2, 3), (2, 3), (4, 3, 3), None, (0, 3), None, True),
((6, 10), (6, 12), (7, 10, 12), None, None, (840, 70, 1), False),
((6, 10), (6, 12), (7, 10, 12), None, None, None, False),
((3, 1), (3, 1), (2, 1, 1), None, None, None, True),
]
......@@ -44,11 +44,17 @@ def parse_test_cases():
in2 = TensorSpec.from_tensor(in2_shape, in2_strides, dtype)
weight = TensorSpec.from_tensor(weight_shape, weight_strides, dtype)
inputs = [in1, in2, weight]
if bias_present:
bias_shape = (weight_shape[0],)
bias = TensorSpec.from_tensor(bias_shape, None, dtype)
inputs.append(bias)
kwargs = {}
test_cases.append(
TestCase(
inputs=[in1, in2, weight],
inputs=inputs,
kwargs=kwargs,
output_spec=None,
comparison_target=None,
......@@ -72,9 +78,10 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.nn.functional.bilinear(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.nn.functional.bilinear(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
from infinicore.ops.bilinear import bilinear
return bilinear(*args, **kwargs)
def main():
......
......@@ -103,9 +103,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs):
return torch.fmod(*args, **kwargs)
# def infinicore_operator(self, *args, **kwargs):
# """InfiniCore implementation (operator not yet available)."""
# return infinicore.fmod(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available)."""
return infinicore.fmod(*args, **kwargs)
def main():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment