Unverified Commit b2660e66 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1070 from InfiniTensor/issue/1031_revert

Issue/1031 revert T1-1-9
parents 037140c0 45a3794b
#include "../../../elementwise/nvidia/elementwise_nvidia.cuh"
#include "../cuda/kernel.cuh"
#include "fmod_nvidia.cuh"
namespace op::fmod::nvidia {
// Defaulted: this descriptor holds no fmod-specific resources to release.
Descriptor::~Descriptor() = default;
// Builds an fmod descriptor for the NVIDIA backend (also reused by Iluvatar).
//
// out_desc        : descriptor of the output tensor c.
// input_desc_vec  : exactly two entries, {a, b} (c = fmod(a, b)).
//
// Validates that the output dtype is one of F16/F32/F64/BF16 and that a, b
// and c all share the same shape, then delegates allocation of *desc_ptr to
// the shared elementwise CUDA descriptor factory macro.
infiniStatus_t Descriptor::create(
    infiniopHandle_t handle_,
    Descriptor **desc_ptr,
    infiniopTensorDescriptor_t out_desc,
    std::vector<infiniopTensorDescriptor_t> input_desc_vec) {
    auto handle = reinterpret_cast<device::nvidia::Handle *>(handle_);
    auto dtype = out_desc->dtype();

    const auto &a_desc = input_desc_vec.at(0);
    const auto &b_desc = input_desc_vec.at(1);
    const auto &c_shape = out_desc->shape();
    const auto &a_shape = a_desc->shape();
    const auto &b_shape = b_desc->shape();

    // Reject unsupported dtypes / mismatched shapes up front; the CHECK_*
    // macros return the corresponding error status on failure.
    CHECK_DTYPE(dtype, INFINI_DTYPE_F16, INFINI_DTYPE_F32, INFINI_DTYPE_F64, INFINI_DTYPE_BF16);
    CHECK_SAME_SHAPE(c_shape, a_shape, b_shape);

    // Allocates and initializes *desc_ptr (workspace size, device info, ...).
    CREATE_ELEMENTWISE_CUDA_DESCRIPTOR(handle, dtype, out_desc, input_desc_vec)

    return INFINI_STATUS_SUCCESS;
}
// Enqueues the fmod elementwise kernel for the dtype captured at create().
//
// workspace / workspace_size : caller-provided device scratch; must be at
//                              least the size reported by workspaceSize().
// output                     : device pointer to c.
// inputs                     : device pointers {a, b}.
// stream                     : CUDA stream the launch is issued on
//                              (execution is asynchronous).
infiniStatus_t Descriptor::calculate(
    void *workspace,
    size_t workspace_size,
    void *output,
    std::vector<const void *> inputs,
    void *stream) const {
    if (workspace_size < _workspace_size) {
        return INFINI_STATUS_INSUFFICIENT_WORKSPACE;
    }

    // Dispatch on the dtype validated in create(). The 256 template argument
    // is forwarded to the elementwise launcher (presumably the thread-block
    // size — confirm in elementwise_nvidia.cuh).
    switch (_dtype) {
    case INFINI_DTYPE_F16:
        return _device_info->calculate<256, cuda::FmodOp, half>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F32:
        return _device_info->calculate<256, cuda::FmodOp, float>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_F64:
        return _device_info->calculate<256, cuda::FmodOp, double>(_info, workspace, output, inputs, stream);
    case INFINI_DTYPE_BF16:
        return _device_info->calculate<256, cuda::FmodOp, cuda_bfloat16>(_info, workspace, output, inputs, stream);
    default:
        return INFINI_STATUS_BAD_TENSOR_DTYPE;
    }
    // Fix: removed the unreachable trailing `return INFINI_STATUS_SUCCESS;`
    // — every switch branch, including default, already returns.
}
} // namespace op::fmod::nvidia
// Public declaration of the fmod NVIDIA descriptor, generated from the
// shared elementwise template.
//
// Fix: the include guard was copy-pasted from the mul operator
// (__MUL_CUDA_API_H__). If both headers were included in one translation
// unit, the second one would be silently skipped; use an fmod-specific
// guard instead.
#ifndef __FMOD_CUDA_API_H__
#define __FMOD_CUDA_API_H__

#include "../../../elementwise/nvidia/elementwise_nvidia_api.cuh"

// Declares op::fmod::nvidia::Descriptor (create / calculate / workspaceSize).
ELEMENTWISE_DESCRIPTOR(fmod, nvidia)

#endif // __FMOD_CUDA_API_H__
#include "../../operator.h"
#include "../../handle.h"
#include "infiniop/ops/fmod.h"
#ifdef ENABLE_CPU_API
#include "cpu/fmod_cpu.h"
#endif
#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/fmod_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
#include "metax/fmod_metax.h"
#endif
#ifdef ENABLE_MOORE_API
#include "moore/fmod_moore.h"
#endif
// C API: creates an fmod descriptor for the device owned by `handle`.
// c_desc is the output tensor; a_desc / b_desc are the inputs (c = fmod(a, b)).
// Returns INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED when no backend was
// compiled in for the handle's device type.
__INFINI_C infiniStatus_t infiniopCreateFmodDescriptor(
    infiniopHandle_t handle,
    infiniopFmodDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t c_desc,
    infiniopTensorDescriptor_t a_desc,
    infiniopTensorDescriptor_t b_desc) {

// Expands to one switch case that forwards to the backend's
// Descriptor::create with inputs packed as {a_desc, b_desc}.
#define CREATE(CASE, NAMESPACE)                                             \
    case CASE:                                                              \
        return op::fmod::NAMESPACE::Descriptor::create(                     \
            handle,                                                         \
            reinterpret_cast<op::fmod::NAMESPACE::Descriptor **>(desc_ptr), \
            c_desc,                                                         \
            {a_desc,                                                        \
             b_desc})

    switch (handle->device) {
#ifdef ENABLE_CPU_API
        CREATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CREATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia backend implementation.
        CREATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CREATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CREATE(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef CREATE
}
// C API: reports the device workspace size (in bytes) that infiniopFmod
// requires for this descriptor, via *size.
// Returns INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED for an unrecognized or
// not-compiled-in device type.
__INFINI_C infiniStatus_t infiniopGetFmodWorkspaceSize(infiniopFmodDescriptor_t desc, size_t *size) {

// Expands to one switch case that reads workspaceSize() from the
// backend-specific descriptor.
#define GET(CASE, NAMESPACE)                                                                      \
    case CASE:                                                                                    \
        *size = reinterpret_cast<const op::fmod::NAMESPACE::Descriptor *>(desc)->workspaceSize(); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        GET(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        GET(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia backend implementation.
        GET(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        GET(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        GET(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef GET
    // Fix: removed the unreachable trailing return — every switch branch,
    // including default, already returns, matching the other dispatchers
    // in this file.
}
// C API: executes c = fmod(a, b) using a previously created descriptor.
// workspace / workspace_size must satisfy infiniopGetFmodWorkspaceSize;
// c, a, b are device pointers; stream is the backend stream the work is
// issued on.
__INFINI_C infiniStatus_t infiniopFmod(
    infiniopFmodDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *c,
    const void *a,
    const void *b,
    void *stream) {

// Expands to one switch case that forwards to the backend's
// Descriptor::calculate with inputs packed as {a, b}.
#define CALCULATE(CASE, NAMESPACE)                                     \
    case CASE:                                                         \
        return reinterpret_cast<const op::fmod::NAMESPACE::Descriptor *>(desc) \
            ->calculate(workspace, workspace_size, c, {a, b}, stream)

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        CALCULATE(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        CALCULATE(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        // Iluvatar reuses the nvidia backend implementation.
        CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_METAX_API
        CALCULATE(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        CALCULATE(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

#undef CALCULATE
}
// C API: destroys a descriptor created by infiniopCreateFmodDescriptor,
// dispatching on the device type recorded in the descriptor.
__INFINI_C infiniStatus_t infiniopDestroyFmodDescriptor(infiniopFmodDescriptor_t desc) {

// Expands to one switch case that deletes the backend-specific descriptor.
// Renamed from GET — this dispatcher deletes, it does not read a value.
#define DESTROY(CASE, NAMESPACE)                                          \
    case CASE:                                                            \
        delete reinterpret_cast<op::fmod::NAMESPACE::Descriptor *>(desc); \
        return INFINI_STATUS_SUCCESS;

    switch (desc->device_type) {
#ifdef ENABLE_CPU_API
        DESTROY(INFINI_DEVICE_CPU, cpu);
#endif
#ifdef ENABLE_NVIDIA_API
        DESTROY(INFINI_DEVICE_NVIDIA, nvidia);
#endif
#ifdef ENABLE_ILUVATAR_API
        DESTROY(INFINI_DEVICE_ILUVATAR, nvidia);
#endif
#ifdef ENABLE_QY_API
        // NOTE(review): QY appears only in this destroy dispatcher, not in
        // the create/get/calculate dispatchers above — confirm whether QY
        // descriptors are created elsewhere or this branch is dead.
        DESTROY(INFINI_DEVICE_QY, nvidia);
#endif
#ifdef ENABLE_METAX_API
        DESTROY(INFINI_DEVICE_METAX, metax);
#endif
#ifdef ENABLE_MOORE_API
        DESTROY(INFINI_DEVICE_MOORE, moore);
#endif
    default:
        return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
    }

// Fix: the original `#undef DELETE` undefined a name that was never
// defined, leaking the helper macro past this function.
#undef DESTROY
}
...@@ -64,11 +64,7 @@ void calculate( ...@@ -64,11 +64,7 @@ void calculate(
*c_ = utils::cast<Tdata>(beta * utils::cast<float>(*c_) + alpha * sum); *c_ = utils::cast<Tdata>(beta * utils::cast<float>(*c_) + alpha * sum);
} }
} else { } else {
if (beta == 0) { *c_ = beta * (*c_) + alpha * sum;
*c_ = alpha * sum;
} else {
*c_ = beta * (*c_) + alpha * sum;
}
} }
} }
} }
......
...@@ -3,14 +3,6 @@ ...@@ -3,14 +3,6 @@
namespace op::gemm::nvidia { namespace op::gemm::nvidia {
// 添加线程局部控制开关
thread_local bool g_tf32_enabled = true;
// 暴露设置函数(非静态,以便外部链接)
void set_tf32_enabled(bool enabled) {
g_tf32_enabled = enabled;
}
struct Descriptor::Opaque { struct Descriptor::Opaque {
std::shared_ptr<device::nvidia::Handle::Internal> internal; std::shared_ptr<device::nvidia::Handle::Internal> internal;
}; };
...@@ -79,8 +71,7 @@ infiniStatus_t Descriptor::calculate( ...@@ -79,8 +71,7 @@ infiniStatus_t Descriptor::calculate(
#if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API) #if defined(ENABLE_ILUVATAR_API) || defined(ENABLE_HYGON_API)
compute_type = CUDA_R_32F; compute_type = CUDA_R_32F;
#else #else
// compute_type = CUBLAS_COMPUTE_32F_FAST_TF32; compute_type = CUBLAS_COMPUTE_32F_FAST_TF32;
compute_type = g_tf32_enabled ? CUBLAS_COMPUTE_32F_FAST_TF32 : CUBLAS_COMPUTE_32F;
#endif #endif
break; break;
......
...@@ -67,9 +67,9 @@ class OpTest(BaseOperatorTest): ...@@ -67,9 +67,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs): def torch_operator(self, *args, **kwargs):
return torch.nn.functional.adaptive_max_pool1d(*args, **kwargs) return torch.nn.functional.adaptive_max_pool1d(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs): # def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available).""" # """InfiniCore implementation (operator not yet available)."""
return infinicore.nn.functional.adaptive_max_pool1d(*args, **kwargs) # return infinicore.nn.functional.adaptive_max_pool1d(*args, **kwargs)
def main(): def main():
......
...@@ -97,9 +97,9 @@ class OpTest(BaseOperatorTest): ...@@ -97,9 +97,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs): def torch_operator(self, *args, **kwargs):
return torch.asinh(*args, **kwargs) return torch.asinh(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs): # def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available).""" # """InfiniCore implementation (operator not yet available)."""
return infinicore.asinh(*args, **kwargs) # return infinicore.asinh(*args, **kwargs)
def main(): def main():
......
...@@ -99,9 +99,9 @@ class OpTest(BaseOperatorTest): ...@@ -99,9 +99,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs): def torch_operator(self, *args, **kwargs):
return torch.baddbmm(*args, **kwargs) return torch.baddbmm(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs): # def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available).""" # """InfiniCore implementation (operator not yet available)."""
return infinicore.baddbmm(*args, **kwargs) # return infinicore.baddbmm(*args, **kwargs)
def main(): def main():
......
...@@ -44,17 +44,11 @@ def parse_test_cases(): ...@@ -44,17 +44,11 @@ def parse_test_cases():
in2 = TensorSpec.from_tensor(in2_shape, in2_strides, dtype) in2 = TensorSpec.from_tensor(in2_shape, in2_strides, dtype)
weight = TensorSpec.from_tensor(weight_shape, weight_strides, dtype) weight = TensorSpec.from_tensor(weight_shape, weight_strides, dtype)
inputs = [in1, in2, weight]
if bias_present:
bias_shape = (weight_shape[0],)
bias = TensorSpec.from_tensor(bias_shape, None, dtype)
inputs.append(bias)
kwargs = {} kwargs = {}
test_cases.append( test_cases.append(
TestCase( TestCase(
inputs=inputs, inputs=[in1, in2, weight],
kwargs=kwargs, kwargs=kwargs,
output_spec=None, output_spec=None,
comparison_target=None, comparison_target=None,
...@@ -78,10 +72,9 @@ class OpTest(BaseOperatorTest): ...@@ -78,10 +72,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs): def torch_operator(self, *args, **kwargs):
return torch.nn.functional.bilinear(*args, **kwargs) return torch.nn.functional.bilinear(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs): # def infinicore_operator(self, *args, **kwargs):
from infinicore.ops.bilinear import bilinear # """InfiniCore implementation (operator not yet available)."""
# return infinicore.nn.functional.bilinear(*args, **kwargs)
return bilinear(*args, **kwargs)
def main(): def main():
......
...@@ -103,9 +103,9 @@ class OpTest(BaseOperatorTest): ...@@ -103,9 +103,9 @@ class OpTest(BaseOperatorTest):
def torch_operator(self, *args, **kwargs): def torch_operator(self, *args, **kwargs):
return torch.fmod(*args, **kwargs) return torch.fmod(*args, **kwargs)
def infinicore_operator(self, *args, **kwargs): # def infinicore_operator(self, *args, **kwargs):
"""InfiniCore implementation (operator not yet available).""" # """InfiniCore implementation (operator not yet available)."""
return infinicore.fmod(*args, **kwargs) # return infinicore.fmod(*args, **kwargs)
def main(): def main():
......
...@@ -452,7 +452,8 @@ target("infinicore_cpp_api") ...@@ -452,7 +452,8 @@ target("infinicore_cpp_api")
set_kind("shared") set_kind("shared")
add_deps("infiniop", "infinirt", "infiniccl") add_deps("infiniop", "infinirt", "infiniccl")
set_languages("cxx17") set_languages("cxx17")
set_policy("build.optimization.lto", false) set_symbols("visibility")
local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini") local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
add_includedirs("include") add_includedirs("include")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment