Unverified Commit b2660e66 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1070 from InfiniTensor/issue/1031_revert

Issue/1031 revert T1-1-9
parents 037140c0 45a3794b
#pragma once #pragma once
#include "ops/adaptive_max_pool1d.hpp"
#include "ops/add.hpp" #include "ops/add.hpp"
#include "ops/add_rms_norm.hpp" #include "ops/add_rms_norm.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp" #include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp" #include "ops/avg_pool1d.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp" #include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp" #include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp" #include "ops/embedding.hpp"
#include "ops/flash_attention.hpp" #include "ops/flash_attention.hpp"
#include "ops/fmod.hpp"
#include "ops/hardswish.hpp" #include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp" #include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp" #include "ops/kv_caching.hpp"
......
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// 1D adaptive max pooling operator: pools the innermost dimension of the
// input down to a caller-chosen length (see adaptive_max_pool1d below,
// which resizes shape.back() to output_size when allocating the result).
class AdaptiveMaxPool1d {
public:
// Backend kernel signature: (y, x, output_size); the kernel writes into y.
using schema = void (*)(Tensor, Tensor, size_t);
// Runs the kernel registered for y's device type, writing into y in place.
static void execute(Tensor y, Tensor x, size_t output_size);
// Registry mapping device types to kernel implementations for this op.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: returns a fresh tensor whose last dimension is output_size.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size);
// In-place variant: pools x into the caller-provided output tensor y.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// asinh operator (inverse hyperbolic sine, per the operator name — exact
// numeric semantics are defined by the registered backend kernels).
// The output tensor is allocated with the input's shape and dtype.
class Asinh {
public:
// Backend kernel signature: (y, x); the kernel writes the result into y.
using schema = void (*)(Tensor, Tensor);
// Runs the kernel registered for y's device type, writing into y in place.
static void execute(Tensor y, Tensor x);
// Registry mapping device types to kernel implementations for this op.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: returns a fresh tensor matching x's shape/dtype/device.
Tensor asinh(Tensor x);
// In-place variant: writes the result into the caller-provided tensor y.
void asinh_(Tensor y, Tensor x);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace infinicore::op {
// Batched add-matrix-multiply. Presumably mirrors torch.baddbmm, i.e.
// out = beta * input + alpha * (batch1 @ batch2) — TODO confirm against
// the backend implementation; only the signature is visible here.
// Allocating variant: returns a new output tensor.
Tensor baddbmm(Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
// In-place variant: writes the result into the caller-provided tensor `out`.
void baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace infinicore::op {
// Bilinear transformation of two inputs with an optional additive bias.
// Presumably mirrors torch.nn.functional.bilinear (x1^T W x2 + bias) —
// TODO confirm against the backend; only the signature is visible here.
// Allocating variant: returns a new output tensor.
Tensor bilinear(Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);
// In-place variant: writes the result into the caller-provided tensor `out`.
void bilinear_(Tensor out, Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// fmod operator: c = fmod(a, b), presumably with std::fmod-style remainder
// semantics (sign follows the dividend) — TODO confirm against the backend
// kernels; only the dispatch plumbing is visible here.
class Fmod {
public:
// Backend kernel signature: (c, a, b); the kernel writes the result into c.
using schema = void (*)(Tensor, Tensor, Tensor);
// Runs the kernel registered for the output device type, writing into c.
static void execute(Tensor c, Tensor a, Tensor b);
// Registry mapping device types to kernel implementations for this op.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: returns a new output tensor.
Tensor fmod(Tensor a, Tensor b);
// In-place variant: writes the result into the caller-provided tensor c.
void fmod_(Tensor c, Tensor a, Tensor b);
} // namespace infinicore::op
...@@ -2,11 +2,9 @@ ...@@ -2,11 +2,9 @@
#define __INFINIOP_API_H__ #define __INFINIOP_API_H__
#include "infiniop/handle.h" #include "infiniop/handle.h"
#include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h" #include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h" #include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/all.h" #include "infiniop/ops/all.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h" #include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h" #include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h" #include "infiniop/ops/clip.h"
...@@ -14,7 +12,6 @@ ...@@ -14,7 +12,6 @@
#include "infiniop/ops/dequantize_awq.h" #include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/embedding.h" #include "infiniop/ops/embedding.h"
#include "infiniop/ops/flash_attention.h" #include "infiniop/ops/flash_attention.h"
#include "infiniop/ops/fmod.h"
#include "infiniop/ops/gelu.h" #include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h" #include "infiniop/ops/gemm.h"
#include "infiniop/ops/int8_gemm.h" #include "infiniop/ops/int8_gemm.h"
...@@ -50,10 +47,10 @@ ...@@ -50,10 +47,10 @@
#include "infiniop/ops/zeros.h" #include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h" #include "infiniop/tensor_descriptor.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/cross_entropy.h" #include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardswish.h" #include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardtanh.h" #include "infiniop/ops/hardtanh.h"
#endif // __INFINIOP_API_H__ #endif // __INFINIOP_API_H__
#ifndef __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__
#define __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__
// NOTE: guard renamed from __INFINIOP_ADAPTIVE_MAX_POOL1D_H__ to follow the
// *_API_H__ convention used by the sibling operator headers (asinh, fmod).
#include "../operator_descriptor.h"
/// Opaque descriptor handle for the AdaptiveMaxPool1d operator.
typedef struct InfiniopDescriptor *infiniopAdaptiveMaxPool1dDescriptor_t;
/// Creates an operator descriptor bound to the given tensor descriptors.
/// @param handle      runtime handle the descriptor is created on
/// @param desc        receives the newly created descriptor
/// @param y_desc      output tensor descriptor
/// @param x_desc      input tensor descriptor
/// @param output_size target length of the pooled (last) dimension
__INFINI_C __export infiniStatus_t infiniopCreateAdaptiveMaxPool1dDescriptor(
    infiniopHandle_t handle,
    infiniopAdaptiveMaxPool1dDescriptor_t *desc,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x_desc,
    size_t output_size);
/// Queries the workspace size (in bytes) required by infiniopAdaptiveMaxPool1d.
__INFINI_C __export infiniStatus_t infiniopGetAdaptiveMaxPool1dWorkspaceSize(infiniopAdaptiveMaxPool1dDescriptor_t desc, size_t *size);
/// Executes the pooling: reads x, writes y, using the given workspace and stream.
__INFINI_C __export infiniStatus_t infiniopAdaptiveMaxPool1d(infiniopAdaptiveMaxPool1dDescriptor_t desc, void *workspace, size_t workspace_size,
                                                             void *y, const void *x, void *stream);
/// Releases a descriptor created by infiniopCreateAdaptiveMaxPool1dDescriptor.
__INFINI_C __export infiniStatus_t infiniopDestroyAdaptiveMaxPool1dDescriptor(infiniopAdaptiveMaxPool1dDescriptor_t desc);
#endif // __INFINIOP_ADAPTIVE_MAX_POOL1D_API_H__
#ifndef __INFINIOP_ASINH_API_H__
#define __INFINIOP_ASINH_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor handle for the Asinh operator.
typedef struct InfiniopDescriptor *infiniopAsinhDescriptor_t;
/// Creates an operator descriptor bound to the given output (y) and input (x)
/// tensor descriptors.
__INFINI_C __export infiniStatus_t infiniopCreateAsinhDescriptor(infiniopHandle_t handle,
infiniopAsinhDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
/// Queries the workspace size (in bytes) required by infiniopAsinh.
__INFINI_C __export infiniStatus_t infiniopGetAsinhWorkspaceSize(infiniopAsinhDescriptor_t desc, size_t *size);
/// Executes the operator: reads x, writes y, using the given workspace/stream.
__INFINI_C __export infiniStatus_t infiniopAsinh(infiniopAsinhDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);
/// Releases a descriptor created by infiniopCreateAsinhDescriptor.
__INFINI_C __export infiniStatus_t infiniopDestroyAsinhDescriptor(infiniopAsinhDescriptor_t desc);
#endif // __INFINIOP_ASINH_API_H__
#ifndef __INFINIOP_FMOD_API_H__
#define __INFINIOP_FMOD_API_H__
#include "../operator_descriptor.h"
/// Opaque descriptor handle for the Fmod operator.
typedef struct InfiniopDescriptor *infiniopFmodDescriptor_t;
/// Creates an operator descriptor bound to the output (c) and the two
/// input (a, b) tensor descriptors.
__INFINI_C __export infiniStatus_t infiniopCreateFmodDescriptor(infiniopHandle_t handle,
infiniopFmodDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);
/// Queries the workspace size (in bytes) required by infiniopFmod.
__INFINI_C __export infiniStatus_t infiniopGetFmodWorkspaceSize(infiniopFmodDescriptor_t desc, size_t *size);
/// Executes the operator: reads a and b, writes c, using the workspace/stream.
__INFINI_C __export infiniStatus_t infiniopFmod(infiniopFmodDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);
/// Releases a descriptor created by infiniopCreateFmodDescriptor.
__INFINI_C __export infiniStatus_t infiniopDestroyFmodDescriptor(infiniopFmodDescriptor_t desc);
#endif // __INFINIOP_FMOD_API_H__
...@@ -50,13 +50,9 @@ from infinicore.dtype import ( ...@@ -50,13 +50,9 @@ from infinicore.dtype import (
from infinicore.ops.add import add from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.all import all from infinicore.ops.all import all
from infinicore.ops.asinh import asinh
from infinicore.ops.attention import attention from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm
from infinicore.ops.bilinear import bilinear
from infinicore.ops.cross_entropy import cross_entropy from infinicore.ops.cross_entropy import cross_entropy
from infinicore.ops.equal import equal from infinicore.ops.equal import equal
from infinicore.ops.fmod import fmod
from infinicore.ops.kv_caching import kv_caching from infinicore.ops.kv_caching import kv_caching
from infinicore.ops.matmul import matmul from infinicore.ops.matmul import matmul
from infinicore.ops.mha_varlen import mha_varlen from infinicore.ops.mha_varlen import mha_varlen
...@@ -134,10 +130,6 @@ __all__ = [ ...@@ -134,10 +130,6 @@ __all__ = [
"add_rms_norm_", "add_rms_norm_",
"attention", "attention",
"kv_caching", "kv_caching",
"asinh",
"baddbmm",
"bilinear",
"fmod",
"matmul", "matmul",
"equal", "equal",
"mul", "mul",
......
from .adaptive_max_pool1d import adaptive_max_pool1d
from .avg_pool1d import avg_pool1d from .avg_pool1d import avg_pool1d
from .causal_softmax import causal_softmax from .causal_softmax import causal_softmax
from .embedding import embedding from .embedding import embedding
...@@ -15,7 +14,6 @@ from .silu_and_mul import silu_and_mul ...@@ -15,7 +14,6 @@ from .silu_and_mul import silu_and_mul
from .swiglu import swiglu from .swiglu import swiglu
__all__ = [ __all__ = [
"adaptive_max_pool1d",
"causal_softmax", "causal_softmax",
"embedding", "embedding",
"flash_attention", "flash_attention",
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def adaptive_max_pool1d(
    input: Tensor,
    output_size: int,
    *,
    out=None,
) -> Tensor:
    r"""Apply 1D adaptive max pooling over an input of several input planes.

    Only the target length ``H_out`` is specified; window boundaries are
    derived from it:

    .. math::
        \text{start} = \left\lfloor \frac{i \cdot L_{in}}{L_{out}} \right\rfloor
        \text{end} = \left\lceil \frac{(i + 1) \cdot L_{in}}{L_{out}} \right\rceil

    where :math:`L_{in}` is the input length and :math:`L_{out}` the output length.

    Args:
        input (Tensor): Input tensor of shape (N, C, L_in)
        output_size (int): The target output size (L_out)
        out (Tensor, optional): Output tensor.

    Returns:
        Tensor: The result of the adaptive max pooling operation.
    """
    if out is not None:
        # In-place path: the native op writes directly into `out`.
        _infinicore.adaptive_max_pool1d_(out._underlying, input._underlying, output_size)
        return out
    return Tensor(_infinicore.adaptive_max_pool1d(input._underlying, output_size))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def asinh(input, *, out=None):
    """Compute asinh of ``input`` via the native backend.

    If ``out`` is given, the result is written into it and it is returned;
    otherwise a new Tensor is allocated and returned.
    """
    if out is not None:
        # In-place path: the native op writes directly into `out`.
        _infinicore.asinh_(out._underlying, input._underlying)
        return out
    return Tensor(_infinicore.asinh(input._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def baddbmm(input, batch1, batch2, *, beta=1.0, alpha=1.0, out=None):
    """Batched add-matrix-multiply via the native backend.

    ``beta`` and ``alpha`` are coerced to float before being handed to the
    native op. If ``out`` is given, the result is written into it and it is
    returned; otherwise a new Tensor is allocated and returned.
    """
    # Shared argument list for both the allocating and in-place native calls.
    native_args = (
        input._underlying,
        batch1._underlying,
        batch2._underlying,
        float(beta),
        float(alpha),
    )
    if out is None:
        return Tensor(_infinicore.baddbmm(*native_args))
    _infinicore.baddbmm_(out._underlying, *native_args)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def bilinear(input1, input2, weight, bias=None, *, out=None):
    """Bilinear transformation of two inputs via the native backend.

    ``bias`` is optional; ``None`` is forwarded as-is to the native op.
    If ``out`` is given, the result is written into it and it is returned;
    otherwise a new Tensor is allocated and returned.
    """
    # Unwrap the optional bias once so both call paths share it.
    raw_bias = None if bias is None else bias._underlying
    if out is not None:
        _infinicore.bilinear_(
            out._underlying,
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
        return out
    return Tensor(
        _infinicore.bilinear(
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
    )
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def fmod(input, other, *, out=None):
    """Element-wise fmod of ``input`` by ``other`` via the native backend.

    If ``out`` is given, the result is written into it and it is returned;
    otherwise a new Tensor is allocated and returned.
    """
    if out is not None:
        # In-place path: the native op writes directly into `out`.
        _infinicore.fmod_(out._underlying, input._underlying, other._underlying)
        return out
    return Tensor(_infinicore.fmod(input._underlying, other._underlying))
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Lazily-constructed registry of device-specific kernels for this operator.
common::OpDispatcher<AdaptiveMaxPool1d::schema> &AdaptiveMaxPool1d::dispatcher() {
    static common::OpDispatcher<AdaptiveMaxPool1d::schema> registry;
    return registry;
}

// Validates that both tensors live on the same device, activates that
// device, then forwards to the kernel registered for its device type.
void AdaptiveMaxPool1d::execute(Tensor y, Tensor x, size_t output_size) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    dispatcher().lookup(y->device().getType())(y, x, output_size);
}

// Allocating variant: the result keeps x's shape except that the innermost
// dimension is resized to output_size.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size) {
    infinicore::Shape out_shape = x->shape();
    out_shape.back() = output_size;
    Tensor result = Tensor::empty(out_shape, x->dtype(), x->device());
    adaptive_max_pool1d_(result, x, output_size);
    return result;
}

// In-place variant: pools x into the caller-provided output tensor y.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size) {
    AdaptiveMaxPool1d::execute(y, x, output_size);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::adaptive_max_pool1d_impl::infiniop {
// Per-thread cache of infiniop descriptors keyed by a hash of the call
// arguments, so repeated calls with the same (y, x, out) combination reuse
// a descriptor instead of rebuilding it. The eviction callback destroys
// the native descriptor when an entry is dropped from the cache.
// NOTE(review): the seed presumably hashes tensor metadata (shape/dtype/
// device) rather than tensor contents — confirm hash_combine's semantics.
thread_local common::OpCache<size_t, infiniopAdaptiveMaxPool1dDescriptor_t> caches(
100, // capacity
[](infiniopAdaptiveMaxPool1dDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyAdaptiveMaxPool1dDescriptor(desc));
desc = nullptr;
}
});
// infiniop backend for AdaptiveMaxPool1d: creates (or fetches a cached)
// descriptor, sizes and allocates a scratch workspace, then launches the
// native kernel on the current stream. Writes the pooled result into y.
void calculate(Tensor y, Tensor x, size_t out) {
size_t seed = hash_combine(y, x, out);
// Cache instances are segregated per (device type, device index).
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopAdaptiveMaxPool1dDescriptor_t desc = nullptr;
if (!desc_opt) {
// Cache miss: build a descriptor for these tensor descriptors and store it.
INFINICORE_CHECK_ERROR(infiniopCreateAdaptiveMaxPool1dDescriptor(
context::getInfiniopHandle(y->device()), &desc,
y->desc(), x->desc(), out));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// Workspace is queried and allocated fresh on every call (only the
// descriptor itself is cached).
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetAdaptiveMaxPool1dWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopAdaptiveMaxPool1d(
desc, workspace->data(), workspace_size,
y->data(), x->data(), context::getStream()));
}
// Self-registration at static-init time: installs `calculate` as the
// fallback implementation for all device types.
static bool registered = []() {
AdaptiveMaxPool1d::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::adaptive_max_pool1d_impl::infiniop
#include "infinicore/ops/asinh.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Lazily-constructed registry of device-specific kernels for this operator.
// FIX: removed the stray ';' that followed this function definition (an
// empty declaration), for consistency with the other operator sources.
common::OpDispatcher<Asinh::schema> &Asinh::dispatcher() {
    static common::OpDispatcher<Asinh::schema> dispatcher_;
    return dispatcher_;
}

// Validates that both tensors live on the same device, activates that
// device, then forwards to the kernel registered for its device type.
void Asinh::execute(Tensor y, Tensor x) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    dispatcher().lookup(y->device().getType())(y, x);
}

// Allocating variant: the result matches x's shape, dtype and device.
Tensor asinh(Tensor x) {
    auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
    asinh_(y, x);
    return y;
}

// In-place variant: writes the result into the caller-provided tensor y.
void asinh_(Tensor y, Tensor x) {
    Asinh::execute(y, x);
}

} // namespace infinicore::op
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment