Unverified commit b2660e66, authored by thatPepe and committed via GitHub
Browse files

Merge pull request #1070 from InfiniTensor/issue/1031_revert

Issue/1031 revert T1-1-9
parents 037140c0 45a3794b
#pragma once
#include "ops/adaptive_max_pool1d.hpp"
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmod.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
......
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// 1D adaptive max pooling operator. Per-device backends register a kernel in
// dispatcher(); execute() routes the call to the kernel matching y's device type.
class AdaptiveMaxPool1d {
public:
// Backend kernel signature: (output y, input x, target output length).
using schema = void (*)(Tensor, Tensor, size_t);
// Run the kernel registered for y's device; writes the pooled result into y.
static void execute(Tensor y, Tensor x, size_t output_size);
// Process-wide registry mapping device type -> kernel.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: returns a new tensor whose last dimension is output_size.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size);
// In-place form: writes the pooled result into caller-provided y.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// Element-wise asinh (inverse hyperbolic sine) operator. Per-device backends
// register a kernel in dispatcher(); execute() routes by y's device type.
class Asinh {
public:
// Backend kernel signature: (output y, input x).
using schema = void (*)(Tensor, Tensor);
// Run the kernel registered for y's device; writes the result into y.
static void execute(Tensor y, Tensor x);
// Process-wide registry mapping device type -> kernel.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: returns a new tensor with x's shape/dtype/device.
Tensor asinh(Tensor x);
// In-place form: writes the result into caller-provided y.
void asinh_(Tensor y, Tensor x);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace infinicore::op {
// Batched matrix-multiply-add over input, batch1 and batch2, with scaling
// factors beta (applied to input) and alpha (applied to the batched product).
// NOTE(review): presumably matches torch.baddbmm semantics
// (beta*input + alpha*(batch1 @ batch2)) — confirm against the backend kernel.
// Allocating form: returns a new output tensor.
Tensor baddbmm(Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
// In-place form: writes the result into caller-provided out.
void baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2,
float beta = 1.0f,
float alpha = 1.0f);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
namespace infinicore::op {
// Bilinear transform of x1 and x2 using weight, with an optional bias term
// (std::nullopt means no bias is applied).
// Allocating form: returns a new output tensor.
Tensor bilinear(Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);
// In-place form: writes the result into caller-provided out.
void bilinear_(Tensor out, Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
// Element-wise remainder operator (c = fmod(a, b)). Per-device backends
// register a kernel in dispatcher(); execute() routes by c's device type.
// NOTE(review): presumably follows C fmod semantics (sign of the dividend) —
// confirm against the backend kernel.
class Fmod {
public:
// Backend kernel signature: (output c, dividend a, divisor b).
using schema = void (*)(Tensor, Tensor, Tensor);
// Run the kernel registered for c's device; writes the result into c.
static void execute(Tensor c, Tensor a, Tensor b);
// Process-wide registry mapping device type -> kernel.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: returns a new output tensor.
Tensor fmod(Tensor a, Tensor b);
// In-place form: writes the result into caller-provided c.
void fmod_(Tensor c, Tensor a, Tensor b);
} // namespace infinicore::op
......@@ -2,11 +2,9 @@
#define __INFINIOP_API_H__
#include "infiniop/handle.h"
#include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/all.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/clip.h"
......@@ -14,7 +12,6 @@
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/embedding.h"
#include "infiniop/ops/flash_attention.h"
#include "infiniop/ops/fmod.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/int8_gemm.h"
......@@ -50,10 +47,10 @@
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"
/* Trailing operator headers appended after the main alphabetized list above.
 * Fixed: avg_pool1d.h and equal.h were each included twice in this run;
 * the duplicates are redundant (every header is include-guarded). */
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/hardtanh.h"
#endif // __INFINIOP_API_H__
#ifndef __INFINIOP_ADAPTIVE_MAX_POOL1D_H__
#define __INFINIOP_ADAPTIVE_MAX_POOL1D_H__
#include "../operator_descriptor.h"
/* Opaque handle describing one configured adaptive-max-pool-1d computation. */
typedef struct InfiniopDescriptor *infiniopAdaptiveMaxPool1dDescriptor_t;
/* Create a descriptor binding the output/input tensor layouts and the target
 * output length. Release it with infiniopDestroyAdaptiveMaxPool1dDescriptor. */
__INFINI_C __export infiniStatus_t infiniopCreateAdaptiveMaxPool1dDescriptor(
infiniopHandle_t handle,
infiniopAdaptiveMaxPool1dDescriptor_t *desc,
infiniopTensorDescriptor_t y_desc,
infiniopTensorDescriptor_t x_desc,
size_t output_size);
/* Query the scratch-space size (bytes) required by infiniopAdaptiveMaxPool1d. */
__INFINI_C __export infiniStatus_t infiniopGetAdaptiveMaxPool1dWorkspaceSize(infiniopAdaptiveMaxPool1dDescriptor_t desc, size_t *size);
/* Execute the pooling: reads x, writes y, using a caller-provided workspace of
 * at least workspace_size bytes; stream is the backend execution stream. */
__INFINI_C __export infiniStatus_t infiniopAdaptiveMaxPool1d(infiniopAdaptiveMaxPool1dDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, const void *x, void *stream);
/* Destroy a descriptor created by the Create call above. */
__INFINI_C __export infiniStatus_t infiniopDestroyAdaptiveMaxPool1dDescriptor(infiniopAdaptiveMaxPool1dDescriptor_t desc);
#endif
#ifndef __INFINIOP_ASINH_API_H__
#define __INFINIOP_ASINH_API_H__
#include "../operator_descriptor.h"
/* Opaque handle describing one configured element-wise asinh computation. */
typedef struct InfiniopDescriptor *infiniopAsinhDescriptor_t;
/* Create a descriptor binding the output (y) and input (x) tensor layouts.
 * Release it with infiniopDestroyAsinhDescriptor. */
__INFINI_C __export infiniStatus_t infiniopCreateAsinhDescriptor(infiniopHandle_t handle,
infiniopAsinhDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x);
/* Query the scratch-space size (bytes) required by infiniopAsinh. */
__INFINI_C __export infiniStatus_t infiniopGetAsinhWorkspaceSize(infiniopAsinhDescriptor_t desc, size_t *size);
/* Execute: reads x, writes asinh results into y, using a caller-provided
 * workspace of at least workspace_size bytes on the given stream. */
__INFINI_C __export infiniStatus_t infiniopAsinh(infiniopAsinhDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
const void *x,
void *stream);
/* Destroy a descriptor created by the Create call above. */
__INFINI_C __export infiniStatus_t infiniopDestroyAsinhDescriptor(infiniopAsinhDescriptor_t desc);
#endif
#ifndef __INFINIOP_FMOD_API_H__
#define __INFINIOP_FMOD_API_H__
#include "../operator_descriptor.h"
/* Opaque handle describing one configured element-wise fmod computation. */
typedef struct InfiniopDescriptor *infiniopFmodDescriptor_t;
/* Create a descriptor binding output (c) and input (a, b) tensor layouts.
 * Release it with infiniopDestroyFmodDescriptor. */
__INFINI_C __export infiniStatus_t infiniopCreateFmodDescriptor(infiniopHandle_t handle,
infiniopFmodDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t c,
infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b);
/* Query the scratch-space size (bytes) required by infiniopFmod. */
__INFINI_C __export infiniStatus_t infiniopGetFmodWorkspaceSize(infiniopFmodDescriptor_t desc, size_t *size);
/* Execute: reads a and b, writes the element-wise remainder into c, using a
 * caller-provided workspace of at least workspace_size bytes on the stream. */
__INFINI_C __export infiniStatus_t infiniopFmod(infiniopFmodDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *c,
const void *a,
const void *b,
void *stream);
/* Destroy a descriptor created by the Create call above. */
__INFINI_C __export infiniStatus_t infiniopDestroyFmodDescriptor(infiniopFmodDescriptor_t desc);
#endif
......@@ -50,13 +50,9 @@ from infinicore.dtype import (
from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.all import all
from infinicore.ops.asinh import asinh
from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm
from infinicore.ops.bilinear import bilinear
from infinicore.ops.cross_entropy import cross_entropy
from infinicore.ops.equal import equal
from infinicore.ops.fmod import fmod
from infinicore.ops.kv_caching import kv_caching
from infinicore.ops.matmul import matmul
from infinicore.ops.mha_varlen import mha_varlen
......@@ -134,10 +130,6 @@ __all__ = [
"add_rms_norm_",
"attention",
"kv_caching",
"asinh",
"baddbmm",
"bilinear",
"fmod",
"matmul",
"equal",
"mul",
......
from .adaptive_max_pool1d import adaptive_max_pool1d
from .avg_pool1d import avg_pool1d
from .causal_softmax import causal_softmax
from .embedding import embedding
......@@ -15,7 +14,6 @@ from .silu_and_mul import silu_and_mul
from .swiglu import swiglu
__all__ = [
"adaptive_max_pool1d",
"causal_softmax",
"embedding",
"flash_attention",
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def adaptive_max_pool1d(
    input: Tensor,
    output_size: int,
    *,
    out=None,
) -> Tensor:
    r"""Applies a 1D adaptive max pooling over an input signal composed of
    several input planes.

    The output size is H_out. The algorithm used is fairly simple:

    .. math::
        \text{start} = \left\lfloor \frac{i \cdot L_{in}}{L_{out}} \right\rfloor
        \text{end} = \left\lceil \frac{(i + 1) \cdot L_{in}}{L_{out}} \right\rceil

    where :math:`L_{in}` is the size of the input dimension, and :math:`L_{out}` is the size of the output dimension.

    Args:
        input (Tensor): Input tensor of shape (N, C, L_in)
        output_size (int): The target output size (L_out)
        out (Tensor, optional): Output tensor.

    Returns:
        Tensor: The result of the adaptive max pooling operation.
    """
    # Out-of-place path allocates via the native binding and wraps the result.
    if out is None:
        raw = _infinicore.adaptive_max_pool1d(input._underlying, output_size)
        return Tensor(raw)
    # In-place path writes directly into the caller-provided tensor.
    _infinicore.adaptive_max_pool1d_(out._underlying, input._underlying, output_size)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def asinh(input, *, out=None):
    """Element-wise inverse hyperbolic sine via the native infinicore backend.

    Writes into ``out`` when provided, otherwise returns a new ``Tensor``.
    """
    if out is not None:
        _infinicore.asinh_(out._underlying, input._underlying)
        return out
    return Tensor(_infinicore.asinh(input._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def baddbmm(input, batch1, batch2, *, beta=1.0, alpha=1.0, out=None):
    """Batched matrix-multiply-add dispatched to the native infinicore backend.

    ``beta``/``alpha`` are coerced to float before crossing the binding
    boundary. Writes into ``out`` when provided, otherwise returns a new
    ``Tensor``.
    """
    # Both native entry points take the same trailing arguments; build once.
    native_args = (
        input._underlying,
        batch1._underlying,
        batch2._underlying,
        float(beta),
        float(alpha),
    )
    if out is not None:
        _infinicore.baddbmm_(out._underlying, *native_args)
        return out
    return Tensor(_infinicore.baddbmm(*native_args))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def bilinear(input1, input2, weight, bias=None, *, out=None):
    """Bilinear transform dispatched to the native infinicore backend.

    ``bias`` may be ``None`` (no bias term). Writes into ``out`` when
    provided, otherwise returns a new ``Tensor``.
    """
    # Unwrap the optional bias once; the native layer accepts None directly.
    raw_bias = None if bias is None else bias._underlying
    if out is not None:
        _infinicore.bilinear_(
            out._underlying,
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
        return out
    return Tensor(
        _infinicore.bilinear(
            input1._underlying,
            input2._underlying,
            weight._underlying,
            raw_bias,
        )
    )
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def fmod(input, other, *, out=None):
    """Element-wise remainder dispatched to the native infinicore backend.

    Writes into ``out`` when provided, otherwise returns a new ``Tensor``.
    """
    if out is not None:
        _infinicore.fmod_(out._underlying, input._underlying, other._underlying)
        return out
    return Tensor(_infinicore.fmod(input._underlying, other._underlying))
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Process-wide kernel registry for this op (lazily constructed singleton).
common::OpDispatcher<AdaptiveMaxPool1d::schema> &AdaptiveMaxPool1d::dispatcher() {
    static common::OpDispatcher<AdaptiveMaxPool1d::schema> registry;
    return registry;
}

// Check device placement, activate y's device, then invoke the kernel
// registered for that device type.
void AdaptiveMaxPool1d::execute(Tensor y, Tensor x, size_t output_size) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    auto kernel = dispatcher().lookup(y->device().getType());
    kernel(y, x, output_size);
}

// Allocating form: output keeps x's shape except the last dimension,
// which becomes output_size.
Tensor adaptive_max_pool1d(Tensor x, size_t output_size) {
    auto out_shape = x->shape();
    out_shape.back() = output_size;
    Tensor result = Tensor::empty(out_shape, x->dtype(), x->device());
    adaptive_max_pool1d_(result, x, output_size);
    return result;
}

// In-place form: pooled result is written into caller-provided y.
void adaptive_max_pool1d_(Tensor y, Tensor x, size_t output_size) {
    AdaptiveMaxPool1d::execute(y, x, output_size);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/adaptive_max_pool1d.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::adaptive_max_pool1d_impl::infiniop {
// Per-thread cache of infiniop descriptors (capacity 100). The evictor lambda
// destroys a descriptor when it falls out of the cache and nulls the slot.
thread_local common::OpCache<size_t, infiniopAdaptiveMaxPool1dDescriptor_t> caches(
100, // capacity
[](infiniopAdaptiveMaxPool1dDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyAdaptiveMaxPool1dDescriptor(desc));
desc = nullptr;
}
});
// Backend entry point registered with the dispatcher: reuse (or create and
// cache) a descriptor for this (y, x, out) configuration, size and allocate
// the workspace, then launch the kernel on the current stream.
void calculate(Tensor y, Tensor x, size_t out) {
// Cache key derived from both tensors and the output size.
// NOTE(review): assumes hash_combine covers all layout details that affect
// the descriptor (shape/stride/dtype); a collision would reuse a mismatched
// descriptor — confirm hash_combine's coverage.
size_t seed = hash_combine(y, x, out);
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopAdaptiveMaxPool1dDescriptor_t desc = nullptr;
if (!desc_opt) {
// Cache miss: build a new descriptor from the tensor descriptors and
// remember it for subsequent calls with the same configuration.
INFINICORE_CHECK_ERROR(infiniopCreateAdaptiveMaxPool1dDescriptor(
context::getInfiniopHandle(y->device()), &desc,
y->desc(), x->desc(), out));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// Workspace is queried and allocated on every call (only the descriptor is
// cached); Memory is released via shared_ptr when this scope exits.
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetAdaptiveMaxPool1dWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopAdaptiveMaxPool1d(
desc, workspace->data(), workspace_size,
y->data(), x->data(), context::getStream()));
}
// Self-registration at static-initialization time: installs `calculate` for
// all device types. NOTE(review): the `false` flag presumably means "do not
// override existing registrations" — confirm against OpDispatcher::registerAll.
static bool registered = []() {
AdaptiveMaxPool1d::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::adaptive_max_pool1d_impl::infiniop
#include "infinicore/ops/asinh.hpp"
#include "../../utils.hpp"
namespace infinicore::op {

// Process-wide kernel registry for this op (lazily constructed singleton).
// Fixed: dropped the stray `;` after the function body, matching the style
// of the sibling op implementations (e.g. AdaptiveMaxPool1d::dispatcher).
common::OpDispatcher<Asinh::schema> &Asinh::dispatcher() {
    static common::OpDispatcher<Asinh::schema> dispatcher_;
    return dispatcher_;
}

// Check device placement, activate y's device, then invoke the kernel
// registered for that device type.
void Asinh::execute(Tensor y, Tensor x) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
    infinicore::context::setDevice(y->device());
    dispatcher().lookup(y->device().getType())(y, x);
}

// Allocating form: y mirrors x's shape, dtype and device.
Tensor asinh(Tensor x) {
    auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
    asinh_(y, x);
    return y;
}

// In-place form: result is written into caller-provided y.
void asinh_(Tensor y, Tensor x) {
    Asinh::execute(y, x);
}

} // namespace infinicore::op
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment