Commit cb7f0b7d authored by wooway777

Revert "Merge pull request #1056 from InfiniTensor/issue/1031"

This reverts commit 7f295448, reversing
changes made to e60985dc.
parent 037140c0
#include "infinicore/ops/baddbmm.hpp"
#include "infinicore/ops/gemm.hpp"
#include "infinicore/ops/rearrange.hpp"
namespace infinicore::op {
// 内联的 BLAS 兼容性检查,减少函数调用开销
inline bool is_blas_compatible(const Tensor &t) {
const auto ndim = t->ndim();
if (ndim == 2) {
const auto rs = t->stride(0);
const auto cs = t->stride(1);
if (rs != 1 && cs != 1) {
return false;
}
if (rs == 1 && cs == 1) {
return t->shape()[0] == 1 || t->shape()[1] == 1;
}
return true;
} else if (ndim == 3) {
const auto rs = t->stride(1);
const auto cs = t->stride(2);
if (t->shape()[0] > 1 && t->stride(0) == 0) {
return false;
}
if (rs != 1 && cs != 1) {
return false;
}
if (rs == 1 && cs == 1) {
return t->shape()[1] == 1 || t->shape()[2] == 1;
}
return true;
}
return false;
}
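
// Illustrative sketch (editor's addition, not part of this commit): how the
// check above classifies a few layouts, using only Tensor APIs that appear
// elsewhere in this commit (`permute`, `as_strided`).
inline void blas_compat_examples(const Tensor &base /* contiguous {4, 8} */) {
    bool row_major = is_blas_compatible(base);                          // true: strides {8, 1}
    bool col_major = is_blas_compatible(base->permute({1, 0}));         // true: strides {1, 8}
    bool gapped = is_blas_compatible(base->as_strided({2, 4}, {2, 3})); // false: neither stride is 1
    (void)row_major;
    (void)col_major;
    (void)gapped;
}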

inline void prepare_gemm_input(Tensor &output, Tensor &input, const size_t batch_size, const size_t m, const size_t n) {
    const auto input_ndim = input->ndim();
    if (input_ndim == 2) {
        // Broadcast a 2-D input across the batch dimension (batch stride 0).
        rearrange_(output, input->as_strided(
                               {batch_size, m, n},
                               {0, input->stride(0), input->stride(1)}));
    } else if (input_ndim == 3 && input->shape()[0] == 1 && batch_size > 1) {
        // Broadcast a single-batch 3-D input across all batches.
        rearrange_(output, input->as_strided(
                               {batch_size, m, n},
                               {0, input->stride(1), input->stride(2)}));
    } else {
        rearrange_(output, input);
    }
}

Tensor baddbmm(Tensor input, Tensor batch1, Tensor batch2,
               float beta,
               float alpha) {
    const size_t batch_size = batch1->shape()[0];
    const size_t m = batch1->shape()[1];
    const size_t n = batch2->shape()[2];
    const Tensor &a = is_blas_compatible(batch1) ? batch1 : rearrange(batch1);
    const Tensor &b = is_blas_compatible(batch2) ? batch2 : rearrange(batch2);
    if (beta == 0.0f) {
        // The input term vanishes, so this reduces to a plain batched GEMM.
        return gemm(a, b, alpha, 0.0f);
    }
    Tensor result = Tensor::empty({batch_size, m, n}, a->dtype(), a->device());
    prepare_gemm_input(result, input, batch_size, m, n);
    gemm_(result, a, b, alpha, beta);
    return result;
}

void baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2,
              float beta,
              float alpha) {
    const size_t batch_size = batch1->shape()[0];
    const size_t m = batch1->shape()[1];
    const size_t n = batch2->shape()[2];
    const Tensor &a = is_blas_compatible(batch1) ? batch1 : rearrange(batch1);
    const Tensor &b = is_blas_compatible(batch2) ? batch2 : rearrange(batch2);
    const bool out_is_usable = out->is_contiguous() && out->ndim() == 3
                            && out->shape()[0] == batch_size
                            && out->shape()[1] == m
                            && out->shape()[2] == n;
    if (out_is_usable) {
        if (beta != 0.0f && input->data() != out->data()) {
            prepare_gemm_input(out, input, batch_size, m, n);
        }
        gemm_(out, a, b, alpha, beta);
    } else {
        Tensor result = Tensor::empty({batch_size, m, n}, a->dtype(), a->device());
        if (beta != 0.0f) {
            prepare_gemm_input(result, input, batch_size, m, n);
        }
        gemm_(result, a, b, alpha, beta);
        rearrange_(out, result);
    }
}

} // namespace infinicore::op
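
// Usage sketch (editor's addition, not part of this commit): baddbmm follows
// the torch.baddbmm convention, out[i] = beta * input[i] + alpha * batch1[i] @ batch2[i];
// the shape comments are illustrative.
namespace infinicore::op {
inline Tensor baddbmm_example(Tensor input, Tensor batch1, Tensor batch2) {
    // input: {B, M, N}, batch1: {B, M, K}, batch2: {B, K, N} -> out: {B, M, N}
    return baddbmm(input, batch1, batch2, /*beta=*/1.0f, /*alpha=*/1.0f);
}
} // namespace infinicore::op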
#include "infinicore/ops/bilinear.hpp"
#include "infinicore/ops/add.hpp"
#include "infinicore/ops/matmul.hpp"
#include "infinicore/ops/rearrange.hpp"
#ifdef ENABLE_NVIDIA_API
namespace op::gemm::nvidia {
void set_tf32_enabled(bool);
}
#endif
namespace infinicore::op {
namespace {
// RAII 守卫:作用域内禁用 TF32
struct ScopedTF32Disable {
ScopedTF32Disable() {
#ifdef ENABLE_NVIDIA_API
// 实际项目中建议添加检查,仅在 NVIDIA 设备上调用
// 使用 ::op 强制从全局命名空间查找,避免被当前的 infinicore::op 遮蔽
::op::gemm::nvidia::set_tf32_enabled(false);
#endif
}
~ScopedTF32Disable() {
#ifdef ENABLE_NVIDIA_API
::op::gemm::nvidia::set_tf32_enabled(true);
#endif
}
};
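
// Usage sketch (editor's addition, not part of this commit): the guard is
// scope-bound, so TF32 is restored on every exit path, including exceptions.
inline void scoped_tf32_example() {
    ScopedTF32Disable tf32_guard; // TF32 off from here on
    // ... run GEMMs that need full FP32 precision ...
}                                 // destructor re-enables TF32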

inline bool is_gemm_compatible_3d(const Tensor &t) {
    if (t->ndim() != 3) {
        return false;
    }
    const auto batch = t->shape()[0];
    const auto rows = t->shape()[1];
    const auto cols = t->shape()[2];
    const auto bs = t->stride(0);
    const auto rs = t->stride(1);
    const auto cs = t->stride(2);
    if (rs != 1 && cs != 1) {
        return false;
    }
    if (cs == 1) {
        if (rs < static_cast<int64_t>(cols)) {
            return false;
        }
    } else {
        if (cs < static_cast<int64_t>(rows)) {
            return false;
        }
    }
    if (batch > 1 && bs == 0) {
        return false;
    }
    return true;
}

inline Tensor ensure_gemm_compatible(const Tensor &t) {
    if (t->ndim() == 2) {
        return t->is_contiguous() ? t : rearrange(t);
    } else if (t->ndim() == 3) {
        return is_gemm_compatible_3d(t) ? t : rearrange(t);
    }
    return t->is_contiguous() ? t : rearrange(t);
}

} // anonymous namespace

Tensor bilinear(Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias) {
    ScopedTF32Disable tf32_guard;
    const size_t batch_size = x1->shape()[0];
    const size_t in1_features = x1->shape()[1];
    const size_t in2_features = x2->shape()[1];
    const size_t out_features = weight->shape()[0];
    Tensor x1_compat = ensure_gemm_compatible(x1);
    Tensor x2_compat = ensure_gemm_compatible(x2);
    Tensor weight_cont = weight->is_contiguous() ? weight : weight->contiguous();
    Tensor weight_permuted = weight_cont->permute({1, 0, 2});
    Tensor weight_permuted_cont = weight_permuted->is_contiguous()
                                    ? weight_permuted
                                    : weight_permuted->contiguous();
    Tensor weight_matrix = weight_permuted_cont->view({in1_features, out_features * in2_features});
    Tensor intermediate = matmul(x1_compat, weight_matrix, 1.0f);
    Tensor intermediate_3d = intermediate->view({batch_size, out_features, in2_features});
    Tensor intermediate_transposed = intermediate_3d->permute({0, 2, 1});
    Tensor intermediate_compat = ensure_gemm_compatible(intermediate_transposed);
    Tensor x2_row = x2_compat->view({batch_size, 1, in2_features});
    Tensor x2_row_compat = ensure_gemm_compatible(x2_row);
    Tensor out_3d = matmul(x2_row_compat, intermediate_compat, 1.0f);
    Tensor out = out_3d->view({batch_size, out_features});
    if (bias) {
        Tensor bias_broadcast = (*bias)->as_strided(
            {batch_size, out_features},
            {0, (*bias)->strides()[0]});
        out = add(out, bias_broadcast);
    }
    return out;
}

void bilinear_(Tensor out, Tensor x1, Tensor x2, Tensor weight, std::optional<Tensor> bias) {
    Tensor result = bilinear(x1, x2, weight, bias);
    rearrange_(out, result);
}

} // namespace infinicore::op
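
// Usage sketch (editor's addition, not part of this commit): bilinear follows
// the torch.nn.functional.bilinear convention,
// out[n][o] = x1[n]^T @ weight[o] @ x2[n] + bias[o],
// with x1: {N, F1}, x2: {N, F2}, weight: {O, F1, F2}, bias: {O}.
namespace infinicore::op {
inline Tensor bilinear_example(Tensor x1, Tensor x2, Tensor weight, Tensor bias) {
    return bilinear(x1, x2, weight, bias); // Tensor converts to std::optional<Tensor>
}
} // namespace infinicore::op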
#include "infinicore/ops/cross_entropy.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {
common::OpDispatcher<CrossEntropy::schema> &CrossEntropy::dispatcher() {
static common::OpDispatcher<CrossEntropy::schema> dispatcher_;
return dispatcher_;
};
void CrossEntropy::execute(Tensor output, Tensor input, Tensor target) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(input, target);
infinicore::context::setDevice(output->device());
auto device_type = output->device().getType();
auto func = dispatcher().lookup(device_type);
if (func == nullptr) {
throw std::runtime_error("No CrossEntropy implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}
func(output, input, target);
}
Tensor cross_entropy(Tensor input, Tensor target) {
Shape shape = target->shape();
auto output = Tensor::empty(shape, input->dtype(), input->device());
cross_entropy_(output, input, target);
return output;
}
void cross_entropy_(Tensor output, Tensor input, Tensor target) {
CrossEntropy::execute(output, input, target);
}
} // namespace infinicore::op
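
// Usage sketch (editor's addition, not part of this commit): per the binding
// docstring later in this commit, this is token-wise cross entropy without
// reduction, so the loss tensor takes the target's shape; shapes here are
// illustrative.
namespace infinicore::op {
inline Tensor cross_entropy_example(Tensor logits /* {N, C} */, Tensor target /* {N} */) {
    return cross_entropy(logits, target); // loss: {N}, one value per token
}
} // namespace infinicore::op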
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/cross_entropy.hpp"
#include <infiniop.h>
namespace infinicore::op::cross_entropy_impl::infiniop {
thread_local common::OpCache<size_t, infiniopCrossEntropyDescriptor_t> caches(
100,
[](infiniopCrossEntropyDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyCrossEntropyDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor output, Tensor input, Tensor target) {
size_t seed = hash_combine(output, input, target);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopCrossEntropyDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateCrossEntropyDescriptor(
context::getInfiniopHandle(device),
&desc,
output->desc(),
input->desc(),
target->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetCrossEntropyWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopCrossEntropy(
desc,
workspace->data(),
workspace_size,
output->data(),
input->data(),
target->data(),
context::getStream()));
}
static bool registered = []() {
CrossEntropy::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::cross_entropy_impl::infiniop
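
// Pattern note (editor's addition, not part of this commit): every infiniop
// backend in this commit follows the same recipe, sketched here in pseudocode:
//
//   size_t key = hash_combine(out, inputs...);      // key from tensor metadata
//   desc = cache hit ? cached : create-and-cache;   // thread-local, per device
//   infiniopGetXxxWorkspaceSize(desc, &bytes);
//   infiniopXxx(desc, workspace, bytes, ..., context::getStream());
//
// Descriptors are destroyed by the eviction callback passed to OpCache above.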
#include "infinicore/ops/equal.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
common::OpDispatcher<Equal::schema> &Equal::dispatcher() {
static common::OpDispatcher<Equal::schema> dispatcher_;
return dispatcher_;
};
void Equal::execute(Tensor out, Tensor a, Tensor b) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, a, b);
infinicore::context::setDevice(out->device());
dispatcher().lookup(out->device().getType())(out, a, b);
}
Tensor equal(Tensor a, Tensor b) {
auto out = Tensor::empty(a->shape(), DataType::BOOL, a->device());
equal_(out, a, b);
return out;
}
void equal_(Tensor out, Tensor a, Tensor b) {
if (out->dtype() != DataType::BOOL) {
throw std::runtime_error("Equal expects bool output tensor.");
}
Equal::execute(out, a, b);
}
} // namespace infinicore::op
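
// Usage sketch (editor's addition, not part of this commit): the in-place
// variant insists on a DataType::BOOL output, as enforced above.
namespace infinicore::op {
inline Tensor equal_example(Tensor a, Tensor b) {
    auto out = Tensor::empty(a->shape(), DataType::BOOL, a->device());
    equal_(out, a, b); // true where a == b elementwise
    return out;
}
} // namespace infinicore::op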
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/equal.hpp"
#include <infiniop.h>
namespace infinicore::op::equal_impl::infiniop {
thread_local common::OpCache<size_t, infiniopEqualDescriptor_t> caches(
100,
[](infiniopEqualDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyEqualDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor out, Tensor a, Tensor b) {
size_t seed = hash_combine(out, a, b);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
infiniopEqualDescriptor_t desc = nullptr;
if (auto cached = cache.get(seed)) {
desc = *cached;
} else {
INFINICORE_CHECK_ERROR(infiniopCreateEqualDescriptor(
context::getInfiniopHandle(device), &desc,
out->desc(), a->desc(), b->desc()));
cache.put(seed, desc);
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetEqualWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace;
void *workspace_ptr = nullptr;
if (workspace_size != 0) {
workspace = context::allocateMemory(workspace_size);
workspace_ptr = workspace->data();
}
INFINICORE_CHECK_ERROR(infiniopEqual(
desc,
workspace_ptr,
workspace_size,
out->data(),
a->data(),
b->data(),
context::getStream()));
}
static bool registered = []() {
Equal::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::equal_impl::infiniop
#include "infinicore/ops/fmod.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
common::OpDispatcher<Fmod::schema> &Fmod::dispatcher() {
static common::OpDispatcher<Fmod::schema> dispatcher_;
return dispatcher_;
};
void Fmod::execute(Tensor c, Tensor a, Tensor b) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(c, a, b);
infinicore::context::setDevice(c->device());
dispatcher().lookup(c->device().getType())(c, a, b);
}
Tensor fmod(Tensor a, Tensor b) {
auto c = Tensor::empty(a->shape(), a->dtype(), a->device());
fmod_(c, a, b);
return c;
}
void fmod_(Tensor c, Tensor a, Tensor b) {
Fmod::execute(c, a, b);
}
} // namespace infinicore::op
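
// Usage sketch (editor's addition, not part of this commit): the name
// suggests C's fmod semantics (the result takes the dividend's sign), but
// this file only defines dispatch, so treat that as an assumption.
namespace infinicore::op {
inline Tensor fmod_example(Tensor a, Tensor b) {
    return fmod(a, b); // elementwise remainder; same shape and dtype as a
}
} // namespace infinicore::op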
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/fmod.hpp"
#include <infiniop.h>
namespace infinicore::op::fmod_impl::infiniop {
thread_local common::OpCache<size_t, infiniopFmodDescriptor_t> caches(
100, // capacity
[](infiniopFmodDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyFmodDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor c, Tensor a, Tensor b) {
size_t seed = hash_combine(c, b, a);
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopFmodDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateFmodDescriptor(
context::getInfiniopHandle(c->device()), &desc,
c->desc(), a->desc(), b->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetFmodWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopFmod(
desc, workspace->data(), workspace_size,
c->data(), a->data(), b->data(), context::getStream()));
}
static bool registered = []() {
Fmod::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::fmod_impl::infiniop
#include "infinicore/ops/hardswish.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {
common::OpDispatcher<Hardswish::schema> &Hardswish::dispatcher() {
static common::OpDispatcher<Hardswish::schema> dispatcher_;
return dispatcher_;
}
void Hardswish::execute(Tensor output, Tensor input) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
infinicore::context::setDevice(output->device());
auto device_type = output->device().getType();
auto func = dispatcher().lookup(device_type);
if (func == nullptr) {
throw std::runtime_error(
"No Hardswish implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}
func(output, input);
}
Tensor hardswish(Tensor input) {
auto output = Tensor::empty(input->shape(), input->dtype(), input->device());
hardswish_(output, input);
return output;
}
void hardswish_(Tensor output, Tensor input) {
Hardswish::execute(output, input);
}
} // namespace infinicore::op
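
// Reference sketch (editor's addition, not part of this commit): the standard
// hardswish, as in torch.nn.functional.hardswish, is x * relu6(x + 3) / 6.
// A scalar reference for spot-checking a backend elementwise:
inline float hardswish_reference(float x) {
    const float t = x + 3.0f;
    const float relu6 = t < 0.0f ? 0.0f : (t > 6.0f ? 6.0f : t);
    return x * relu6 / 6.0f;
}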
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardswish.hpp"
#include <infiniop.h>
namespace infinicore::op::hardswish_impl::infiniop {
thread_local common::OpCache<size_t, infiniopHardSwishDescriptor_t> caches(
100,
[](infiniopHardSwishDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyHardSwishDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor output, Tensor input) {
size_t seed = hash_combine(output, input);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopHardSwishDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateHardSwishDescriptor(
context::getInfiniopHandle(device),
&desc,
output->desc(),
input->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetHardSwishWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace;
void *workspace_ptr = nullptr;
if (workspace_size != 0) {
workspace = context::allocateMemory(workspace_size);
workspace_ptr = workspace->data();
}
INFINICORE_CHECK_ERROR(infiniopHardSwish(
desc,
workspace_ptr,
workspace_size,
output->data(),
input->data(),
context::getStream()));
}
static bool registered = []() {
Hardswish::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::hardswish_impl::infiniop
#include "infinicore/ops/hardtanh.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {
common::OpDispatcher<HardTanh::schema> &HardTanh::dispatcher() {
static common::OpDispatcher<HardTanh::schema> dispatcher_;
return dispatcher_;
}
void HardTanh::execute(Tensor output, Tensor input, float min_val, float max_val) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
infinicore::context::setDevice(output->device());
auto device_type = output->device().getType();
auto func = dispatcher().lookup(device_type);
if (func == nullptr) {
throw std::runtime_error(
"No HardTanh implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}
func(output, input, min_val, max_val);
}
Tensor hardtanh(Tensor input, float min_val, float max_val) {
auto output = Tensor::empty(input->shape(), input->dtype(), input->device());
hardtanh_(output, input, min_val, max_val);
return output;
}
void hardtanh_(Tensor output, Tensor input, float min_val, float max_val) {
HardTanh::execute(output, input, min_val, max_val);
}
} // namespace infinicore::op
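
// Reference sketch (editor's addition, not part of this commit): hardtanh, as
// in torch.nn.functional.hardtanh, is an elementwise clamp to [min_val, max_val]
// (min_val = -1, max_val = 1 gives the classic tanh approximation).
inline float hardtanh_reference(float x, float min_val, float max_val) {
    return x < min_val ? min_val : (x > max_val ? max_val : x);
}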
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardtanh.hpp"
#include <infiniop.h>
namespace infinicore::op::hardtanh_impl::infiniop {
thread_local common::OpCache<size_t, infiniopHardTanhDescriptor_t> caches(
100,
[](infiniopHardTanhDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyHardTanhDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor output, Tensor input, float min_val, float max_val) {
size_t seed = hash_combine(output, input, min_val, max_val);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopHardTanhDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateHardTanhDescriptor(
context::getInfiniopHandle(device),
&desc,
output->desc(),
input->desc(),
min_val,
max_val));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetHardTanhWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace;
void *workspace_ptr = nullptr;
if (workspace_size != 0) {
workspace = context::allocateMemory(workspace_size);
workspace_ptr = workspace->data();
}
INFINICORE_CHECK_ERROR(infiniopHardTanh(
desc,
workspace_ptr,
workspace_size,
output->data(),
input->data(),
context::getStream()));
}
static bool registered = []() {
HardTanh::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::hardtanh_impl::infiniop

@@ -2,23 +2,13 @@
#include <pybind11/pybind11.h>
#include "ops/adaptive_max_pool1d.hpp"
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/all.hpp"
#include "ops/asinh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/baddbmm.hpp"
#include "ops/bilinear.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/equal.hpp"
#include "ops/flash_attention.hpp"
#include "ops/fmod.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/linear.hpp"
#include "ops/linear_w8a8i8.hpp"
@@ -45,39 +35,28 @@ namespace py = pybind11;

namespace infinicore::ops {

inline void bind(py::module &m) {
    bind_adaptive_max_pool1d(m);
    bind_add(m);
    bind_add_rms_norm(m);
    bind_attention(m);
    bind_asinh(m);
    bind_baddbmm(m);
    bind_bilinear(m);
    bind_causal_softmax(m);
    bind_flash_attention(m);
    bind_kv_caching(m);
    bind_fmod(m);
    bind_random_sample(m);
    bind_linear(m);
    bind_matmul(m);
    bind_mul(m);
    bind_mha_varlen(m);
    bind_hardswish(m);
    bind_hardtanh(m);
    bind_paged_attention(m);
    bind_paged_attention_prefill(m);
    bind_paged_caching(m);
    bind_random_sample(m);
    bind_cross_entropy(m);
    bind_rearrange(m);
    bind_rms_norm(m);
    bind_avg_pool1d(m);
    bind_silu(m);
    bind_swiglu(m);
    bind_rope(m);
    bind_embedding(m);
    bind_linear_w8a8i8(m);
    bind_silu_and_mul(m);
    bind_equal(m);
    bind_sum(m);
    bind_var_mean(m);
    bind_var(m);
......

#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/adaptive_max_pool1d.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_adaptive_max_pool1d(py::module &m) {
    m.def("adaptive_max_pool1d",
          &op::adaptive_max_pool1d,
          py::arg("x"),
          py::arg("output_size"),
          R"doc(1D Adaptive Max Pooling.

Args:
    x: Input tensor of shape (N, C, L_in) or (N, L_in)
    output_size: Target output size L_out

Returns:
    Output tensor of shape (N, C, L_out) or (N, L_out)
)doc");
    m.def("adaptive_max_pool1d_",
          &op::adaptive_max_pool1d_,
          py::arg("y"),
          py::arg("x"),
          py::arg("output_size"),
          R"doc(In-place 1D Adaptive Max Pooling.

Args:
    y: Output tensor of shape (N, C, L_out) or (N, L_out)
    x: Input tensor of shape (N, C, L_in) or (N, L_in)
    output_size: Target output size L_out
)doc");
}

} // namespace infinicore::ops

#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/asinh.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_asinh(py::module &m) {
    m.def("asinh",
          &op::asinh,
          py::arg("x"),
          R"doc(Element-wise inverse hyperbolic sine function.)doc");
    m.def("asinh_",
          &op::asinh_,
          py::arg("y"),
          py::arg("x"),
          R"doc(In-place element-wise inverse hyperbolic sine function.)doc");
}

} // namespace infinicore::ops

#pragma once

#include <optional>

#include <pybind11/pybind11.h>

#include "infinicore/ops/avg_pool1d.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_avg_pool1d(py::module &m) {
    m.def(
        "avg_pool1d",
        [](::infinicore::Tensor input, size_t kernel_size, std::optional<size_t> stride, size_t padding) {
            return op::avg_pool1d(input, kernel_size, stride.value_or(0), padding);
        },
        py::arg("input"),
        py::arg("kernel_size"),
        py::arg("stride") = py::none(),
        py::arg("padding") = 0,
        R"doc(AvgPool1d out-of-place.)doc");
    m.def(
        "avg_pool1d_",
        [](::infinicore::Tensor output, ::infinicore::Tensor input, size_t kernel_size, std::optional<size_t> stride, size_t padding) {
            op::avg_pool1d_(output, input, kernel_size, stride.value_or(0), padding);
        },
        py::arg("output"),
        py::arg("input"),
        py::arg("kernel_size"),
        py::arg("stride") = py::none(),
        py::arg("padding") = 0,
        R"doc(AvgPool1d in-place variant writing to provided output tensor.)doc");
}

} // namespace infinicore::ops

#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/baddbmm.hpp"

namespace py = pybind11;

namespace infinicore::ops {

// `inline` keeps these header-defined wrappers ODR-safe when the header is
// included from more than one translation unit.
inline Tensor py_baddbmm(Tensor input, Tensor batch1, Tensor batch2, float beta = 1.0f, float alpha = 1.0f) {
    return op::baddbmm(input, batch1, batch2, beta, alpha);
}

inline void py_baddbmm_(Tensor out, Tensor input, Tensor batch1, Tensor batch2, float beta = 1.0f, float alpha = 1.0f) {
    op::baddbmm_(out, input, batch1, batch2, beta, alpha);
}

inline void bind_baddbmm(py::module &m) {
    m.def("baddbmm",
          &py_baddbmm,
          py::arg("input"),
          py::arg("batch1"),
          py::arg("batch2"),
          py::arg("beta") = 1.0f,
          py::arg("alpha") = 1.0f,
          R"doc(Batched matrix-matrix product with addition.

Args:
    input: Input tensor
    batch1: First batch of matrices
    batch2: Second batch of matrices
    beta: Scaling factor for input tensor
    alpha: Scaling factor for the product of batch1 and batch2

Returns:
    Output tensor after baddbmm operation
)doc");
    m.def("baddbmm_",
          &py_baddbmm_,
          py::arg("out"),
          py::arg("input"),
          py::arg("batch1"),
          py::arg("batch2"),
          py::arg("beta") = 1.0f,
          py::arg("alpha") = 1.0f,
          R"doc(In-place batched matrix-matrix product with addition.

Args:
    out: Output tensor
    input: Input tensor
    batch1: First batch of matrices
    batch2: Second batch of matrices
    beta: Scaling factor for input tensor
    alpha: Scaling factor for the product of batch1 and batch2
)doc");
}

} // namespace infinicore::ops

#pragma once

#include <optional>

#include <pybind11/pybind11.h>

#include "infinicore/ops/bilinear.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline Tensor py_bilinear(Tensor x1, Tensor x2, Tensor weight, pybind11::object bias) {
    std::optional<Tensor> bias_tensor = std::nullopt;
    if (!bias.is_none()) {
        bias_tensor = bias.cast<Tensor>();
    }
    return op::bilinear(x1, x2, weight, bias_tensor);
}

inline void py_bilinear_(Tensor out, Tensor x1, Tensor x2, Tensor weight, pybind11::object bias) {
    std::optional<Tensor> bias_tensor = std::nullopt;
    if (!bias.is_none()) {
        bias_tensor = bias.cast<Tensor>();
    }
    op::bilinear_(out, x1, x2, weight, bias_tensor);
}

inline void bind_bilinear(py::module &m) {
    m.def("bilinear",
          &py_bilinear,
          py::arg("x1"),
          py::arg("x2"),
          py::arg("weight"),
          py::arg("bias"),
          R"doc(Bilinear transformation of two input tensors.

Args:
    x1: First input tensor
    x2: Second input tensor
    weight: Weight tensor
    bias: Bias tensor (optional)

Returns:
    Output tensor after bilinear transformation
)doc");
    m.def("bilinear_",
          &py_bilinear_,
          py::arg("out"),
          py::arg("x1"),
          py::arg("x2"),
          py::arg("weight"),
          py::arg("bias"),
          R"doc(In-place bilinear transformation of two input tensors.

Args:
    out: Output tensor
    x1: First input tensor
    x2: Second input tensor
    weight: Weight tensor
    bias: Bias tensor (optional)
)doc");
}

} // namespace infinicore::ops

#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/cross_entropy.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_cross_entropy(py::module &m) {
    m.def("cross_entropy",
          &op::cross_entropy,
          py::arg("logits"),
          py::arg("target"),
          R"doc(Token-wise cross entropy loss without reduction.)doc");
    m.def("cross_entropy_",
          &op::cross_entropy_,
          py::arg("loss"),
          py::arg("logits"),
          py::arg("target"),
          R"doc(Write cross entropy loss into a provided tensor.)doc");
}

} // namespace infinicore::ops

#pragma once

#include <pybind11/pybind11.h>

#include "infinicore/ops/equal.hpp"

namespace py = pybind11;

namespace infinicore::ops {

inline void bind_equal(py::module &m) {
    m.def("equal",
          &op::equal,
          py::arg("a"),
          py::arg("b"),
          R"doc(Elementwise equality returning a bool tensor.)doc");
    m.def("equal_",
          &op::equal_,
          py::arg("out"),
          py::arg("a"),
          py::arg("b"),
          R"doc(In-place elementwise equality writing into `out`.)doc");
}

} // namespace infinicore::ops
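
// Usage sketch (editor's addition, not part of this commit): each bind_xxx
// header defines an inline registrar; a pybind11 module entry point composes
// them through the aggregate infinicore::ops::bind(m) shown earlier. The
// module name below is illustrative, not taken from this repository.
PYBIND11_MODULE(example_ops, m) {
    infinicore::ops::bind(m);
}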