Commit 18773b69 authored by wooway777

Revert "Merge pull request #1069 from InfiniTensor/issue/1031_T1_1_15"

This reverts commit 21c6af2d, reversing
changes made to 99a802dd.
parent bfead271
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def reciprocal(input, *, out=None):
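    """Element-wise reciprocal of `input`; writes into `out` when provided."""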
if out is None:
return Tensor(_infinicore.reciprocal(input._underlying))
_infinicore.reciprocal_(out._underlying, input._underlying)
return out
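The same thin-wrapper convention would extend to the other ops in this commit; a sketch for atanh, assuming `_infinicore` exposes `atanh`/`atanh_` as bound further below (illustrative, not part of the original file):

def atanh(input, *, out=None):
    # Sketch only: the reciprocal wrapper pattern applied to atanh.
    if out is None:
        return Tensor(_infinicore.atanh(input._underlying))
    _infinicore.atanh_(out._underlying, input._underlying)
    return out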
#include "infinicore/ops/addcmul.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
common::OpDispatcher<Addcmul::schema> &Addcmul::dispatcher() {
static common::OpDispatcher<Addcmul::schema> dispatcher_;
return dispatcher_;
}
// Core execution logic: device validation and backend dispatch
void Addcmul::execute(Tensor out, Tensor input, Tensor t1, Tensor t2, float value) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, input, t1, t2);
infinicore::context::setDevice(out->device());
dispatcher().lookup(out->device().getType())(out, input, t1, t2, value);
}
// Out-of-place interface: allocates the output tensor automatically
Tensor addcmul(Tensor input, Tensor t1, Tensor t2, float value) {
auto out = Tensor::empty(input->shape(), input->dtype(), input->device());
addcmul_(out, input, t1, t2, value);
return out;
}
void addcmul_(Tensor out, Tensor input, Tensor t1, Tensor t2, float value) {
Addcmul::execute(out, input, t1, t2, value);
}
} // namespace infinicore::op
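For reference, addcmul's element-wise semantics are out = input + value * t1 * t2; a NumPy sketch one could check a backend against (names here are illustrative):

import numpy as np

def addcmul_ref(input, t1, t2, value=1.0):
    # Element-wise: input + value * (t1 * t2)
    return input + value * (t1 * t2)

x = np.array([1.0, 2.0])
assert np.allclose(addcmul_ref(x, np.array([3.0, 4.0]), np.array([5.0, 6.0]), 0.5),
                   [8.5, 14.0])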
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/addcmul.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::addcmul_impl::infiniop {
// Thread-local cache of operator descriptors
thread_local common::OpCache<size_t, infiniopAddcmulDescriptor_t> caches(
100,
[](infiniopAddcmulDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyAddcmulDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor out, Tensor input, Tensor t1, Tensor t2, float value) {
size_t seed = hash_combine(out, input, t1, t2, value);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopAddcmulDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateAddcmulDescriptor(
context::getInfiniopHandle(device), &desc,
out->desc(), input->desc(), t1->desc(), t2->desc(), value));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetAddcmulWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopAddcmul(
desc, workspace->data(), workspace_size,
out->data(), input->data(), t1->data(), t2->data(), context::getStream()));
}
static bool registered = []() {
Addcmul::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::addcmul_impl::infiniop
#include "infinicore/ops/atanh.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
// Returns the singleton dispatcher
common::OpDispatcher<Atanh::schema> &Atanh::dispatcher() {
static common::OpDispatcher<Atanh::schema> dispatcher_;
return dispatcher_;
}
// Execution entry point: handles device switching and backend lookup
void Atanh::execute(Tensor y, Tensor a) {
// Ensure input and output are on the same device
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, a);
// Switch the current context to the target device
infinicore::context::setDevice(y->device());
// Look up and run the implementation registered for the device type (CPU/CUDA/etc.)
dispatcher().lookup(y->device().getType())(y, a);
}
// Out-of-place interface: allocates the result tensor automatically
Tensor atanh(Tensor a) {
// Allocate an empty tensor matching the input's shape, dtype, and device
auto y = Tensor::empty(a->shape(), a->dtype(), a->device());
atanh_(y, a);
return y;
}
// In-place / explicit-output interface
void atanh_(Tensor y, Tensor a) {
Atanh::execute(y, a);
}
} // namespace infinicore::op
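For reference, atanh is the inverse of tanh on (-1, 1); a NumPy sanity check of the identity this op implements:

import numpy as np

# atanh(x) = 0.5 * ln((1 + x) / (1 - x)) for x in (-1, 1)
x = np.array([-0.5, 0.0, 0.5])
assert np.allclose(np.arctanh(x), 0.5 * np.log((1 + x) / (1 - x)))
assert np.allclose(np.tanh(np.arctanh(x)), x)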
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/atanh.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::atanh_impl::infiniop {
// Thread-local descriptor cache; avoids the overhead of recreating descriptors
thread_local common::OpCache<size_t, infiniopAtanhDescriptor_t> caches(
100, // cache capacity
[](infiniopAtanhDescriptor_t &desc) {
if (desc != nullptr) {
// Eviction callback: destroy the infiniop operator descriptor
INFINICORE_CHECK_ERROR(infiniopDestroyAtanhDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor y, Tensor a) {
// 1. Hash the tensors' shapes, strides, dtypes, etc. into a unique key
size_t seed = hash_combine(y, a);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
// 2. Try to fetch an existing descriptor from the cache
auto desc_opt = cache.get(seed);
infiniopAtanhDescriptor_t desc = nullptr;
if (!desc_opt) {
// Cache miss: create a new descriptor
INFINICORE_CHECK_ERROR(infiniopCreateAtanhDescriptor(
context::getInfiniopHandle(device), &desc,
y->desc(), a->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// 3. Query and allocate the required workspace (if any)
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetAtanhWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
// 4. Run the underlying computation
INFINICORE_CHECK_ERROR(infiniopAtanh(
desc, workspace->data(), workspace_size,
y->data(), a->data(), context::getStream()));
}
// 5. Self-registration: adds this implementation to the dispatcher at startup
static bool registered = []() {
Atanh::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::atanh_impl::infiniop
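The thread-local descriptor cache used by each implementation is, in effect, a capacity-bounded LRU map with an eviction callback; a minimal Python sketch of that shape (illustrative, not the actual common::OpCache API):

from collections import OrderedDict

class LruCache:
    def __init__(self, capacity, on_evict):
        self.capacity, self.on_evict = capacity, on_evict
        self.entries = OrderedDict()

    def get(self, key):
        if key not in self.entries:
            return None
        self.entries.move_to_end(key)  # mark as most recently used
        return self.entries[key]

    def put(self, key, desc):
        self.entries[key] = desc
        self.entries.move_to_end(key)
        if len(self.entries) > self.capacity:
            _, evicted = self.entries.popitem(last=False)  # drop oldest entry
            self.on_evict(evicted)  # e.g. destroy the descriptor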
#include "infinicore/ops/binary_cross_entropy_with_logits.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
// Static dispatcher instantiation
common::OpDispatcher<BinaryCrossEntropyWithLogits::schema> &BinaryCrossEntropyWithLogits::dispatcher() {
static common::OpDispatcher<BinaryCrossEntropyWithLogits::schema> dispatcher_;
return dispatcher_;
}
/**
 * Core execution logic: device validation, context setup, and backend dispatch.
*/
void BinaryCrossEntropyWithLogits::execute(Tensor out, Tensor logits, Tensor target, Tensor weight, Tensor pos_weight, std::string reduction) {
// 1. Verify that all defined tensors live on the same device:
// logits, target, out, plus the optional weight/pos_weight
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, logits, target);
if (weight) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, weight);
}
if (pos_weight) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, pos_weight);
}
// 2. Set the current device context
infinicore::context::setDevice(out->device());
// 3. Look up and run the backend implementation for the device type (e.g. CUDA or CPU)
dispatcher().lookup(out->device().getType())(out, logits, target, weight, pos_weight, reduction);
}
/**
 * Out-of-place interface: the output tensor is created according to `reduction`.
*/
Tensor binary_cross_entropy_with_logits(Tensor logits, Tensor target, Tensor weight, Tensor pos_weight, std::string reduction) {
std::vector<uint64_t> out_shape;
// 1. Determine the output shape from the reduction mode
if (reduction == "none") {
// No reduction: output shape matches the input logits
auto in_shape = logits->shape();
for (auto dim : in_shape) {
out_shape.push_back(static_cast<uint64_t>(dim));
}
} else {
// mean or sum reduction: scalar output (an empty shape vector denotes a 0-dim tensor)
out_shape = {};
}
// 2. Allocate the output tensor
auto out = Tensor::empty(out_shape, logits->dtype(), logits->device());
// 3. Delegate to the explicit-output interface
binary_cross_entropy_with_logits_(out, logits, target, weight, pos_weight, reduction);
return out;
}
/**
 * Explicit-output interface.
*/
void binary_cross_entropy_with_logits_(Tensor out, Tensor logits, Tensor target, Tensor weight, Tensor pos_weight, std::string reduction) {
BinaryCrossEntropyWithLogits::execute(out, logits, target, weight, pos_weight, reduction);
}
} // namespace infinicore::op
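The output-shape rule above is small enough to state as a check: 'none' preserves the logits shape, while 'mean'/'sum' produce a 0-dim scalar (a Python sketch):

def bce_out_shape(logits_shape, reduction="mean"):
    return list(logits_shape) if reduction == "none" else []

assert bce_out_shape((4, 3), "none") == [4, 3]
assert bce_out_shape((4, 3), "mean") == []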
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/binary_cross_entropy_with_logits.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::bce_logits_impl::infiniop {
// Thread-local cache of BCEWithLogits operator descriptors
thread_local common::OpCache<size_t, infiniopBCEWithLogitsDescriptor_t> caches(
100,
[](infiniopBCEWithLogitsDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyBCEWithLogitsDescriptor(desc));
desc = nullptr;
}
});
/**
 * @brief Runs the BCEWithLogits computation.
 * @param out Output tensor (scalar or same shape as logits, depending on reduction)
 * @param logits Prediction (logit) tensor
 * @param target Label tensor
 * @param weight Optional per-sample weight tensor
 * @param pos_weight Optional positive-class weight tensor
 * @param reduction_str Reduction mode ("none", "mean", "sum")
*/
void calculate(Tensor out, Tensor logits, Tensor target, Tensor weight, Tensor pos_weight, std::string reduction_str) {
// 1. Map the string reduction argument onto the enum used by the underlying API
infiniopReduction_t reduction;
if (reduction_str == "none") {
reduction = INFINIOP_REDUCTION_NONE;
} else if (reduction_str == "mean") {
reduction = INFINIOP_REDUCTION_MEAN;
} else if (reduction_str == "sum") {
reduction = INFINIOP_REDUCTION_SUM;
} else {
throw std::runtime_error("Unknown reduction mode: " + reduction_str);
}
// 2. Build a unique hash seed for the cache lookup; it covers every
// input tensor's state plus the reduction mode, keeping cache keys unique
size_t seed = hash_combine(out, logits, target, weight, pos_weight, static_cast<int>(reduction));
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopBCEWithLogitsDescriptor_t desc = nullptr;
// 3. On a cache miss, create a new descriptor and store it in the cache
if (!desc_opt) {
// Descriptors of the optional tensors; pass nullptr when undefined
auto weight_desc = weight ? weight->desc() : nullptr;
auto pos_weight_desc = pos_weight ? pos_weight->desc() : nullptr;
INFINICORE_CHECK_ERROR(infiniopCreateBCEWithLogitsDescriptor(
context::getInfiniopHandle(device),
&desc,
out->desc(),
logits->desc(),
target->desc(),
weight_desc,
pos_weight_desc,
reduction));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// 4. Query and allocate the temporary workspace
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetBCEWithLogitsWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
// 5. Resolve data pointers; optional tensors become nullptr
const void *weight_ptr = weight ? weight->data() : nullptr;
const void *pos_weight_ptr = pos_weight ? pos_weight->data() : nullptr;
// 6. Run the underlying operator
INFINICORE_CHECK_ERROR(infiniopBCEWithLogits(
desc,
workspace->data(),
workspace_size,
out->data(),
logits->data(),
target->data(),
weight_ptr,
pos_weight_ptr,
context::getStream()));
}
// 7. Self-register with the dispatcher
static bool registered = []() {
BinaryCrossEntropyWithLogits::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::bce_logits_impl::infiniop
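Under the usual PyTorch-style definition, the loss this backend computes can be cross-checked with a numerically stable NumPy reference (a sketch; it uses log σ(x) = -softplus(-x) and log(1-σ(x)) = -softplus(x)):

import numpy as np

def bce_with_logits_ref(logits, target, weight=None, pos_weight=None, reduction="mean"):
    log_sig = -np.logaddexp(0.0, -logits)       # log(sigmoid(x)), stable
    log_one_minus = -np.logaddexp(0.0, logits)  # log(1 - sigmoid(x)), stable
    pw = pos_weight if pos_weight is not None else 1.0
    loss = -(pw * target * log_sig + (1.0 - target) * log_one_minus)
    if weight is not None:
        loss = weight * loss
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss  # reduction == "none"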
#include "infinicore/ops/cdist.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
// Static dispatcher instantiation
common::OpDispatcher<Cdist::schema> &Cdist::dispatcher() {
static common::OpDispatcher<Cdist::schema> dispatcher_;
return dispatcher_;
}
/**
 * Core execution logic: device validation and backend dispatch.
*/
void Cdist::execute(Tensor out, Tensor x1, Tensor x2, double p) {
// Verify that all three tensors are on the same device
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, x1, x2);
// Set the current device context
infinicore::context::setDevice(out->device());
// Look up and run the registered implementation for the device type (CUDA/CPU/etc.)
dispatcher().lookup(out->device().getType())(out, x1, x2, p);
}
/**
 * Out-of-place interface: allocates the output tensor automatically.
* x1: (M, D), x2: (N, D) -> out: (M, N)
*/
Tensor cdist(Tensor x1, Tensor x2, double p) {
    // Derive the output shape {M, N} from x1 {M, D} and x2 {N, D}
    auto shape1 = x1->shape();
    auto shape2 = x2->shape();
    auto out = Tensor::empty({static_cast<uint64_t>(shape1[0]), static_cast<uint64_t>(shape2[0])},
                             x1->dtype(), x1->device());
    // Run through the explicit-output interface
cdist_(out, x1, x2, p);
return out;
}
/**
 * Explicit-output interface.
*/
void cdist_(Tensor out, Tensor x1, Tensor x2, double p) {
Cdist::execute(out, x1, x2, p);
}
} // namespace infinicore::op
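For p > 0 the distances computed here are out[i, j] = ||x1[i] - x2[j]||_p; a NumPy reference sketch for verification:

import numpy as np

def cdist_ref(x1, x2, p=2.0):
    diff = np.abs(x1[:, None, :] - x2[None, :, :])  # (M, N, D) differences
    return (diff ** p).sum(axis=-1) ** (1.0 / p)

x1 = np.array([[0.0, 0.0], [1.0, 1.0]])
x2 = np.array([[3.0, 4.0]])
assert np.allclose(cdist_ref(x1, x2), [[5.0], [np.sqrt(13.0)]])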
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/cdist.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::cdist_impl::infiniop {
// Thread-local cache of cdist operator descriptors
// The cache key hashes the input tensor descriptors and the parameter p
thread_local common::OpCache<size_t, infiniopCdistDescriptor_t> caches(
100,
[](infiniopCdistDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyCdistDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor out, Tensor x1, Tensor x2, double p) {
// 1. Build a unique hash seed for the cache lookup
size_t seed = hash_combine(out, x1, x2, p);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopCdistDescriptor_t desc = nullptr;
// 2. On a cache miss, create a new descriptor and store it in the cache
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateCdistDescriptor(
context::getInfiniopHandle(device),
&desc,
out->desc(),
x1->desc(),
x2->desc(),
p));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// 3. Query and allocate the temporary workspace
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetCdistWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
// 4. Run the underlying operator
INFINICORE_CHECK_ERROR(infiniopCdist(
desc,
workspace->data(),
workspace_size,
out->data(),
x1->data(),
x2->data(),
context::getStream()));
}
// 5. Self-register with the dispatcher
static bool registered = []() {
Cdist::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::cdist_impl::infiniop
#include "infinicore/ops/reciprocal.hpp"
#include "../../utils.hpp"
namespace infinicore::op {
common::OpDispatcher<Reciprocal::schema> &Reciprocal::dispatcher() {
static common::OpDispatcher<Reciprocal::schema> dispatcher_;
return dispatcher_;
}
void Reciprocal::execute(Tensor y, Tensor x) {
INFINICORE_ASSERT_TENSORS_SAME_DEVICE(y, x);
infinicore::context::setDevice(y->device());
dispatcher().lookup(y->device().getType())(y, x);
}
Tensor reciprocal(Tensor x) {
auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
reciprocal_(y, x);
return y;
}
void reciprocal_(Tensor y, Tensor x) {
Reciprocal::execute(y, x);
}
} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/reciprocal.hpp"
#include <infiniop.h>
namespace infinicore::op::reciprocal_impl::infiniop {
thread_local common::OpCache<size_t, infiniopReciprocalDescriptor_t> caches(
100, // capacity
[](infiniopReciprocalDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyReciprocalDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor y, Tensor x) {
size_t seed = hash_combine(y, x);
auto device = context::getDevice();
auto &cache = caches.getCache(device);
auto desc_opt = cache.get(seed);
infiniopReciprocalDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateReciprocalDescriptor(
context::getInfiniopHandle(device), &desc,
y->desc(), x->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetReciprocalWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopReciprocal(
desc, workspace->data(), workspace_size,
y->data(), x->data(), context::getStream()));
}
static bool registered = []() {
Reciprocal::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::reciprocal_impl::infiniop
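Every backend file above ends with the same self-registration idiom: a static initializer registers `calculate` with the op's dispatcher at load time, and `execute` later looks the kernel up by device type. A Python sketch of the presumed semantics, taking `registerAll(fn, false)` to mean "register for all device types without overriding" (device names illustrative):

dispatch_table = {}

def register_all(fn, override=False):
    # Register fn for every known device type, unless one is already present.
    for device_type in ("cpu", "cuda"):
        if override or device_type not in dispatch_table:
            dispatch_table[device_type] = fn

def execute(device_type, *args):
    return dispatch_table[device_type](*args)

register_all(lambda y, x: f"reciprocal({x}) -> {y}")  # runs at import time
assert execute("cpu", "y", "x") == "reciprocal(x) -> y"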
@@ -4,14 +4,10 @@
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/addcmul.hpp"
#include "ops/all.hpp"
#include "ops/atanh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/binary_cross_entropy_with_logits.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/equal.hpp"
@@ -30,7 +26,6 @@
#include "ops/paged_caching.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/reciprocal.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
@@ -79,11 +74,6 @@ inline void bind(py::module &m) {
bind_topk(m);
bind_all(m);
bind_equal(m);
bind_atanh(m);
bind_addcmul(m);
bind_cdist(m);
bind_binary_cross_entropy_with_logits(m);
bind_reciprocal(m);
}
} // namespace infinicore::ops
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/addcmul.hpp"
namespace py = pybind11;
namespace infinicore::ops {
inline void bind_addcmul(py::module &m) {
// Bind the out-of-place interface: out = addcmul(input, t1, t2, value)
m.def("addcmul",
&op::addcmul,
py::arg("input"),
py::arg("tensor1"),
py::arg("tensor2"),
py::arg("value") = 1.0f,
R"doc(Performs the element-wise multiplication of tensor1 by tensor2,
multiplies the result by value and adds it to input.
Args:
input: Tensor to be added
tensor1: First tensor for multiplication
tensor2: Second tensor for multiplication
value: Scalar multiplier for tensor1 * tensor2 (default: 1.0)
Returns:
The output tensor
)doc");
// Bind the explicit-output interface: addcmul_(out, input, t1, t2, value)
m.def("addcmul_",
&op::addcmul_,
py::arg("out"),
py::arg("input"),
py::arg("tensor1"),
py::arg("tensor2"),
py::arg("value") = 1.0f,
R"doc(In-place version of addcmul.
Args:
out: The destination tensor to store the result
input: Tensor to be added
tensor1: First tensor for multiplication
tensor2: Second tensor for multiplication
value: Scalar multiplier for tensor1 * tensor2 (default: 1.0)
)doc");
}
} // namespace infinicore::ops
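Because `value` is bound with `py::arg("value") = 1.0f`, Python callers can omit it or pass it by keyword. A sketch of an infinicore-style wrapper over these bindings, following the reciprocal wrapper convention at the top of this commit (illustrative):

from infinicore.lib import _infinicore
from infinicore.tensor import Tensor

def addcmul(input, tensor1, tensor2, *, value=1.0, out=None):
    if out is None:
        return Tensor(_infinicore.addcmul(
            input._underlying, tensor1._underlying, tensor2._underlying, value))
    _infinicore.addcmul_(out._underlying, input._underlying,
                         tensor1._underlying, tensor2._underlying, value)
    return out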
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/atanh.hpp"
namespace py = pybind11;
namespace infinicore::ops {
inline void bind_atanh(py::module &m) {
m.def("atanh",
&op::atanh,
py::arg("a"),
R"doc(Inverse hyperbolic tangent of a tensor.)doc");
m.def("atanh_",
&op::atanh_,
py::arg("y"),
py::arg("a"),
R"doc(Compute inverse hyperbolic tangent and store in the provided output tensor.)doc");
}
} // namespace infinicore::ops
#pragma once
#include "infinicore/ops/binary_cross_entropy_with_logits.hpp"
#include <pybind11/pybind11.h>
namespace py = pybind11;
namespace infinicore::ops {
inline void bind_binary_cross_entropy_with_logits(py::module &m) {
// 1. Bind the out-of-place interface: out = binary_cross_entropy_with_logits(...)
m.def(
"binary_cross_entropy_with_logits",
[](Tensor logits,
Tensor target,
py::object weight,
py::object pos_weight,
std::string reduction) {
Tensor w = weight.is_none() ? Tensor() : weight.cast<Tensor>();
Tensor pw = pos_weight.is_none() ? Tensor() : pos_weight.cast<Tensor>();
return op::binary_cross_entropy_with_logits(
logits, target, w, pw, reduction);
},
py::arg("input"),
py::arg("target"),
py::arg("weight") = py::none(),
py::arg("pos_weight") = py::none(),
py::arg("reduction") = "mean",
R"doc(Measures Binary Cross Entropy between target and output logits.
Args:
input: Tensor of arbitrary shape as unnormalized scores (logits).
target: Tensor of the same shape as input with values between 0 and 1.
weight: Optional rescaling weight for each loss component.
pos_weight: Optional weight for positive examples (must be broadcastable).
reduction: Specifies the reduction to apply: 'none' | 'mean' | 'sum'.
Returns:
A tensor representing the loss.
)doc");
// 2. Bind the explicit-output interface: binary_cross_entropy_with_logits_(out, ...)
m.def(
"binary_cross_entropy_with_logits_",
[](Tensor output,
Tensor logits,
Tensor target,
py::object weight,
py::object pos_weight,
std::string reduction) {
Tensor w = weight.is_none() ? Tensor() : weight.cast<Tensor>();
Tensor pw = pos_weight.is_none() ? Tensor() : pos_weight.cast<Tensor>();
return op::binary_cross_entropy_with_logits_(
output, logits, target, w, pw, reduction);
},
py::arg("out"),
py::arg("input"),
py::arg("target"),
py::arg("weight") = py::none(),
py::arg("pos_weight") = py::none(),
py::arg("reduction") = "mean",
R"doc(Specified output version of binary_cross_entropy_with_logits.
Args:
out: The destination tensor to store the loss.
input: Logits tensor.
target: Target tensor.
weight: Optional sample weight.
pos_weight: Optional positive class weight.
reduction: Specifies the reduction to apply.
)doc");
}
} // namespace infinicore::ops
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/cdist.hpp"
namespace py = pybind11;
namespace infinicore::ops {
inline void bind_cdist(py::module &m) {
// 1. Bind the out-of-place interface: out = cdist(x1, x2, p)
m.def("cdist",
&op::cdist,
py::arg("x1"),
py::arg("x2"),
py::arg("p") = 2.0,
R"doc(Computes batched pairwise distance between vectors in x1 and x2 using p-norm.
Args:
x1: First set of vectors, shape (M, D)
x2: Second set of vectors, shape (N, D)
p: The p-norm to apply (default: 2.0)
Returns:
A matrix containing pairwise distances, shape (M, N)
)doc");
// 2. Bind the explicit-output interface: cdist_(out, x1, x2, p)
m.def("cdist_",
&op::cdist_,
py::arg("out"),
py::arg("x1"),
py::arg("x2"),
py::arg("p") = 2.0,
R"doc(In-place version of cdist. Stores the results in the 'out' tensor.
Args:
out: The destination tensor, shape (M, N)
x1: First set of vectors, shape (M, D)
x2: Second set of vectors, shape (N, D)
p: The p-norm to apply (default: 2.0)
)doc");
}
} // namespace infinicore::ops
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/reciprocal.hpp"
namespace py = pybind11;
namespace infinicore::ops {
inline void bind_reciprocal(py::module &m) {
m.def("reciprocal",
&op::reciprocal,
py::arg("x"),
R"doc(Computes the reciprocal of the input tensor.)doc");
m.def("reciprocal_",
&op::reciprocal_,
py::arg("y"),
py::arg("x"),
R"doc(Computes the reciprocal of the input tensor and stores in the output tensor.)doc");
}
} // namespace infinicore::ops
@@ -13,11 +13,6 @@ DECLARE_INFINIOP_TEST(rope)
DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(swiglu)
DECLARE_INFINIOP_TEST(add)
DECLARE_INFINIOP_TEST(atanh)
DECLARE_INFINIOP_TEST(addcmul)
DECLARE_INFINIOP_TEST(cdist)
DECLARE_INFINIOP_TEST(binary_cross_entropy_with_logits)
DECLARE_INFINIOP_TEST(reciprocal)
DECLARE_INFINIOP_TEST(causal_softmax)
DECLARE_INFINIOP_TEST(rearrange)
DECLARE_INFINIOP_TEST(silu)
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::addcmul {
struct Test::Attributes {
std::shared_ptr<Tensor> input;
std::shared_ptr<Tensor> t1;
std::shared_ptr<Tensor> t2;
std::shared_ptr<Tensor> out;
std::shared_ptr<Tensor> ans;
float value;
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
// Verify that all required tensors are present
if (tensors.find("input") == tensors.end() || tensors.find("t1") == tensors.end() || tensors.find("t2") == tensors.end() || tensors.find("out") == tensors.end() || tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Addcmul Test: Missing tensors");
}
    // Read the scalar attribute `value` (defaults to 1.0f)
    test->_attributes->value = 1.0f;
if (attributes.find("value") != attributes.end()) {
test->_attributes->value = *reinterpret_cast<float *>(attributes["value"].data());
}
test->_attributes->input = tensors["input"];
test->_attributes->t1 = tensors["t1"];
test->_attributes->t2 = tensors["t2"];
test->_attributes->out = tensors["out"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopAddcmulDescriptor_t op_desc;
// Move the data to the target device
auto input = _attributes->input->to(device, device_id);
auto t1 = _attributes->t1->to(device, device_id);
auto t2 = _attributes->t2->to(device, device_id);
auto out = _attributes->out->to(device, device_id);
// Create the operator descriptor
CHECK_OR(infiniopCreateAddcmulDescriptor(handle, &op_desc,
out->desc(),
input->desc(),
t1->desc(),
t2->desc(),
_attributes->value),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create addcmul descriptor."));
// Workspace query and allocation
size_t workspace_size;
CHECK_OR(infiniopGetAddcmulWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
// Run the computation
CHECK_OR(infiniopAddcmul(op_desc, workspace, workspace_size,
out->data(),
input->data(),
t1->data(),
t2->data(),
nullptr), // stream
return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
// Verify the result
try {
allClose(out, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
// Benchmark
double elapsed_time = benchmark(
[=]() {
infiniopAddcmul(op_desc, workspace, workspace_size,
out->data(),
input->data(),
t1->data(),
t2->data(),
nullptr);
},
warm_ups, iterations);
// Release resources
infinirtFree(workspace);
infiniopDestroyAddcmulDescriptor(op_desc);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {"value"};
}
std::vector<std::string> Test::tensor_names() {
return {"input", "t1", "t2", "out", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"out"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- value: " << _attributes->value << std::endl;
oss << "- input: " << _attributes->input->info() << std::endl;
oss << "- t1: " << _attributes->t1->info() << std::endl;
oss << "- t2: " << _attributes->t2->info() << std::endl;
oss << "- out: " << _attributes->out->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::addcmul
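In the test above, the `value` attribute arrives as a raw byte vector and is recovered with a reinterpret_cast to float; a test-case generator would pack it the same way (a Python sketch, assuming a little-endian float32 layout):

import struct

value_bytes = struct.pack("<f", 0.5)  # 4-byte float32, little-endian
assert struct.unpack("<f", value_bytes)[0] == 0.5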
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::atanh {
struct Test::Attributes {
    std::shared_ptr<Tensor> a;   // input
    std::shared_ptr<Tensor> y;   // output
    std::shared_ptr<Tensor> ans; // reference result
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
// atanh only needs a (input), y (output), and ans (reference)
if (tensors.find("a") == tensors.end()
|| tensors.find("y") == tensors.end()
|| tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Atanh Test: Missing tensors.");
}
test->_attributes->a = tensors["a"];
test->_attributes->y = tensors["y"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopAtanhDescriptor_t op_desc;
auto a = _attributes->a->to(device, device_id);
auto y = _attributes->y->to(device, device_id);
// Use the 4-parameter creation interface (handle, desc, y, a)
CHECK_OR(infiniopCreateAtanhDescriptor(handle, &op_desc,
y->desc(),
a->desc()),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create atanh descriptor."));
size_t workspace_size;
CHECK_OR(infiniopGetAtanhWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
// Run the computation (unary op: only y and a, no second operand)
CHECK_OR(infiniopAtanh(op_desc, workspace, workspace_size,
y->data(),
a->data(),
nullptr),
return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during atanh execution."));
// Verify the result
try {
allClose(y, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
    // Benchmark
    double elapsed_time = benchmark(
[=]() {
infiniopAtanh(
op_desc, workspace, workspace_size,
y->data(),
a->data(),
nullptr);
},
warm_ups, iterations);
    // Release resources, as in the addcmul test
    infinirtFree(workspace);
    infiniopDestroyAtanhDescriptor(op_desc);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {};
}
std::vector<std::string> Test::tensor_names() {
return {"a", "y", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"y"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- a: " << _attributes->a->info() << std::endl;
oss << "- y: " << _attributes->y->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::atanh
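The `benchmark(fn, warm_ups, iterations)` helper used by both tests presumably runs unmeasured warm-up iterations and then averages wall time per call; a Python sketch of that shape (illustrative, not the actual harness, and the time unit is an assumption):

import time

def benchmark(fn, warm_ups, iterations):
    for _ in range(warm_ups):
        fn()  # warm-up runs are not timed
    start = time.perf_counter()
    for _ in range(iterations):
        fn()
    return (time.perf_counter() - start) / iterations  # average seconds per call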