Commit 5f329d7a authored by PanZezhong's avatar PanZezhong
Browse files

issue/1031 T1-1-15

parent b2660e66
......@@ -16,6 +16,11 @@ hash_combine(size_t &seed, const T &value) {
// Specialization for Tensor
inline void hash_combine(size_t &seed, Tensor tensor) {
if (!tensor) {
hash_combine(seed, static_cast<size_t>(0));
return;
}
hash_combine(seed, static_cast<size_t>(tensor->dtype()));
for (Size shape : tensor->shape()) {
hash_combine(seed, shape);
......
......@@ -2,9 +2,13 @@
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/addcmul.hpp"
#include "ops/atanh.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/binary_cross_entropy_with_logits.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cdist.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/flash_attention.hpp"
......@@ -18,6 +22,7 @@
#include "ops/paged_caching.hpp"
#include "ops/random_sample.hpp"
#include "ops/rearrange.hpp"
#include "ops/reciprocal.hpp"
#include "ops/rms_norm.hpp"
#include "ops/rope.hpp"
#include "ops/silu.hpp"
......
#pragma once
#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Fused multiply-add operator: out = input + value * t1 * t2
// (semantics follow the C API header: `input` is the addend, `t1`/`t2`
// are the multiplicands, `value` scales their product).
class Addcmul {
public:
    // schema: out, input, t1, t2, value
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, float);
    // Runs the operator through the implementation registered for the current device.
    static void execute(Tensor out, Tensor input, Tensor t1, Tensor t2, float value);
    // Dispatcher used to register/look up backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: returns a new tensor holding the result.
Tensor addcmul(Tensor input, Tensor t1, Tensor t2, float value);
// Explicit-output variant: writes the result into `out`.
void addcmul_(Tensor out, Tensor input, Tensor t1, Tensor t2, float value);

} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Element-wise inverse hyperbolic tangent operator.
class Atanh {
public:
    // schema: void(output Tensor, input Tensor)
    using schema = void (*)(Tensor, Tensor);
    // Executes the operator: writes atanh(a) into y.
    static void execute(Tensor y, Tensor a);
    // Returns the operator dispatcher, used to match backend
    // (CPU/CUDA, etc.) implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

/**
 * @brief Computes the inverse hyperbolic tangent of the input tensor (out-of-place).
 * @param a Input tensor.
 * @return A new tensor containing the result.
 */
Tensor atanh(Tensor a);

/**
 * @brief Computes the inverse hyperbolic tangent of the input tensor (in-place / specified output).
 * @param y Output tensor.
 * @param a Input tensor.
 */
void atanh_(Tensor y, Tensor a);

} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <string>

namespace infinicore::op {

// Binary cross entropy with logits loss operator.
class BinaryCrossEntropyWithLogits {
public:
    /**
     * @brief Function prototype for the BCEWithLogits operator.
     * Argument order: out, logits, target, weight, pos_weight, reduction.
     */
    using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, std::string);
    static void execute(Tensor out,
                        Tensor logits,
                        Tensor target,
                        Tensor weight,
                        Tensor pos_weight,
                        std::string reduction);
    static common::OpDispatcher<schema> &dispatcher();
};

/**
 * @brief Out-of-place interface; optional weights default to
 * default-constructed (empty) tensors.
 */
Tensor binary_cross_entropy_with_logits(Tensor logits,
                                        Tensor target,
                                        Tensor weight = {},
                                        Tensor pos_weight = {},
                                        std::string reduction = "mean");

/**
 * @brief Interface with an explicitly provided output tensor.
 */
void binary_cross_entropy_with_logits_(Tensor out,
                                       Tensor logits,
                                       Tensor target,
                                       Tensor weight,
                                       Tensor pos_weight,
                                       std::string reduction);

} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Pairwise p-norm distance operator.
class Cdist {
public:
    /**
     * @brief Pairwise distance computation.
     * schema: out (M, N), x1 (M, D), x2 (N, D), p (norm degree)
     */
    using schema = void (*)(Tensor, Tensor, Tensor, double);
    static void execute(Tensor out, Tensor x1, Tensor x2, double p);
    static common::OpDispatcher<schema> &dispatcher();
};

/**
 * @brief Out-of-place interface.
 * @return A new tensor of shape (M, N).
 */
Tensor cdist(Tensor x1, Tensor x2, double p = 2.0);

/**
 * @brief Interface with an explicitly provided output tensor.
 */
void cdist_(Tensor out, Tensor x1, Tensor x2, double p = 2.0);

} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"

namespace infinicore::op {

// Element-wise reciprocal operator.
class Reciprocal {
public:
    // schema: void(output Tensor, input Tensor)
    using schema = void (*)(Tensor, Tensor);
    // Executes the operator: writes the element-wise reciprocal of x into y.
    static void execute(Tensor y, Tensor x);
    // Dispatcher used to register/look up backend implementations.
    static common::OpDispatcher<schema> &dispatcher();
};

// Out-of-place: returns a new tensor containing the reciprocal of x.
Tensor reciprocal(Tensor x);
// Explicit-output variant: writes the reciprocal of x into y.
void reciprocal_(Tensor y, Tensor x);

} // namespace infinicore::op
......@@ -4,16 +4,25 @@
#include "infiniop/handle.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/addcmul.h"
#include "infiniop/ops/all.h"
#include "infiniop/ops/atanh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/binary_cross_entropy_with_logits.h"
#include "infiniop/ops/causal_softmax.h"
#include "infiniop/ops/cdist.h"
#include "infiniop/ops/clip.h"
#include "infiniop/ops/conv.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/dequantize_awq.h"
#include "infiniop/ops/embedding.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/flash_attention.h"
#include "infiniop/ops/gelu.h"
#include "infiniop/ops/gemm.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/hardtanh.h"
#include "infiniop/ops/int8_gemm.h"
#include "infiniop/ops/kv_caching.h"
#include "infiniop/ops/layer_norm.h"
......@@ -27,6 +36,7 @@
#include "infiniop/ops/quant/per_channel_quant_int8.h"
#include "infiniop/ops/random_sample.h"
#include "infiniop/ops/rearrange.h"
#include "infiniop/ops/reciprocal.h"
#include "infiniop/ops/relu.h"
#include "infiniop/ops/rms_norm.h"
#include "infiniop/ops/rope.h"
......@@ -47,10 +57,4 @@
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardtanh.h"
#endif // __INFINIOP_API_H__
#ifndef __INFINIOP_ADDCMUL_API_H__
#define __INFINIOP_ADDCMUL_API_H__

#include "../operator_descriptor.h"

// Descriptor type for the addcmul operator.
typedef struct InfiniopDescriptor *infiniopAddcmulDescriptor_t;

/**
 * @brief Creates an Addcmul operator descriptor.
 * @param handle Operator handle.
 * @param desc_ptr Receives the created descriptor.
 * @param out Output tensor descriptor.
 * @param input Addend tensor descriptor.
 * @param tensor1 First multiplicand tensor descriptor.
 * @param tensor2 Second multiplicand tensor descriptor.
 * @param value Scalar coefficient applied to the product.
 */
__INFINI_C __export infiniStatus_t infiniopCreateAddcmulDescriptor(infiniopHandle_t handle,
                                                                   infiniopAddcmulDescriptor_t *desc_ptr,
                                                                   infiniopTensorDescriptor_t out,
                                                                   infiniopTensorDescriptor_t input,
                                                                   infiniopTensorDescriptor_t tensor1,
                                                                   infiniopTensorDescriptor_t tensor2,
                                                                   float value);

/**
 * @brief Queries the workspace size required by the Addcmul computation.
 */
__INFINI_C __export infiniStatus_t infiniopGetAddcmulWorkspaceSize(infiniopAddcmulDescriptor_t desc, size_t *size);

/**
 * @brief Runs the Addcmul computation.
 * @param desc Operator descriptor.
 * @param workspace Scratch buffer pointer.
 * @param workspace_size Scratch buffer size in bytes.
 * @param out Output data pointer.
 * @param input Addend data pointer.
 * @param tensor1 First multiplicand data pointer.
 * @param tensor2 Second multiplicand data pointer.
 * @param stream Compute stream (e.g. a CUDA stream).
 */
__INFINI_C __export infiniStatus_t infiniopAddcmul(infiniopAddcmulDescriptor_t desc,
                                                   void *workspace,
                                                   size_t workspace_size,
                                                   void *out,
                                                   const void *input,
                                                   const void *tensor1,
                                                   const void *tensor2,
                                                   void *stream);

/**
 * @brief Destroys an Addcmul operator descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyAddcmulDescriptor(infiniopAddcmulDescriptor_t desc);

#endif
/* Fix: include-guard macro was mixed-case (__INFINIOP_Atanh_API_H__),
 * inconsistent with every sibling API header (ADDCMUL, CDIST, RECIPROCAL,
 * ...); normalized to all-caps. */
#ifndef __INFINIOP_ATANH_API_H__
#define __INFINIOP_ATANH_API_H__

#include "../operator_descriptor.h"

// Descriptor type for the atanh operator.
typedef struct InfiniopDescriptor *infiniopAtanhDescriptor_t;

/**
 * @brief Creates an Atanh operator descriptor.
 * @param handle Operator handle.
 * @param desc_ptr Receives the created descriptor.
 * @param y Output tensor descriptor.
 * @param a Input tensor descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopCreateAtanhDescriptor(infiniopHandle_t handle,
                                                                 infiniopAtanhDescriptor_t *desc_ptr,
                                                                 infiniopTensorDescriptor_t y,
                                                                 infiniopTensorDescriptor_t a);

/**
 * @brief Queries the workspace size required by the Atanh computation.
 */
__INFINI_C __export infiniStatus_t infiniopGetAtanhWorkspaceSize(infiniopAtanhDescriptor_t desc, size_t *size);

/**
 * @brief Runs the Atanh computation.
 * @param desc Operator descriptor.
 * @param workspace Scratch buffer pointer.
 * @param workspace_size Scratch buffer size in bytes.
 * @param y Output data pointer.
 * @param a Input data pointer.
 * @param stream Compute stream (e.g. a CUDA stream).
 */
__INFINI_C __export infiniStatus_t infiniopAtanh(infiniopAtanhDescriptor_t desc,
                                                 void *workspace,
                                                 size_t workspace_size,
                                                 void *y,
                                                 const void *a,
                                                 void *stream);

/**
 * @brief Destroys an Atanh operator descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyAtanhDescriptor(infiniopAtanhDescriptor_t desc);

#endif
#ifndef __INFINIOP_BINARY_CROSS_ENTROPY_WITH_LOGITS_API_H__
#define __INFINIOP_BINARY_CROSS_ENTROPY_WITH_LOGITS_API_H__

#include "../operator_descriptor.h"

// Reduction modes applied to the loss output.
typedef enum {
    INFINIOP_REDUCTION_NONE = 0,
    INFINIOP_REDUCTION_MEAN = 1,
    INFINIOP_REDUCTION_SUM = 2
} infiniopReduction_t;

// Descriptor type for the BCEWithLogits operator.
typedef struct InfiniopDescriptor *infiniopBCEWithLogitsDescriptor_t;

/**
 * @brief Creates a BCEWithLogits operator descriptor.
 * @param handle Operator handle.
 * @param desc_ptr Receives the created descriptor.
 * @param out Output tensor descriptor (same shape as the input for "none";
 *            a scalar for "mean"/"sum").
 * @param logits Input logits tensor descriptor.
 * @param target Target label tensor descriptor.
 * @param weight Per-sample weight descriptor (optional; pass NULL to omit).
 * @param pos_weight Positive-sample weight descriptor (optional; pass NULL to omit).
 * @param reduction Reduction mode (none, mean, sum).
 */
__INFINI_C __export infiniStatus_t infiniopCreateBCEWithLogitsDescriptor(
    infiniopHandle_t handle,
    infiniopBCEWithLogitsDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t out,
    infiniopTensorDescriptor_t logits,
    infiniopTensorDescriptor_t target,
    infiniopTensorDescriptor_t weight,
    infiniopTensorDescriptor_t pos_weight,
    infiniopReduction_t reduction);

/**
 * @brief Queries the workspace size required by the BCEWithLogits computation.
 */
__INFINI_C __export infiniStatus_t infiniopGetBCEWithLogitsWorkspaceSize(
    infiniopBCEWithLogitsDescriptor_t desc,
    size_t *size);

/**
 * @brief Runs the BCEWithLogits computation.
 * @param desc Operator descriptor.
 * @param workspace Scratch buffer pointer.
 * @param workspace_size Scratch buffer size in bytes.
 * @param out Output data pointer.
 * @param logits Logits data pointer.
 * @param target Target data pointer.
 * @param weight Weight data pointer (optional; NULL means all weights are 1).
 * @param pos_weight Positive-sample weight data pointer (optional; NULL means all weights are 1).
 * @param stream Compute stream.
 */
__INFINI_C __export infiniStatus_t infiniopBCEWithLogits(
    infiniopBCEWithLogitsDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *out,
    const void *logits,
    const void *target,
    const void *weight,
    const void *pos_weight,
    void *stream);

/**
 * @brief Destroys a BCEWithLogits operator descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyBCEWithLogitsDescriptor(
    infiniopBCEWithLogitsDescriptor_t desc);

#endif
#ifndef __INFINIOP_CDIST_API_H__
#define __INFINIOP_CDIST_API_H__

#include "../operator_descriptor.h"

// Descriptor type for the cdist operator.
typedef struct InfiniopDescriptor *infiniopCdistDescriptor_t;

/**
 * @brief Creates a Cdist operator descriptor.
 * @param handle Operator handle.
 * @param desc_ptr Receives the created descriptor.
 * @param y_desc Output tensor descriptor (shape: M x N).
 * @param x1_desc First input tensor descriptor (shape: M x D).
 * @param x2_desc Second input tensor descriptor (shape: N x D).
 * @param p Norm degree (L-p norm).
 */
__INFINI_C __export infiniStatus_t infiniopCreateCdistDescriptor(
    infiniopHandle_t handle,
    infiniopCdistDescriptor_t *desc_ptr,
    infiniopTensorDescriptor_t y_desc,
    infiniopTensorDescriptor_t x1_desc,
    infiniopTensorDescriptor_t x2_desc,
    double p);

/**
 * @brief Queries the workspace size required by the Cdist computation.
 */
__INFINI_C __export infiniStatus_t infiniopGetCdistWorkspaceSize(infiniopCdistDescriptor_t desc,
                                                                 size_t *size);

/**
 * @brief Runs the Cdist computation.
 * @param desc Operator descriptor.
 * @param workspace Scratch buffer pointer.
 * @param workspace_size Scratch buffer size in bytes.
 * @param y Output data pointer.
 * @param x1 First input data pointer.
 * @param x2 Second input data pointer.
 * @param stream Compute stream (e.g. a CUDA stream).
 */
__INFINI_C __export infiniStatus_t infiniopCdist(
    infiniopCdistDescriptor_t desc,
    void *workspace,
    size_t workspace_size,
    void *y,
    const void *x1,
    const void *x2,
    void *stream);

/**
 * @brief Destroys a Cdist operator descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyCdistDescriptor(infiniopCdistDescriptor_t desc);

#endif
#ifndef __INFINIOP_RECIPROCAL_API_H__
#define __INFINIOP_RECIPROCAL_API_H__

#include "../operator_descriptor.h"

// Descriptor type for the reciprocal operator.
typedef struct InfiniopDescriptor *infiniopReciprocalDescriptor_t;

/**
 * @brief Creates a Reciprocal operator descriptor.
 * @param handle Operator handle.
 * @param desc_ptr Receives the created descriptor.
 * @param y Output tensor descriptor.
 * @param x Input tensor descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopCreateReciprocalDescriptor(infiniopHandle_t handle,
                                                                      infiniopReciprocalDescriptor_t *desc_ptr,
                                                                      infiniopTensorDescriptor_t y,
                                                                      infiniopTensorDescriptor_t x);

/**
 * @brief Queries the workspace size required by the Reciprocal computation.
 */
__INFINI_C __export infiniStatus_t infiniopGetReciprocalWorkspaceSize(infiniopReciprocalDescriptor_t desc, size_t *size);

/**
 * @brief Runs the Reciprocal computation.
 * @param desc Operator descriptor.
 * @param workspace Scratch buffer pointer.
 * @param workspace_size Scratch buffer size in bytes.
 * @param y Output data pointer.
 * @param x Input data pointer.
 * @param stream Compute stream (e.g. a CUDA stream).
 */
__INFINI_C __export infiniStatus_t infiniopReciprocal(infiniopReciprocalDescriptor_t desc,
                                                      void *workspace,
                                                      size_t workspace_size,
                                                      void *y,
                                                      const void *x,
                                                      void *stream);

/**
 * @brief Destroys a Reciprocal operator descriptor.
 */
__INFINI_C __export infiniStatus_t infiniopDestroyReciprocalDescriptor(infiniopReciprocalDescriptor_t desc);

#endif
......@@ -49,8 +49,14 @@ from infinicore.dtype import (
)
from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.addcmul import addcmul
from infinicore.ops.all import all
from infinicore.ops.atanh import atanh
from infinicore.ops.attention import attention
from infinicore.ops.binary_cross_entropy_with_logits import (
binary_cross_entropy_with_logits,
)
from infinicore.ops.cdist import cdist
from infinicore.ops.cross_entropy import cross_entropy
from infinicore.ops.equal import equal
from infinicore.ops.kv_caching import kv_caching
......@@ -62,6 +68,7 @@ from infinicore.ops.paged_attention import paged_attention
from infinicore.ops.paged_attention_prefill import paged_attention_prefill
from infinicore.ops.paged_caching import paged_caching
from infinicore.ops.rearrange import rearrange
from infinicore.ops.reciprocal import reciprocal
from infinicore.ops.squeeze import squeeze
from infinicore.ops.sum import sum
from infinicore.ops.topk import topk
......@@ -125,6 +132,11 @@ __all__ = [
"short",
"uint8",
# Operations.
"addcmul",
"atanh",
"binary_cross_entropy_with_logits",
"cdist",
"reciprocal",
"add",
"add_rms_norm",
"add_rms_norm_",
......
from .avg_pool1d import avg_pool1d
from .binary_cross_entropy_with_logits import binary_cross_entropy_with_logits
from .causal_softmax import causal_softmax
from .embedding import embedding
from .flash_attention import flash_attention
......@@ -18,6 +19,7 @@ __all__ = [
"embedding",
"flash_attention",
"linear",
"binary_cross_entropy_with_logits",
"random_sample",
"rms_norm",
"RopeAlgo",
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def binary_cross_entropy_with_logits(
    input: Tensor,
    target: Tensor,
    weight: Tensor | None = None,
    pos_weight: Tensor | None = None,
    reduction: str = "mean",
    *,
    out: Tensor | None = None,
) -> Tensor:
    """Binary cross entropy loss with logits.

    This wraps the underlying C++/CUDA implementation exposed via
    ``_infinicore``. The low-level binding treats missing ``weight`` /
    ``pos_weight`` via default-constructed tensors, so ``None`` is never
    passed down; optional tensors are forwarded as keyword arguments only
    when they are provided, letting pybind11 apply its defaults.

    Args:
        input: Logits tensor.
        target: Target label tensor.
        weight: Optional per-sample weight tensor.
        pos_weight: Optional positive-class weight tensor.
        reduction: One of ``"none"``, ``"mean"`` or ``"sum"``.
        out: Optional pre-allocated output tensor.

    Returns:
        The loss tensor (``out`` when it was supplied).
    """
    # Build the optional-argument set once instead of enumerating all
    # weight/pos_weight combinations; omitted keys fall back to the
    # binding's default-constructed tensors.
    kwargs = {"reduction": reduction}
    if weight is not None:
        kwargs["weight"] = weight._underlying
    if pos_weight is not None:
        kwargs["pos_weight"] = pos_weight._underlying

    if out is None:
        # Out-of-place API: the binding allocates and returns a new tensor.
        return Tensor(
            _infinicore.binary_cross_entropy_with_logits(
                input._underlying, target._underlying, **kwargs
            )
        )

    # Explicit-output API: the result is written into `out`.
    _infinicore.binary_cross_entropy_with_logits_(
        out._underlying, input._underlying, target._underlying, **kwargs
    )
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def addcmul(input, tensor1, tensor2, value=1.0, *, out=None):
    """Fused multiply-add wrapper around the ``_infinicore`` addcmul op.

    When ``out`` is omitted a new tensor is allocated and returned;
    otherwise the result is written into ``out`` and ``out`` is returned.
    """
    scale = float(value)
    if out is not None:
        _infinicore.addcmul_(
            out._underlying,
            input._underlying,
            tensor1._underlying,
            tensor2._underlying,
            scale,
        )
        return out
    raw = _infinicore.addcmul(
        input._underlying,
        tensor1._underlying,
        tensor2._underlying,
        scale,
    )
    return Tensor(raw)
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def atanh(input, *, out=None):
    """Element-wise inverse hyperbolic tangent via the ``_infinicore`` backend.

    Returns a new tensor when ``out`` is omitted; otherwise writes into
    ``out`` and returns it.
    """
    if out is not None:
        _infinicore.atanh_(out._underlying, input._underlying)
        return out
    return Tensor(_infinicore.atanh(input._underlying))
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def binary_cross_entropy_with_logits(
    input, target, weight=None, pos_weight=None, reduction="mean", *, out=None
):
    """Binary cross entropy loss with logits.

    Args:
        input: Tensor (logits).
        target: Tensor (labels).
        weight: Tensor (optional, sample-wise weight).
        pos_weight: Tensor (optional, class-wise weight).
        reduction: str ('none', 'mean', 'sum').
        out: Optional pre-allocated output tensor.

    Returns:
        The loss tensor (``out`` when it was supplied).
    """
    # Fix: the binding models missing optional tensors with
    # default-constructed tensors, not ``None`` — passing ``None``
    # positionally is not accepted. Omit absent optionals so pybind11
    # applies its defaults instead.
    kwargs = {"reduction": str(reduction)}
    if weight is not None:
        kwargs["weight"] = weight._underlying
    if pos_weight is not None:
        kwargs["pos_weight"] = pos_weight._underlying

    if out is None:
        # Out-of-place interface: returns a freshly created tensor.
        return Tensor(
            _infinicore.binary_cross_entropy_with_logits(
                input._underlying, target._underlying, **kwargs
            )
        )

    # Explicit-output interface (binary_cross_entropy_with_logits_).
    _infinicore.binary_cross_entropy_with_logits_(
        out._underlying, input._underlying, target._underlying, **kwargs
    )
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def cdist(x1, x2, p=2.0, *, out=None):
    """Compute the p-norm distance between every pair of vectors in two sets.

    Args:
        x1 (Tensor): Input tensor of shape (M, D).
        x2 (Tensor): Input tensor of shape (N, D).
        p (float): Order of the norm; defaults to 2.0.
        out (Tensor, optional): Tensor receiving the result.

    Returns:
        Tensor: Distance matrix of shape (M, N).
    """
    norm_order = float(p)
    if out is not None:
        # Explicit-output path: the result is written into the caller's tensor.
        _infinicore.cdist_(
            out._underlying,
            x1._underlying,
            x2._underlying,
            norm_order,
        )
        return out
    # Out-of-place path: the C++ backend derives the output shape from
    # x1 and x2 and creates a new tensor.
    return Tensor(
        _infinicore.cdist(
            x1._underlying,
            x2._underlying,
            norm_order,
        )
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment