Commit 210e31d3 authored by PanZezhong's avatar PanZezhong
Browse files

issue/1031 T1-1-4

parent 7f295448
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <vector>
namespace infinicore::op {
// "All" reduction: for each output element, whether every input element along
// the reduced dimensions evaluates to true. (Semantics inferred from the op
// name and the BOOL output dtype used by the allocating wrapper — confirm
// against the backend kernels.)
class All {
public:
// Kernel signature: (output, input, dim, keepdim).
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool);
// Dispatches to the implementation registered for the current device type.
static void execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
// Registry mapping device types to kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: creates and returns the output tensor.
// An empty `dim` means "reduce over every dimension".
Tensor all(Tensor input, std::vector<size_t> dim, bool keepdim = false);
// In-place variant: writes into a caller-provided output tensor.
void all_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <vector>
namespace infinicore::op {
// "Sum" reduction: adds input elements along the given dimensions.
class Sum {
public:
// Kernel signature: (output, input, dim, keepdim).
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool);
// Dispatches to the implementation registered for the current device type.
static void execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
// Registry mapping device types to kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: creates and returns the output tensor.
// An empty `dim` means "reduce over every dimension".
Tensor sum(Tensor input, std::vector<size_t> dim, bool keepdim = false);
// In-place variant: writes into a caller-provided output tensor.
void sum_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <utility> // std::pair — used by topk(); previously only available transitively
namespace infinicore::op {
// Top-K selection along one dimension: produces the k largest (or smallest,
// when largest == false) values and their indices, optionally sorted.
class TopK {
public:
// Kernel signature: (values_output, indices_output, input, k, dim, largest, sorted).
using schema = void (*)(Tensor, Tensor, Tensor, size_t, size_t, bool, bool);
// Dispatches to the implementation registered for the current device type.
static void execute(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
// Registry mapping device types to kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: returns {values, indices}.
std::pair<Tensor, Tensor> topk(Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
// In-place variant: writes into caller-provided value/index tensors.
void topk_(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <utility>
#include <vector>
namespace infinicore::op {
// "Var" reduction: variance of input elements along the given dimensions.
// `unbiased` presumably selects the N-1 (Bessel-corrected) divisor, matching
// the usual torch convention — confirm against the backend kernels.
class Var {
public:
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool, bool); // var_output, input, dim, unbiased, keepdim
// Dispatches to the implementation registered for the current device type.
static void execute(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// Registry mapping device types to kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: creates and returns the variance tensor.
Tensor var(Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// In-place variant: writes into a caller-provided output tensor.
void var_(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <utility>
#include <vector>
namespace infinicore::op {
// Fused variance + mean reduction along the given dimensions (one pass over
// the input produces both outputs).
class Var_Mean {
public:
using schema = void (*)(Tensor, Tensor, Tensor, std::vector<size_t>, bool, bool); // var_output, mean_output, input, dim, unbiased, keepdim
// Dispatches to the implementation registered for the current device type.
static void execute(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// Registry mapping device types to kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating variant: returns {variance, mean}.
std::pair<Tensor, Tensor> var_mean(Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// In-place variant: writes into caller-provided output tensors.
void var_mean_(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
} // namespace infinicore::op
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include "infiniop/ops/adaptive_max_pool1d.h" #include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h" #include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h" #include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/all.h"
#include "infiniop/ops/asinh.h" #include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h" #include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h" #include "infiniop/ops/causal_softmax.h"
...@@ -38,17 +39,21 @@ ...@@ -38,17 +39,21 @@
#include "infiniop/ops/softmax.h" #include "infiniop/ops/softmax.h"
#include "infiniop/ops/softplus.h" #include "infiniop/ops/softplus.h"
#include "infiniop/ops/sub.h" #include "infiniop/ops/sub.h"
#include "infiniop/ops/sum.h"
#include "infiniop/ops/swiglu.h" #include "infiniop/ops/swiglu.h"
#include "infiniop/ops/tanh.h" #include "infiniop/ops/tanh.h"
#include "infiniop/ops/topk.h"
#include "infiniop/ops/topkrouter.h" #include "infiniop/ops/topkrouter.h"
#include "infiniop/ops/topksoftmax.h" #include "infiniop/ops/topksoftmax.h"
#include "infiniop/ops/var.h"
#include "infiniop/ops/var_mean.h"
#include "infiniop/ops/zeros.h" #include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h" #include "infiniop/tensor_descriptor.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h" #include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/equal.h" #include "infiniop/ops/equal.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/hardtanh.h" #include "infiniop/ops/hardtanh.h"
#endif // __INFINIOP_API_H__ #endif // __INFINIOP_API_H__
#ifndef __INFINIOP_ALL_API_H__
#define __INFINIOP_ALL_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
#include <vector>
/* Opaque handle to an "All" reduction operator instance. */
typedef struct InfiniopDescriptor *infiniopAllDescriptor_t;
/* Create a descriptor bound to the given tensor shapes.
 * dim: array of dim_size axis indices to reduce over; keepdim keeps reduced
 * axes with extent 1. */
__INFINI_C __export infiniStatus_t infiniopCreateAllDescriptor(infiniopHandle_t handle,
infiniopAllDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool keepdim);
/* Query the scratch-buffer size the execution call requires. */
__INFINI_C __export infiniStatus_t infiniopGetAllWorkspaceSize(infiniopAllDescriptor_t desc, size_t *size);
/* Execute the reduction. workspace must be at least workspace_size bytes;
 * stream is the backend's execution stream (may be null for synchronous
 * backends — confirm per device). */
__INFINI_C __export infiniStatus_t infiniopAll(infiniopAllDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *output,
const void *input,
size_t *dim,
size_t dim_size,
bool keepdim,
void *stream);
/* Release the descriptor created above. */
__INFINI_C __export infiniStatus_t infiniopDestroyAllDescriptor(infiniopAllDescriptor_t desc);
#endif
#ifndef __INFINIOP_SUM_API_H__
#define __INFINIOP_SUM_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
#include <vector>
/* Opaque handle to a "Sum" reduction operator instance. */
typedef struct InfiniopDescriptor *infiniopSumDescriptor_t;
/* Create a descriptor bound to the given tensor shapes.
 * dim: array of dim_size axis indices to reduce over; keepdim keeps reduced
 * axes with extent 1. */
__INFINI_C __export infiniStatus_t infiniopCreateSumDescriptor(infiniopHandle_t handle,
infiniopSumDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool keepdim);
/* Query the scratch-buffer size the execution call requires. */
__INFINI_C __export infiniStatus_t infiniopGetSumWorkspaceSize(infiniopSumDescriptor_t desc, size_t *size);
/* Execute the reduction on the given stream. */
__INFINI_C __export infiniStatus_t infiniopSum(infiniopSumDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *output,
const void *input,
size_t *dim,
size_t dim_size,
bool keepdim,
void *stream);
/* Release the descriptor created above. */
__INFINI_C __export infiniStatus_t infiniopDestroySumDescriptor(infiniopSumDescriptor_t desc);
#endif
#ifndef __INFINIOP_TOPK_API_H__
#define __INFINIOP_TOPK_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
#include <vector>
/* Opaque handle to a "TopK" selection operator instance. */
typedef struct InfiniopDescriptor *infiniopTopKDescriptor_t;
/* Create a descriptor: select the k largest (or smallest, when !largest)
 * elements along axis `dim`, optionally sorted. */
__INFINI_C __export infiniStatus_t infiniopCreateTopKDescriptor(infiniopHandle_t handle,
infiniopTopKDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t values_output_desc,
infiniopTensorDescriptor_t indices_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t k,
size_t dim,
bool largest,
bool sorted);
/* Query the scratch-buffer size the execution call requires. */
__INFINI_C __export infiniStatus_t infiniopGetTopKWorkspaceSize(infiniopTopKDescriptor_t desc, size_t *size);
/* Execute the selection, writing values and their indices separately. */
__INFINI_C __export infiniStatus_t infiniopTopK(infiniopTopKDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *values_output,
void *indices_output,
const void *input,
size_t k,
size_t dim,
bool largest,
bool sorted,
void *stream);
/* Release the descriptor created above. */
__INFINI_C __export infiniStatus_t infiniopDestroyTopKDescriptor(infiniopTopKDescriptor_t desc);
#endif
#ifndef __INFINIOP_VAR_API_H__
#define __INFINIOP_VAR_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
#include <vector>
/* Opaque handle to a "Var" (variance) reduction operator instance. */
typedef struct InfiniopDescriptor *infiniopVarDescriptor_t;
/* Create a descriptor. dim: array of dim_size axis indices to reduce over;
 * unbiased presumably selects the N-1 divisor — confirm in the backend. */
__INFINI_C __export infiniStatus_t infiniopCreateVarDescriptor(infiniopHandle_t handle,
infiniopVarDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t var_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim);
/* Query the scratch-buffer size the execution call requires. */
__INFINI_C __export infiniStatus_t infiniopGetVarWorkspaceSize(infiniopVarDescriptor_t desc, size_t *size);
/* Execute the reduction on the given stream. */
__INFINI_C __export infiniStatus_t infiniopVar(infiniopVarDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *var_output,
const void *input,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim,
void *stream);
/* Release the descriptor created above. */
__INFINI_C __export infiniStatus_t infiniopDestroyVarDescriptor(infiniopVarDescriptor_t desc);
#endif
#ifndef __INFINIOP_VAR_MEAN_API_H__
#define __INFINIOP_VAR_MEAN_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
#include <vector>
/* Opaque handle to a fused variance+mean reduction operator instance. */
typedef struct InfiniopDescriptor *infiniopVarMeanDescriptor_t;
/* Create a descriptor producing both variance and mean over the same axes. */
__INFINI_C __export infiniStatus_t infiniopCreateVarMeanDescriptor(infiniopHandle_t handle,
infiniopVarMeanDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t var_output_desc,
infiniopTensorDescriptor_t mean_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim);
/* Query the scratch-buffer size the execution call requires. */
__INFINI_C __export infiniStatus_t infiniopGetVarMeanWorkspaceSize(infiniopVarMeanDescriptor_t desc, size_t *size);
/* Execute the fused reduction, writing variance and mean separately. */
__INFINI_C __export infiniStatus_t infiniopVarMean(infiniopVarMeanDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *var_output,
void *mean_output,
const void *input,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim,
void *stream);
/* Release the descriptor created above. */
__INFINI_C __export infiniStatus_t infiniopDestroyVarMeanDescriptor(infiniopVarMeanDescriptor_t desc);
#endif
...@@ -49,6 +49,7 @@ from infinicore.dtype import ( ...@@ -49,6 +49,7 @@ from infinicore.dtype import (
) )
from infinicore.ops.add import add from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.all import all
from infinicore.ops.asinh import asinh from infinicore.ops.asinh import asinh
from infinicore.ops.attention import attention from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm from infinicore.ops.baddbmm import baddbmm
...@@ -66,7 +67,11 @@ from infinicore.ops.paged_attention_prefill import paged_attention_prefill ...@@ -66,7 +67,11 @@ from infinicore.ops.paged_attention_prefill import paged_attention_prefill
from infinicore.ops.paged_caching import paged_caching from infinicore.ops.paged_caching import paged_caching
from infinicore.ops.rearrange import rearrange from infinicore.ops.rearrange import rearrange
from infinicore.ops.squeeze import squeeze from infinicore.ops.squeeze import squeeze
from infinicore.ops.sum import sum
from infinicore.ops.topk import topk
from infinicore.ops.unsqueeze import unsqueeze from infinicore.ops.unsqueeze import unsqueeze
from infinicore.ops.var import var
from infinicore.ops.var_mean import var_mean
from infinicore.tensor import ( from infinicore.tensor import (
Tensor, Tensor,
empty, empty,
...@@ -155,6 +160,11 @@ __all__ = [ ...@@ -155,6 +160,11 @@ __all__ = [
"strided_empty", "strided_empty",
"strided_from_blob", "strided_from_blob",
"zeros", "zeros",
"sum",
"var_mean",
"var",
"topk",
"all",
] ]
use_ntops = False use_ntops = False
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def all(input, dim=None, keepdim=False, out=None):
    """Test whether all elements along the given dimensions evaluate to True.

    Args:
        input (Tensor): the input tensor.
        dim (int | Sequence[int] | None): dimensions to reduce. ``None``
            (the default) reduces over every dimension.
        keepdim (bool): keep reduced dimensions with size 1.
        out (Tensor, optional): pre-allocated output tensor.

    Returns:
        Tensor: the reduced boolean tensor (``out`` when provided).
    """
    # The native binding expects a list of axes; previously the default
    # ``dim=None`` was forwarded as-is and rejected by the binding. Map the
    # torch-style None/int forms onto the list convention (empty list means
    # "reduce over all dimensions" on the C++ side).
    if dim is None:
        dim = []
    elif isinstance(dim, int):
        dim = [dim]
    if out is None:
        return Tensor(_infinicore.all(input._underlying, dim, keepdim))
    _infinicore.all_(out._underlying, input._underlying, dim, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def sum(input, dim=None, keepdim=False, out=None):
    """Sum the elements of the input tensor along the given dimensions.

    Args:
        input (Tensor): the input tensor.
        dim (int | Sequence[int] | None): dimensions to reduce. ``None``
            (the default) reduces over every dimension.
        keepdim (bool): keep reduced dimensions with size 1.
        out (Tensor, optional): pre-allocated output tensor.

    Returns:
        Tensor: the reduced tensor (``out`` when provided).
    """
    # The native binding expects a list of axes; previously the default
    # ``dim=None`` was forwarded as-is and rejected by the binding. Map the
    # torch-style None/int forms onto the list convention (empty list means
    # "reduce over all dimensions" on the C++ side).
    if dim is None:
        dim = []
    elif isinstance(dim, int):
        dim = [dim]
    if out is None:
        return Tensor(_infinicore.sum(input._underlying, dim, keepdim))
    _infinicore.sum_(out._underlying, input._underlying, dim, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def topk(input, k, dim, largest=True, sorted=True, out=None):
    """Return the ``k`` largest (or smallest) elements along ``dim``.

    Args:
        input (Tensor): the input tensor.
        k (int): number of elements to select.
        dim (int): dimension to select along.
        largest (bool): select largest values when True, smallest otherwise.
        sorted (bool): return the selected values in sorted order.
        out (tuple[Tensor, Tensor], optional): pre-allocated
            ``(values, indices)`` output pair.

    Returns:
        tuple[Tensor, Tensor]: ``(values, indices)`` (``out`` when provided).
    """
    if out is None:
        values, indices = _infinicore.topk(input._underlying, k, dim, largest, sorted)
        return Tensor(values), Tensor(indices)
    # BUG FIX: ``out`` is a (values, indices) pair — the previous code passed
    # ``out._underlying`` plus ``input`` shifted into the indices slot,
    # leaving the native 7-argument topk_ call one argument short.
    values_out, indices_out = out
    _infinicore.topk_(
        values_out._underlying,
        indices_out._underlying,
        input._underlying,
        k,
        dim,
        largest,
        sorted,
    )
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def var(input, dim=None, unbiased=True, keepdim=False, out=None):
    """Compute the variance of ``input`` along the given dimensions.

    Args:
        input (Tensor): the input tensor.
        dim (int | Sequence[int] | None): dimensions to reduce. ``None``
            (the default) reduces over every dimension.
        unbiased (bool): presumably selects the N-1 (Bessel-corrected)
            divisor, as in torch — confirm against the backend.
        keepdim (bool): keep reduced dimensions with size 1.
        out (Tensor, optional): pre-allocated output tensor.

    Returns:
        Tensor: the variance tensor (``out`` when provided).
    """
    # The native binding expects a list of axes; previously the default
    # ``dim=None`` was forwarded as-is and rejected by the binding.
    if dim is None:
        dim = []
    elif isinstance(dim, int):
        dim = [dim]
    if out is None:
        return Tensor(_infinicore.var(input._underlying, dim, unbiased, keepdim))
    _infinicore.var_(out._underlying, input._underlying, dim, unbiased, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def var_mean(input, dim=None, unbiased=True, keepdim=False, out=None):
    """Compute variance and mean of ``input`` along the given dimensions.

    Args:
        input (Tensor): the input tensor.
        dim (int | Sequence[int] | None): dimensions to reduce. ``None``
            (the default) reduces over every dimension.
        unbiased (bool): presumably selects the N-1 (Bessel-corrected)
            divisor, as in torch — confirm against the backend.
        keepdim (bool): keep reduced dimensions with size 1.
        out (tuple[Tensor, Tensor], optional): pre-allocated
            ``(variance, mean)`` output pair.

    Returns:
        tuple[Tensor, Tensor]: ``(variance, mean)`` (``out`` when provided).
    """
    # The native binding expects a list of axes; previously the default
    # ``dim=None`` was forwarded as-is and rejected by the binding.
    if dim is None:
        dim = []
    elif isinstance(dim, int):
        dim = [dim]
    if out is None:
        var_tensor, mean_tensor = _infinicore.var_mean(
            input._underlying, dim, unbiased, keepdim
        )
        return Tensor(var_tensor), Tensor(mean_tensor)
    var_output, mean_output = out
    _infinicore.var_mean_(
        var_output._underlying,
        mean_output._underlying,
        input._underlying,
        dim,
        unbiased,
        keepdim,
    )
    return out
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/all.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::all_impl::infiniop {
// Thread-local LRU cache of infiniop descriptors keyed by a hash of the call
// signature; evicted descriptors are destroyed through the callback.
thread_local common::OpCache<size_t, infiniopAllDescriptor_t> caches(
    100, // capacity
    [](infiniopAllDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyAllDescriptor(desc));
            desc = nullptr;
        }
    });
// Runs the All reduction through the generic infiniop backend.
void calculate(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    // BUG FIX: the cache key previously hashed only dim.size(), so reductions
    // over different axes with the same count (e.g. {0} vs {1}) collided and
    // silently reused the wrong cached descriptor. Fold every axis value into
    // the seed as well.
    size_t seed = hash_combine(output, input, dim.size(), keepdim);
    for (const auto d : dim) {
        seed = hash_combine(seed, d);
    }
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopAllDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateAllDescriptor(
            context::getInfiniopHandle(output->device()), &desc,
            output->desc(), input->desc(), dim.data(), dim.size(), keepdim));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Descriptor-dependent scratch buffer; allocated fresh per call.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAllWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAll(
        desc, workspace->data(), workspace_size,
        output->data(), input->data(), dim.data(), dim.size(), keepdim, context::getStream()));
}
// Self-registration: this generic backend serves all the listed device types.
static bool registered = []() {
    All::dispatcher().registerDevice({Device::Type::CPU,
                                      Device::Type::NVIDIA,
                                      Device::Type::METAX,
                                      Device::Type::MOORE,
                                      Device::Type::ILUVATAR},
                                     &calculate, false);
    return true;
}();
} // namespace infinicore::op::all_impl::infiniop
#include "infinicore/ops/all.hpp"
#include "../../utils.hpp"
#include <algorithm> // std::sort — was missing and only compiled transitively
#include <iostream>
#include <stdexcept>
#include <vector>
namespace infinicore::op {
// Lazily-constructed singleton dispatcher for the All op.
common::OpDispatcher<All::schema> &All::dispatcher() {
    static common::OpDispatcher<All::schema> dispatcher_;
    return dispatcher_;
}
// Validates that both tensors share a device, switches the context to it and
// forwards to the implementation registered for that device type.
void All::execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No All implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, dim, keepdim);
}
// Allocating front end. An empty `dim` reduces over every dimension; the
// output dtype is BOOL.
Tensor all(Tensor input, std::vector<size_t> dim, bool keepdim) {
    auto in_shape = input->shape();
    std::vector<size_t> out_shape;
    if (dim.empty()) {
        // Empty list means "reduce over all dimensions".
        for (size_t i = 0; i < in_shape.size(); i++) {
            dim.push_back(i);
        }
    }
    std::sort(dim.begin(), dim.end());
    if (dim.size() == in_shape.size() && !keepdim) {
        out_shape = {}; // full reduction without keepdim -> 0-d tensor
    } else {
        size_t j = 0; // walks the sorted reduction axes
        for (size_t i = 0; i < in_shape.size(); i++) {
            const bool reduced = (j < dim.size() && dim[j] == i);
            if (reduced) {
                j++;
                if (keepdim) {
                    out_shape.push_back(1); // reduced axes kept with extent 1
                }
            } else {
                out_shape.push_back(in_shape[i]);
            }
        }
    }
    auto output = Tensor::empty(out_shape, DataType::BOOL, input->device());
    all_(output, input, dim, keepdim);
    return output;
}
// In-place front end: writes into the caller-provided output tensor.
void all_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    All::execute(output, input, dim, keepdim);
}
} // namespace infinicore::op
#include "infinicore/ops/sum.hpp"
#include "../../utils.hpp"
#include <algorithm> // std::sort — was missing and only compiled transitively
#include <stdexcept>
#include <vector>
namespace infinicore::op {
// Lazily-constructed singleton dispatcher for the Sum op.
common::OpDispatcher<Sum::schema> &Sum::dispatcher() {
    static common::OpDispatcher<Sum::schema> dispatcher_;
    return dispatcher_;
}
// Validates that both tensors share a device, switches the context to it and
// forwards to the implementation registered for that device type.
void Sum::execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No Sum implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, dim, keepdim);
}
// Allocating front end. An empty `dim` reduces over every dimension; the
// output keeps the input dtype.
Tensor sum(Tensor input, std::vector<size_t> dim, bool keepdim) {
    auto in_shape = input->shape();
    std::vector<size_t> out_shape;
    if (dim.empty()) {
        // Empty list means "reduce over all dimensions".
        for (size_t i = 0; i < in_shape.size(); i++) {
            dim.push_back(i);
        }
    }
    std::sort(dim.begin(), dim.end());
    if (dim.size() == in_shape.size() && !keepdim) {
        out_shape = {}; // full reduction without keepdim -> 0-d tensor
    } else {
        size_t j = 0; // walks the sorted reduction axes
        for (size_t i = 0; i < in_shape.size(); i++) {
            const bool reduced = (j < dim.size() && dim[j] == i);
            if (reduced) {
                j++;
                if (keepdim) {
                    out_shape.push_back(1); // reduced axes kept with extent 1
                }
            } else {
                out_shape.push_back(in_shape[i]);
            }
        }
    }
    auto output = Tensor::empty(out_shape, input->dtype(), input->device());
    sum_(output, input, dim, keepdim);
    return output;
}
// In-place front end: writes into the caller-provided output tensor.
void sum_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    Sum::execute(output, input, dim, keepdim);
}
} // namespace infinicore::op
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment