Commit 210e31d3 authored by PanZezhong's avatar PanZezhong
Browse files

issue/1031 T1-1-4

parent 7f295448
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <vector>
namespace infinicore::op {
// All: reduction operator computing a logical AND over the selected axes.
// Device backends register kernels on the dispatcher; execute() routes to them.
class All {
public:
// Kernel signature: (output, input, dim, keepdim).
// `dim` lists the axes to reduce; an empty list reduces every axis.
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool);
// Runs the registered kernel for the current device; `output` must be preallocated.
static void execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
// Per-device registry of kernel implementations (lazily constructed singleton).
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: creates and returns a BOOL output tensor of the reduced shape.
Tensor all(Tensor input, std::vector<size_t> dim, bool keepdim = false);
// In-place form: writes the reduction into a caller-provided output tensor.
void all_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <vector>
namespace infinicore::op {
// Sum: reduction operator adding elements over the selected axes.
// Device backends register kernels on the dispatcher; execute() routes to them.
class Sum {
public:
// Kernel signature: (output, input, dim, keepdim).
// `dim` lists the axes to reduce; an empty list reduces every axis.
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool);
// Runs the registered kernel for the current device; `output` must be preallocated.
static void execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
// Per-device registry of kernel implementations (lazily constructed singleton).
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: creates and returns an output tensor with the input's dtype.
Tensor sum(Tensor input, std::vector<size_t> dim, bool keepdim = false);
// In-place form: writes the reduction into a caller-provided output tensor.
void sum_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <utility>
namespace infinicore::op {
// TopK: selects the k largest (or smallest) elements along one axis,
// producing both the values and their indices.
class TopK {
public:
// Kernel signature: (values_output, indices_output, input, k, dim, largest, sorted).
using schema = void (*)(Tensor, Tensor, Tensor, size_t, size_t, bool, bool);
// Runs the registered kernel for the current device; both outputs must be preallocated.
// `largest` selects max vs. min elements; `sorted` requests ordered results.
static void execute(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
// Per-device registry of kernel implementations (lazily constructed singleton).
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: returns the (values, indices) pair.
std::pair<Tensor, Tensor> topk(Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
// In-place form: writes into caller-provided values/indices tensors.
void topk_(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest = true, bool sorted = true);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <utility>
#include <vector>
namespace infinicore::op {
// Var: variance reduction over the selected axes.
class Var {
public:
using schema = void (*)(Tensor, Tensor, std::vector<size_t>, bool, bool); // var_output, input, dim, unbiased, keepdim
// Runs the registered kernel for the current device; `var_output` must be preallocated.
// NOTE(review): `unbiased` presumably selects Bessel's correction (N-1 divisor),
// but the semantics live in the backend kernels — confirm there.
static void execute(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// Per-device registry of kernel implementations (lazily constructed singleton).
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: creates and returns the variance tensor.
Tensor var(Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// In-place form: writes the variance into a caller-provided output tensor.
void var_(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
#include <optional>
#include <utility>
#include <vector>
namespace infinicore::op {
// Var_Mean: fused variance + mean reduction over the selected axes,
// producing both statistics in one pass.
class Var_Mean {
public:
using schema = void (*)(Tensor, Tensor, Tensor, std::vector<size_t>, bool, bool); // var_output, mean_output, input, dim, unbiased, keepdim
// Runs the registered kernel for the current device; both outputs must be preallocated.
// NOTE(review): `unbiased` presumably selects Bessel's correction — confirm in backends.
static void execute(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// Per-device registry of kernel implementations (lazily constructed singleton).
static common::OpDispatcher<schema> &dispatcher();
};
// Allocating form: returns the (variance, mean) pair.
std::pair<Tensor, Tensor> var_mean(Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
// In-place form: writes into caller-provided variance/mean tensors.
void var_mean_(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased = true, bool keepdim = false);
} // namespace infinicore::op
......@@ -5,6 +5,7 @@
#include "infiniop/ops/adaptive_max_pool1d.h"
#include "infiniop/ops/add.h"
#include "infiniop/ops/add_rms_norm.h"
#include "infiniop/ops/all.h"
#include "infiniop/ops/asinh.h"
#include "infiniop/ops/attention.h"
#include "infiniop/ops/causal_softmax.h"
......@@ -38,17 +39,21 @@
#include "infiniop/ops/softmax.h"
#include "infiniop/ops/softplus.h"
#include "infiniop/ops/sub.h"
#include "infiniop/ops/sum.h"
#include "infiniop/ops/swiglu.h"
#include "infiniop/ops/tanh.h"
#include "infiniop/ops/topk.h"
#include "infiniop/ops/topkrouter.h"
#include "infiniop/ops/topksoftmax.h"
#include "infiniop/ops/var.h"
#include "infiniop/ops/var_mean.h"
#include "infiniop/ops/zeros.h"
#include "infiniop/tensor_descriptor.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/avg_pool1d.h"
#include "infiniop/ops/cross_entropy.h"
#include "infiniop/ops/equal.h"
#include "infiniop/ops/hardswish.h"
#include "infiniop/ops/hardtanh.h"
#endif // __INFINIOP_API_H__
#ifndef __INFINIOP_ALL_API_H__
#define __INFINIOP_ALL_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
// NOTE(review): <vector> is a C++ header in what looks like a C-callable API
// (__INFINI_C) and is not used below — confirm it is intentional.
#include <vector>
// Opaque descriptor handle for the All (logical-AND reduction) operator.
typedef struct InfiniopDescriptor *infiniopAllDescriptor_t;
// Creates a descriptor bound to the given output/input tensor descriptors.
// `dim` points to `dim_size` axis indices to reduce; `keepdim` keeps reduced
// axes with extent 1 in the output.
__INFINI_C __export infiniStatus_t infiniopCreateAllDescriptor(infiniopHandle_t handle,
infiniopAllDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool keepdim);
// Queries the scratch workspace size (in bytes) required by infiniopAll.
__INFINI_C __export infiniStatus_t infiniopGetAllWorkspaceSize(infiniopAllDescriptor_t desc, size_t *size);
// Executes the reduction. `workspace` must be at least `workspace_size` bytes;
// `stream` is the device stream/queue to enqueue on.
__INFINI_C __export infiniStatus_t infiniopAll(infiniopAllDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *output,
const void *input,
size_t *dim,
size_t dim_size,
bool keepdim,
void *stream);
// Releases the descriptor and any resources it owns.
__INFINI_C __export infiniStatus_t infiniopDestroyAllDescriptor(infiniopAllDescriptor_t desc);
#endif // __INFINIOP_ALL_API_H__
#ifndef __INFINIOP_SUM_API_H__
#define __INFINIOP_SUM_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
// NOTE(review): <vector> is a C++ header in what looks like a C-callable API
// (__INFINI_C) and is not used below — confirm it is intentional.
#include <vector>
// Opaque descriptor handle for the Sum reduction operator.
typedef struct InfiniopDescriptor *infiniopSumDescriptor_t;
// Creates a descriptor bound to the given output/input tensor descriptors.
// `dim` points to `dim_size` axis indices to reduce; `keepdim` keeps reduced
// axes with extent 1 in the output.
__INFINI_C __export infiniStatus_t infiniopCreateSumDescriptor(infiniopHandle_t handle,
infiniopSumDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool keepdim);
// Queries the scratch workspace size (in bytes) required by infiniopSum.
__INFINI_C __export infiniStatus_t infiniopGetSumWorkspaceSize(infiniopSumDescriptor_t desc, size_t *size);
// Executes the reduction. `workspace` must be at least `workspace_size` bytes;
// `stream` is the device stream/queue to enqueue on.
__INFINI_C __export infiniStatus_t infiniopSum(infiniopSumDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *output,
const void *input,
size_t *dim,
size_t dim_size,
bool keepdim,
void *stream);
// Releases the descriptor and any resources it owns.
__INFINI_C __export infiniStatus_t infiniopDestroySumDescriptor(infiniopSumDescriptor_t desc);
#endif // __INFINIOP_SUM_API_H__
#ifndef __INFINIOP_TOPK_API_H__
#define __INFINIOP_TOPK_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
// NOTE(review): <vector> is a C++ header in what looks like a C-callable API
// (__INFINI_C) and is not used below — confirm it is intentional.
#include <vector>
// Opaque descriptor handle for the TopK selection operator.
typedef struct InfiniopDescriptor *infiniopTopKDescriptor_t;
// Creates a descriptor for selecting the top `k` elements along axis `dim`.
// `largest` picks max vs. min elements; `sorted` requests ordered output.
__INFINI_C __export infiniStatus_t infiniopCreateTopKDescriptor(infiniopHandle_t handle,
infiniopTopKDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t values_output_desc,
infiniopTensorDescriptor_t indices_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t k,
size_t dim,
bool largest,
bool sorted);
// Queries the scratch workspace size (in bytes) required by infiniopTopK.
__INFINI_C __export infiniStatus_t infiniopGetTopKWorkspaceSize(infiniopTopKDescriptor_t desc, size_t *size);
// Executes the selection, filling both the values and indices buffers.
// `stream` is the device stream/queue to enqueue on.
__INFINI_C __export infiniStatus_t infiniopTopK(infiniopTopKDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *values_output,
void *indices_output,
const void *input,
size_t k,
size_t dim,
bool largest,
bool sorted,
void *stream);
// Releases the descriptor and any resources it owns.
__INFINI_C __export infiniStatus_t infiniopDestroyTopKDescriptor(infiniopTopKDescriptor_t desc);
#endif // __INFINIOP_TOPK_API_H__
#ifndef __INFINIOP_VAR_API_H__
#define __INFINIOP_VAR_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
// NOTE(review): <vector> is a C++ header in what looks like a C-callable API
// (__INFINI_C) and is not used below — confirm it is intentional.
#include <vector>
// Opaque descriptor handle for the Var (variance) reduction operator.
typedef struct InfiniopDescriptor *infiniopVarDescriptor_t;
// Creates a descriptor bound to the given output/input tensor descriptors.
// `dim` points to `dim_size` axis indices to reduce.
// NOTE(review): `unbiased` presumably selects the N-1 divisor — confirm in the kernels.
__INFINI_C __export infiniStatus_t infiniopCreateVarDescriptor(infiniopHandle_t handle,
infiniopVarDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t var_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim);
// Queries the scratch workspace size (in bytes) required by infiniopVar.
__INFINI_C __export infiniStatus_t infiniopGetVarWorkspaceSize(infiniopVarDescriptor_t desc, size_t *size);
// Executes the reduction. `workspace` must be at least `workspace_size` bytes;
// `stream` is the device stream/queue to enqueue on.
__INFINI_C __export infiniStatus_t infiniopVar(infiniopVarDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *var_output,
const void *input,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim,
void *stream);
// Releases the descriptor and any resources it owns.
__INFINI_C __export infiniStatus_t infiniopDestroyVarDescriptor(infiniopVarDescriptor_t desc);
#endif // __INFINIOP_VAR_API_H__
#ifndef __INFINIOP_VAR_MEAN_API_H__
#define __INFINIOP_VAR_MEAN_API_H__
#include "../operator_descriptor.h"
#include <cstddef>
// NOTE(review): <vector> is a C++ header in what looks like a C-callable API
// (__INFINI_C) and is not used below — confirm it is intentional.
#include <vector>
// Opaque descriptor handle for the fused variance+mean reduction operator.
typedef struct InfiniopDescriptor *infiniopVarMeanDescriptor_t;
// Creates a descriptor producing both variance and mean in one pass.
// `dim` points to `dim_size` axis indices to reduce.
// NOTE(review): `unbiased` presumably selects the N-1 divisor — confirm in the kernels.
__INFINI_C __export infiniStatus_t infiniopCreateVarMeanDescriptor(infiniopHandle_t handle,
infiniopVarMeanDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t var_output_desc,
infiniopTensorDescriptor_t mean_output_desc,
infiniopTensorDescriptor_t input_desc,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim);
// Queries the scratch workspace size (in bytes) required by infiniopVarMean.
__INFINI_C __export infiniStatus_t infiniopGetVarMeanWorkspaceSize(infiniopVarMeanDescriptor_t desc, size_t *size);
// Executes the reduction, filling both the variance and mean buffers.
// `stream` is the device stream/queue to enqueue on.
__INFINI_C __export infiniStatus_t infiniopVarMean(infiniopVarMeanDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *var_output,
void *mean_output,
const void *input,
size_t *dim,
size_t dim_size,
bool unbiased,
bool keepdim,
void *stream);
// Releases the descriptor and any resources it owns.
__INFINI_C __export infiniStatus_t infiniopDestroyVarMeanDescriptor(infiniopVarMeanDescriptor_t desc);
#endif // __INFINIOP_VAR_MEAN_API_H__
......@@ -49,6 +49,7 @@ from infinicore.dtype import (
)
from infinicore.ops.add import add
from infinicore.ops.add_rms_norm import add_rms_norm
from infinicore.ops.all import all
from infinicore.ops.asinh import asinh
from infinicore.ops.attention import attention
from infinicore.ops.baddbmm import baddbmm
......@@ -66,7 +67,11 @@ from infinicore.ops.paged_attention_prefill import paged_attention_prefill
from infinicore.ops.paged_caching import paged_caching
from infinicore.ops.rearrange import rearrange
from infinicore.ops.squeeze import squeeze
from infinicore.ops.sum import sum
from infinicore.ops.topk import topk
from infinicore.ops.unsqueeze import unsqueeze
from infinicore.ops.var import var
from infinicore.ops.var_mean import var_mean
from infinicore.tensor import (
Tensor,
empty,
......@@ -155,6 +160,11 @@ __all__ = [
"strided_empty",
"strided_from_blob",
"zeros",
"sum",
"var_mean",
"var",
"topk",
"all",
]
use_ntops = False
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def all(input, dim=None, keepdim=False, out=None):
    """Test whether all elements along the given dimensions evaluate to True.

    Args:
        input (Tensor): The input tensor.
        dim (list[int], optional): Axes to reduce. ``None`` (the default)
            reduces over every axis.
        keepdim (bool): Keep the reduced axes with size 1.
        out (Tensor, optional): Preallocated boolean output tensor.

    Returns:
        Tensor: The boolean reduction result (``out`` when provided).
    """
    # The native binding takes a list of axes (std::vector<size_t>); the C++
    # side treats an empty list as "reduce every axis", so map the ``None``
    # default to [] instead of passing it through (which would fail).
    if dim is None:
        dim = []
    if out is None:
        return Tensor(_infinicore.all(input._underlying, dim, keepdim))
    _infinicore.all_(out._underlying, input._underlying, dim, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def sum(input, dim=None, keepdim=False, out=None):
    """
    Sum the elements of the input tensor along the given dimensions.
    Args:
        input (Tensor): The input tensor.
        dim (list[int], optional): Axes to reduce. ``None`` (the default)
            reduces over every axis.
        keepdim (bool): Keep the reduced axes with size 1.
        out (Tensor, optional): The output tensor.
    Returns:
        Tensor: The output tensor.
    Example:
        >>> import infinicore
        >>> input = infinicore.tensor([[1, 2, 3], [4, 5, 6]])
        >>> output = infinicore.sum(input)
        >>> print(output)
        tensor([15])
    """
    # The native binding takes a list of axes (std::vector<size_t>); the C++
    # side treats an empty list as "reduce every axis", so map the ``None``
    # default to [] instead of passing it through (which would fail).
    if dim is None:
        dim = []
    if out is None:
        return Tensor(_infinicore.sum(input._underlying, dim, keepdim))
    _infinicore.sum_(out._underlying, input._underlying, dim, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def topk(input, k, dim, largest=True, sorted=True, out=None):
    """Return the ``k`` largest (or smallest) elements of ``input`` along ``dim``.

    Args:
        input (Tensor): The input tensor.
        k (int): Number of elements to select.
        dim (int): Axis to select along.
        largest (bool): Select maxima when True, minima when False.
        sorted (bool): Return the selected elements in sorted order.
        out (tuple[Tensor, Tensor], optional): Preallocated
            ``(values, indices)`` output tensors.

    Returns:
        tuple[Tensor, Tensor]: ``(values, indices)`` (``out`` when provided).
    """
    if out is None:
        values, indices = _infinicore.topk(input._underlying, k, dim, largest, sorted)
        return Tensor(values), Tensor(indices)
    # The native ``topk_`` writes into *two* tensors (values and indices), so
    # ``out`` must be the pair — the previous code passed ``out`` as a single
    # tensor, which dropped one output and mismatched the binding's arity.
    values_output, indices_output = out
    _infinicore.topk_(
        values_output._underlying,
        indices_output._underlying,
        input._underlying,
        k,
        dim,
        largest,
        sorted,
    )
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def var(input, dim=None, unbiased=True, keepdim=False, out=None):
    """Compute the variance of ``input`` along the given dimensions.

    Args:
        input (Tensor): The input tensor.
        dim (list[int], optional): Axes to reduce. ``None`` (the default)
            reduces over every axis.
        unbiased (bool): Passed through to the native kernel; presumably
            selects Bessel's correction (N-1 divisor).
        keepdim (bool): Keep the reduced axes with size 1.
        out (Tensor, optional): Preallocated variance output tensor.

    Returns:
        Tensor: The variance tensor (``out`` when provided).
    """
    # The native binding takes a list of axes; map the ``None`` default to []
    # (which the C++ side treats as "reduce every axis").
    if dim is None:
        dim = []
    if out is None:
        return Tensor(_infinicore.var(input._underlying, dim, unbiased, keepdim))
    _infinicore.var_(out._underlying, input._underlying, dim, unbiased, keepdim)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor
def var_mean(input, dim=None, unbiased=True, keepdim=False, out=None):
    """Compute variance and mean of ``input`` along the given dimensions.

    Args:
        input (Tensor): The input tensor.
        dim (list[int], optional): Axes to reduce. ``None`` (the default)
            reduces over every axis.
        unbiased (bool): Passed through to the native kernel; presumably
            selects Bessel's correction (N-1 divisor).
        keepdim (bool): Keep the reduced axes with size 1.
        out (tuple[Tensor, Tensor], optional): Preallocated
            ``(variance, mean)`` output tensors.

    Returns:
        tuple[Tensor, Tensor]: ``(variance, mean)`` (``out`` when provided).
    """
    # The native binding takes a list of axes; map the ``None`` default to []
    # (which the C++ side treats as "reduce every axis").
    if dim is None:
        dim = []
    if out is None:
        var_tensor, mean_tensor = _infinicore.var_mean(
            input._underlying, dim, unbiased, keepdim
        )
        return Tensor(var_tensor), Tensor(mean_tensor)
    var_output, mean_output = out
    _infinicore.var_mean_(
        var_output._underlying,
        mean_output._underlying,
        input._underlying,
        dim,
        unbiased,
        keepdim,
    )
    return out
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/all.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
namespace infinicore::op::all_impl::infiniop {
// Per-thread bounded cache (capacity 100) of infiniop descriptors keyed by an
// argument hash, so repeated calls with the same configuration reuse a
// descriptor instead of rebuilding it. The eviction callback destroys the
// underlying infiniop descriptor and nulls the handle.
thread_local common::OpCache<size_t, infiniopAllDescriptor_t> caches(
100, // capacity
[](infiniopAllDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyAllDescriptor(desc));
desc = nullptr;
}
});
// Generic infiniop-backed implementation of All: builds (or reuses) a
// descriptor for this argument configuration, allocates the kernel's
// workspace, and launches the reduction on the current stream.
void calculate(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    // Cache key for the descriptor. The previous key folded in only
    // dim.size(), so different axis sets of equal length (e.g. {0} vs {1})
    // collided and could reuse a descriptor built for the wrong axes.
    // Fold in every axis value as well.
    // NOTE(review): assumes hash_combine accepts plain integral arguments —
    // confirm against common/hash.hpp.
    size_t seed = hash_combine(output, input, dim.size(), keepdim);
    for (size_t d : dim) {
        seed = hash_combine(seed, d);
    }
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopAllDescriptor_t desc = nullptr;
    if (!desc_opt) {
        // Cache miss: create a fresh descriptor and remember it for reuse.
        INFINICORE_CHECK_ERROR(infiniopCreateAllDescriptor(
            context::getInfiniopHandle(output->device()), &desc,
            output->desc(), input->desc(), dim.data(), dim.size(), keepdim));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Allocate the kernel's scratch space, then launch.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAllWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAll(
        desc, workspace->data(), workspace_size,
        output->data(), input->data(), dim.data(), dim.size(), keepdim, context::getStream()));
}
// Self-registration at static-initialization time: installs `calculate` as the
// implementation of All for every listed device type.
// NOTE(review): the meaning of the trailing `false` flag is defined by
// OpDispatcher::registerDevice — confirm before changing it.
static bool registered = []() {
All::dispatcher().registerDevice({Device::Type::CPU,
Device::Type::NVIDIA,
Device::Type::METAX,
Device::Type::MOORE,
Device::Type::ILUVATAR},
&calculate, false);
return true;
}();
} // namespace infinicore::op::all_impl::infiniop
#include "infinicore/ops/all.hpp"
#include "../../utils.hpp"
#include <iostream>
#include <stdexcept>
#include <vector>
namespace infinicore::op {
// Accessor for the process-wide All dispatcher.
// Uses a function-local static so the registry is constructed on first use.
common::OpDispatcher<All::schema> &All::dispatcher() {
    static common::OpDispatcher<schema> instance;
    return instance;
}
// Routes the All reduction to the kernel registered for the input's device.
// Both tensors must live on the same device; the context is switched to it
// before the dispatcher is consulted.
void All::execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(input->device());
    const auto device_type = context::getDevice().getType();
    if (auto impl = dispatcher().lookup(device_type)) {
        impl(output, input, dim, keepdim);
        return;
    }
    throw std::runtime_error("No All implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}
// Allocating entry point for All: computes the reduced output shape,
// creates a BOOL output tensor, and delegates to all_().
// An empty `dim` reduces over every axis.
Tensor all(Tensor input, std::vector<size_t> dim, bool keepdim) {
    const auto in_shape = input->shape();
    const size_t rank = in_shape.size();
    // Empty dim list means "reduce every axis".
    if (dim.empty()) {
        dim.resize(rank);
        for (size_t i = 0; i < rank; ++i) {
            dim[i] = i;
        }
    }
    // Mark the reduced axes via a mask. This (a) bounds-checks each axis
    // (previously unchecked), (b) deduplicates repeated axes (duplicates
    // previously desynchronized the shape walk), and (c) removes the
    // std::sort call that relied on a transitive <algorithm> include.
    std::vector<bool> reduced(rank, false);
    for (size_t d : dim) {
        if (d >= rank) {
            throw std::invalid_argument("all(): reduction dim out of range");
        }
        reduced[d] = true;
    }
    // Rebuild `dim` sorted and unique for the backend call (iterating the
    // axes in order yields a sorted list).
    dim.clear();
    for (size_t i = 0; i < rank; ++i) {
        if (reduced[i]) {
            dim.push_back(i);
        }
    }
    // Output shape: reduced axes collapse away, or become 1 with keepdim.
    // (Reducing all axes without keepdim yields a 0-d tensor.)
    std::vector<size_t> out_shape;
    for (size_t i = 0; i < rank; ++i) {
        if (reduced[i]) {
            if (keepdim) {
                out_shape.push_back(1);
            }
        } else {
            out_shape.push_back(in_shape[i]);
        }
    }
    auto output = Tensor::empty(out_shape, DataType::BOOL, input->device());
    all_(output, input, dim, keepdim);
    return output;
}
// In-place variant: forwards to the dispatcher. `output` must already have
// the reduced shape (and BOOL dtype, as produced by all()).
void all_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
All::execute(output, input, dim, keepdim);
}
} // namespace infinicore::op
#include "infinicore/ops/sum.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <vector>
namespace infinicore::op {
// Accessor for the process-wide Sum dispatcher.
// Uses a function-local static so the registry is constructed on first use.
common::OpDispatcher<Sum::schema> &Sum::dispatcher() {
    static common::OpDispatcher<schema> instance;
    return instance;
}
// Routes the Sum reduction to the kernel registered for the input's device.
// Both tensors must live on the same device; the context is switched to it
// before the dispatcher is consulted.
void Sum::execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(input->device());
    const auto device_type = context::getDevice().getType();
    if (auto impl = dispatcher().lookup(device_type)) {
        impl(output, input, dim, keepdim);
        return;
    }
    throw std::runtime_error("No Sum implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
}
// Allocating entry point for Sum: computes the reduced output shape,
// creates an output tensor with the input's dtype, and delegates to sum_().
// An empty `dim` reduces over every axis.
Tensor sum(Tensor input, std::vector<size_t> dim, bool keepdim) {
    const auto in_shape = input->shape();
    const size_t rank = in_shape.size();
    // Empty dim list means "reduce every axis".
    if (dim.empty()) {
        dim.resize(rank);
        for (size_t i = 0; i < rank; ++i) {
            dim[i] = i;
        }
    }
    // Mark the reduced axes via a mask. This (a) bounds-checks each axis
    // (previously unchecked), (b) deduplicates repeated axes (duplicates
    // previously desynchronized the shape walk), and (c) removes the
    // std::sort call that relied on a transitive <algorithm> include.
    std::vector<bool> reduced(rank, false);
    for (size_t d : dim) {
        if (d >= rank) {
            throw std::invalid_argument("sum(): reduction dim out of range");
        }
        reduced[d] = true;
    }
    // Rebuild `dim` sorted and unique for the backend call (iterating the
    // axes in order yields a sorted list).
    dim.clear();
    for (size_t i = 0; i < rank; ++i) {
        if (reduced[i]) {
            dim.push_back(i);
        }
    }
    // Output shape: reduced axes collapse away, or become 1 with keepdim.
    // (Reducing all axes without keepdim yields a 0-d tensor.)
    std::vector<size_t> out_shape;
    for (size_t i = 0; i < rank; ++i) {
        if (reduced[i]) {
            if (keepdim) {
                out_shape.push_back(1);
            }
        } else {
            out_shape.push_back(in_shape[i]);
        }
    }
    auto output = Tensor::empty(out_shape, input->dtype(), input->device());
    sum_(output, input, dim, keepdim);
    return output;
}
// In-place variant: forwards to the dispatcher. `output` must already have
// the reduced shape and a dtype compatible with the input.
void sum_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
Sum::execute(output, input, dim, keepdim);
}
} // namespace infinicore::op
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment