Unverified Commit 93191613 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Merge pull request #1075 from InfiniTensor/RevertT_1-1-4

Revert T1-1-4
parents 6ab911c3 def22a08
#include "infinicore/ops/cross_entropy.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to CrossEntropy kernels.
common::OpDispatcher<CrossEntropy::schema> &CrossEntropy::dispatcher() {
    static common::OpDispatcher<CrossEntropy::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the cross-entropy kernel registered for the output tensor's device.
/// All three tensors must live on the same device.
/// @throws std::runtime_error if no implementation is registered for the device.
void CrossEntropy::execute(Tensor output, Tensor input, Tensor target) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(input, target);
    infinicore::context::setDevice(output->device());
    auto device_type = output->device().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No CrossEntropy implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, target);
}

/// Out-of-place cross entropy: allocates the loss tensor (shaped like `target`,
/// dtype of `input`) and fills it via cross_entropy_.
Tensor cross_entropy(Tensor input, Tensor target) {
    Shape shape = target->shape();
    auto output = Tensor::empty(shape, input->dtype(), input->device());
    cross_entropy_(output, input, target);
    return output;
}

/// In-place variant writing the loss into a caller-provided tensor.
void cross_entropy_(Tensor output, Tensor input, Tensor target) {
    CrossEntropy::execute(output, input, target);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/cross_entropy.hpp"
#include <infiniop.h>
namespace infinicore::op::cross_entropy_impl::infiniop {

// Thread-local LRU cache of infiniop descriptors keyed by an argument hash;
// the eviction callback destroys the underlying descriptor.
thread_local common::OpCache<size_t, infiniopCrossEntropyDescriptor_t> caches(
    100, // capacity
    [](infiniopCrossEntropyDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyCrossEntropyDescriptor(desc));
            desc = nullptr;
        }
    });

// Creates (or reuses) a CrossEntropy descriptor and launches the kernel on the
// current stream.
void calculate(Tensor output, Tensor input, Tensor target) {
    size_t seed = hash_combine(output, input, target);
    auto device = context::getDevice();
    auto &cache = caches.getCache(device);
    auto desc_opt = cache.get(seed);
    infiniopCrossEntropyDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateCrossEntropyDescriptor(
            context::getInfiniopHandle(device),
            &desc,
            output->desc(),
            input->desc(),
            target->desc()));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetCrossEntropyWorkspaceSize(desc, &workspace_size));
    // Only allocate when a workspace is actually required — consistent with the
    // other infiniop wrappers (equal/hardswish/hardtanh) in this codebase.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopCrossEntropy(
        desc,
        workspace_ptr,
        workspace_size,
        output->data(),
        input->data(),
        target->data(),
        context::getStream()));
}

// Self-registration: hooks `calculate` into the dispatcher for all device types.
static bool registered = []() {
    CrossEntropy::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::cross_entropy_impl::infiniop
#include "infinicore/ops/equal.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <string>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to Equal kernels.
common::OpDispatcher<Equal::schema> &Equal::dispatcher() {
    static common::OpDispatcher<Equal::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the elementwise-equality kernel registered for the output device.
/// @throws std::runtime_error if no implementation is registered for the device
///         (previously the null lookup result was called unconditionally).
void Equal::execute(Tensor out, Tensor a, Tensor b) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(out, a, b);
    infinicore::context::setDevice(out->device());
    auto device_type = out->device().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No Equal implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(out, a, b);
}

/// Out-of-place equal: allocates a BOOL tensor shaped like `a` and fills it.
Tensor equal(Tensor a, Tensor b) {
    auto out = Tensor::empty(a->shape(), DataType::BOOL, a->device());
    equal_(out, a, b);
    return out;
}

/// In-place variant; the output tensor must already have BOOL dtype.
void equal_(Tensor out, Tensor a, Tensor b) {
    if (out->dtype() != DataType::BOOL) {
        throw std::runtime_error("Equal expects bool output tensor.");
    }
    Equal::execute(out, a, b);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/equal.hpp"
#include <infiniop.h>
namespace infinicore::op::equal_impl::infiniop {

// Per-thread descriptor cache; evicted descriptors are destroyed by the callback.
thread_local common::OpCache<size_t, infiniopEqualDescriptor_t> caches(
    100,
    [](infiniopEqualDescriptor_t &handle) {
        if (handle != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyEqualDescriptor(handle));
            handle = nullptr;
        }
    });

// Looks up (or builds) an Equal descriptor, then runs the kernel on the current stream.
void calculate(Tensor out, Tensor a, Tensor b) {
    const size_t key = hash_combine(out, a, b);
    auto dev = context::getDevice();
    auto &cache = caches.getCache(dev);
    auto hit = cache.get(key);
    infiniopEqualDescriptor_t desc = nullptr;
    if (!hit) {
        // Cache miss: build a fresh descriptor and remember it.
        INFINICORE_CHECK_ERROR(infiniopCreateEqualDescriptor(
            context::getInfiniopHandle(dev), &desc,
            out->desc(), a->desc(), b->desc()));
        cache.put(key, desc);
    } else {
        desc = *hit;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetEqualWorkspaceSize(desc, &workspace_size));
    // Allocate scratch memory only when the kernel asks for it.
    std::shared_ptr<Memory> workspace;
    void *scratch = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        scratch = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopEqual(
        desc,
        scratch,
        workspace_size,
        out->data(),
        a->data(),
        b->data(),
        context::getStream()));
}

// Register the implementation for every device type at static-init time.
static bool registered = []() {
    Equal::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::equal_impl::infiniop
#include "infinicore/ops/hardswish.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Lazily-constructed singleton dispatcher for Hardswish kernels.
common::OpDispatcher<Hardswish::schema> &Hardswish::dispatcher() {
    static common::OpDispatcher<Hardswish::schema> instance;
    return instance;
}

// Dispatches to the kernel registered for the output tensor's device.
void Hardswish::execute(Tensor output, Tensor input) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    const auto dev = output->device();
    infinicore::context::setDevice(dev);
    const auto dev_type = dev.getType();
    const auto kernel = dispatcher().lookup(dev_type);
    if (kernel == nullptr) {
        throw std::runtime_error(
            "No Hardswish implementation found for device type: " + std::to_string(static_cast<int>(dev_type)));
    }
    kernel(output, input);
}

// Out-of-place hardswish: allocates a like-shaped result and delegates.
Tensor hardswish(Tensor input) {
    auto result = Tensor::empty(input->shape(), input->dtype(), input->device());
    hardswish_(result, input);
    return result;
}

// In-place variant writing into a caller-provided tensor.
void hardswish_(Tensor output, Tensor input) {
    Hardswish::execute(output, input);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardswish.hpp"
#include <infiniop.h>
namespace infinicore::op::hardswish_impl::infiniop {

// Thread-local descriptor cache; evicted entries are destroyed by the callback.
thread_local common::OpCache<size_t, infiniopHardSwishDescriptor_t> caches(
    100,
    [](infiniopHardSwishDescriptor_t &d) {
        if (d != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyHardSwishDescriptor(d));
            d = nullptr;
        }
    });

// Builds or reuses a HardSwish descriptor, then launches the kernel.
void calculate(Tensor output, Tensor input) {
    const size_t key = hash_combine(output, input);
    auto dev = context::getDevice();
    auto &cache = caches.getCache(dev);
    infiniopHardSwishDescriptor_t desc = nullptr;
    if (auto cached = cache.get(key)) {
        desc = *cached;
    } else {
        INFINICORE_CHECK_ERROR(infiniopCreateHardSwishDescriptor(
            context::getInfiniopHandle(dev),
            &desc,
            output->desc(),
            input->desc()));
        cache.put(key, desc);
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetHardSwishWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) { // skip allocation when no scratch space is needed
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopHardSwish(
        desc,
        workspace_ptr,
        workspace_size,
        output->data(),
        input->data(),
        context::getStream()));
}

// Self-registration for all device types.
static bool registered = []() {
    Hardswish::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::hardswish_impl::infiniop
#include "infinicore/ops/hardtanh.hpp"
#include "../../utils.hpp"
#include <stdexcept>
namespace infinicore::op {

// Lazily-constructed singleton dispatcher for HardTanh kernels.
common::OpDispatcher<HardTanh::schema> &HardTanh::dispatcher() {
    static common::OpDispatcher<HardTanh::schema> instance;
    return instance;
}

// Dispatches clamp-style hardtanh to the kernel registered for the output device.
void HardTanh::execute(Tensor output, Tensor input, float min_val, float max_val) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    const auto dev = output->device();
    infinicore::context::setDevice(dev);
    const auto dev_type = dev.getType();
    const auto kernel = dispatcher().lookup(dev_type);
    if (kernel == nullptr) {
        throw std::runtime_error(
            "No HardTanh implementation found for device type: " + std::to_string(static_cast<int>(dev_type)));
    }
    kernel(output, input, min_val, max_val);
}

// Out-of-place hardtanh: allocates a like-shaped result and delegates.
Tensor hardtanh(Tensor input, float min_val, float max_val) {
    auto result = Tensor::empty(input->shape(), input->dtype(), input->device());
    hardtanh_(result, input, min_val, max_val);
    return result;
}

// In-place variant writing into a caller-provided tensor.
void hardtanh_(Tensor output, Tensor input, float min_val, float max_val) {
    HardTanh::execute(output, input, min_val, max_val);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/hardtanh.hpp"
#include <infiniop.h>
namespace infinicore::op::hardtanh_impl::infiniop {

// Thread-local descriptor cache; evicted entries are destroyed by the callback.
thread_local common::OpCache<size_t, infiniopHardTanhDescriptor_t> caches(
    100,
    [](infiniopHardTanhDescriptor_t &d) {
        if (d != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyHardTanhDescriptor(d));
            d = nullptr;
        }
    });

// Builds or reuses a HardTanh descriptor (clamp bounds are part of the key),
// then launches the kernel.
void calculate(Tensor output, Tensor input, float min_val, float max_val) {
    const size_t key = hash_combine(output, input, min_val, max_val);
    auto dev = context::getDevice();
    auto &cache = caches.getCache(dev);
    infiniopHardTanhDescriptor_t desc = nullptr;
    if (auto cached = cache.get(key)) {
        desc = *cached;
    } else {
        INFINICORE_CHECK_ERROR(infiniopCreateHardTanhDescriptor(
            context::getInfiniopHandle(dev),
            &desc,
            output->desc(),
            input->desc(),
            min_val,
            max_val));
        cache.put(key, desc);
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetHardTanhWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) { // skip allocation when no scratch space is needed
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopHardTanh(
        desc,
        workspace_ptr,
        workspace_size,
        output->data(),
        input->data(),
        context::getStream()));
}

// Self-registration for all device types.
static bool registered = []() {
    HardTanh::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::hardtanh_impl::infiniop
#include "infinicore/ops/sum.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <vector>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to Sum kernels.
common::OpDispatcher<Sum::schema> &Sum::dispatcher() {
    static common::OpDispatcher<Sum::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the Sum kernel registered for the current device.
/// @throws std::runtime_error if no implementation is registered.
void Sum::execute(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No Sum implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(output, input, dim, keepdim);
}

/// Out-of-place sum. An empty `dim` reduces over every dimension.
/// With keepdim the reduced axes stay as size 1; otherwise they are dropped.
/// @throws std::runtime_error if a dim index is out of range.
Tensor sum(Tensor input, std::vector<size_t> dim, bool keepdim) {
    auto in_shape = input->shape();
    // Validate the requested axes before doing any allocation.
    for (size_t d : dim) {
        if (d >= in_shape.size()) {
            throw std::runtime_error("sum: dim index out of range");
        }
    }
    if (dim.empty()) {
        for (size_t i = 0; i < in_shape.size(); i++) {
            dim.push_back(i);
        }
    }
    std::sort(dim.begin(), dim.end());
    std::vector<size_t> out_shape;
    if (dim.size() == in_shape.size() && !keepdim) {
        out_shape = {}; // full reduction -> scalar
    } else {
        // Single pass: reduced axes are kept as 1 (keepdim) or dropped.
        size_t j = 0;
        for (size_t i = 0; i < in_shape.size(); i++) {
            if (j < dim.size() && dim[j] == i) {
                j++;
                if (keepdim) {
                    out_shape.push_back(1);
                }
            } else {
                out_shape.push_back(in_shape[i]);
            }
        }
    }
    auto output = Tensor::empty(out_shape, input->dtype(), input->device());
    sum_(output, input, dim, keepdim);
    return output;
}

/// In-place variant writing into a caller-provided tensor.
void sum_(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    Sum::execute(output, input, dim, keepdim);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/sum.hpp"
#include <infiniop.h>
namespace infinicore::op::sum_impl::infiniop {

// Per-thread descriptor cache; the callback destroys evicted descriptors.
thread_local common::OpCache<size_t, infiniopSumDescriptor_t> caches(
    100, // capacity
    [](infiniopSumDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroySumDescriptor(desc));
            desc = nullptr;
        }
    });

// Creates or reuses a Sum descriptor and launches the kernel.
void calculate(Tensor output, Tensor input, std::vector<size_t> dim, bool keepdim) {
    // Fold every reduced axis index into the cache key. Hashing only
    // dim.size() (as before) let two different dim sets whose outputs have
    // the same shape collide and reuse the wrong descriptor.
    size_t dims_hash = dim.size();
    for (size_t d : dim) {
        dims_hash = dims_hash * 131 + d;
    }
    size_t seed = hash_combine(output, input, dims_hash, keepdim);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopSumDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateSumDescriptor(
            context::getInfiniopHandle(output->device()), &desc,
            output->desc(), input->desc(), dim.data(), dim.size(), keepdim));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetSumWorkspaceSize(desc, &workspace_size));
    // Allocate scratch space only when the kernel requires it.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopSum(
        desc, workspace_ptr, workspace_size,
        output->data(), input->data(), dim.data(), dim.size(), keepdim, context::getStream()));
}

// Register for the devices this backend supports.
static bool registered = []() {
    Sum::dispatcher().registerDevice({Device::Type::CPU,
                                      Device::Type::NVIDIA,
                                      Device::Type::METAX,
                                      Device::Type::MOORE,
                                      Device::Type::ILUVATAR},
                                     &calculate, false);
    return true;
}();

} // namespace infinicore::op::sum_impl::infiniop
#include "infinicore/ops/topk.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <vector>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to TopK kernels.
common::OpDispatcher<TopK::schema> &TopK::dispatcher() {
    static common::OpDispatcher<TopK::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the TopK kernel registered for the current device.
/// @throws std::runtime_error if no implementation is registered.
void TopK::execute(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest, bool sorted) {
    // Assert all three tensors share a device (indices_output was previously unchecked).
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(values_output, indices_output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No TopK implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(values_output, indices_output, input, k, dim, largest, sorted);
}

/// Out-of-place top-k along `dim`: returns {values, indices}; indices are I32.
/// @throws std::runtime_error if `dim` is out of range or k exceeds the dim size.
std::pair<Tensor, Tensor> topk(Tensor input, size_t k, size_t dim, bool largest, bool sorted) {
    auto in_shape = input->shape();
    if (dim >= in_shape.size()) {
        throw std::runtime_error("topk: dim index out of range");
    }
    if (k > in_shape[dim]) {
        throw std::runtime_error("topk: k exceeds size of the selected dimension");
    }
    std::vector<size_t> out_shape = in_shape;
    out_shape[dim] = k;
    auto values_output = Tensor::empty(out_shape, input->dtype(), input->device());
    auto indices_output = Tensor::empty(out_shape, DataType::I32, input->device());
    topk_(values_output, indices_output, input, k, dim, largest, sorted);
    return {values_output, indices_output};
}

/// In-place variant writing into caller-provided tensors.
void topk_(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest, bool sorted) {
    TopK::execute(values_output, indices_output, input, k, dim, largest, sorted);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/topk.hpp"
#include <infiniop.h>
namespace infinicore::op::topk_impl::infiniop {

// Per-thread descriptor cache; the callback destroys evicted descriptors.
thread_local common::OpCache<size_t, infiniopTopKDescriptor_t> caches(
    100, // capacity
    [](infiniopTopKDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyTopKDescriptor(desc));
            desc = nullptr;
        }
    });

// Creates or reuses a TopK descriptor and launches the kernel.
void calculate(Tensor values_output, Tensor indices_output, Tensor input, size_t k, size_t dim, bool largest, bool sorted) {
    size_t seed = hash_combine(values_output, indices_output, input, k, dim, largest, sorted);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopTopKDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateTopKDescriptor(
            context::getInfiniopHandle(values_output->device()), &desc,
            values_output->desc(), indices_output->desc(), input->desc(), k, dim, largest, sorted));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetTopKWorkspaceSize(desc, &workspace_size));
    // Allocate scratch space only when the kernel requires it — consistent with
    // the other infiniop wrappers in this codebase.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopTopK(
        desc, workspace_ptr, workspace_size,
        values_output->data(), indices_output->data(), input->data(), k, dim, largest, sorted, context::getStream()));
}

// Register for the devices this backend supports.
static bool registered = []() {
    TopK::dispatcher().registerDevice({Device::Type::CPU,
                                       Device::Type::NVIDIA,
                                       Device::Type::METAX,
                                       Device::Type::MOORE,
                                       Device::Type::ILUVATAR},
                                      &calculate, false);
    return true;
}();

} // namespace infinicore::op::topk_impl::infiniop
#include "infinicore/ops/var.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <vector>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to Var kernels.
common::OpDispatcher<Var::schema> &Var::dispatcher() {
    static common::OpDispatcher<Var::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the variance kernel registered for the current device.
/// @throws std::runtime_error if no implementation is registered.
void Var::execute(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(var_output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No Var implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(var_output, input, dim, unbiased, keepdim);
}

/// Out-of-place variance. An empty `dim` reduces over every dimension.
/// With keepdim the reduced axes stay as size 1; otherwise they are dropped.
/// @throws std::runtime_error if a dim index is out of range.
Tensor var(Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    auto in_shape = input->shape();
    // Validate the requested axes before doing any allocation.
    for (size_t d : dim) {
        if (d >= in_shape.size()) {
            throw std::runtime_error("var: dim index out of range");
        }
    }
    if (dim.empty()) {
        for (size_t i = 0; i < in_shape.size(); i++) {
            dim.push_back(i);
        }
    }
    std::sort(dim.begin(), dim.end());
    std::vector<size_t> out_shape;
    if (dim.size() == in_shape.size() && !keepdim) {
        out_shape = {}; // full reduction -> scalar
    } else {
        // Single pass: reduced axes are kept as 1 (keepdim) or dropped.
        size_t j = 0;
        for (size_t i = 0; i < in_shape.size(); i++) {
            if (j < dim.size() && dim[j] == i) {
                j++;
                if (keepdim) {
                    out_shape.push_back(1);
                }
            } else {
                out_shape.push_back(in_shape[i]);
            }
        }
    }
    auto var_output = Tensor::empty(out_shape, input->dtype(), input->device());
    var_(var_output, input, dim, unbiased, keepdim);
    return var_output;
}

/// In-place variant writing into a caller-provided tensor.
void var_(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    Var::execute(var_output, input, dim, unbiased, keepdim);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/var.hpp"
#include <infiniop.h>
namespace infinicore::op::var_impl::infiniop {

// Per-thread descriptor cache; the callback destroys evicted descriptors.
thread_local common::OpCache<size_t, infiniopVarDescriptor_t> caches(
    100, // capacity
    [](infiniopVarDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyVarDescriptor(desc));
            desc = nullptr;
        }
    });

// Creates or reuses a Var descriptor and launches the kernel.
void calculate(Tensor var_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    // Fold every reduced axis index into the cache key. Hashing only
    // dim.size() (as before) let two different dim sets whose outputs have
    // the same shape collide and reuse the wrong descriptor.
    size_t dims_hash = dim.size();
    for (size_t d : dim) {
        dims_hash = dims_hash * 131 + d;
    }
    size_t seed = hash_combine(var_output, input, dims_hash, unbiased, keepdim);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopVarDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateVarDescriptor(
            context::getInfiniopHandle(var_output->device()), &desc,
            var_output->desc(), input->desc(), dim.data(), dim.size(), unbiased, keepdim));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetVarWorkspaceSize(desc, &workspace_size));
    // Allocate scratch space only when the kernel requires it.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopVar(
        desc, workspace_ptr, workspace_size,
        var_output->data(), input->data(), dim.data(), dim.size(), unbiased, keepdim, context::getStream()));
}

// Register for the devices this backend supports.
static bool registered = []() {
    Var::dispatcher().registerDevice({Device::Type::CPU,
                                      Device::Type::NVIDIA,
                                      Device::Type::METAX,
                                      Device::Type::MOORE,
                                      Device::Type::ILUVATAR},
                                     &calculate, false);
    return true;
}();

} // namespace infinicore::op::var_impl::infiniop
#include "infinicore/ops/var_mean.hpp"
#include "../../utils.hpp"
#include <stdexcept>
#include <vector>
namespace infinicore::op {

/// Returns the process-wide dispatcher mapping device types to Var_Mean kernels.
common::OpDispatcher<Var_Mean::schema> &Var_Mean::dispatcher() {
    static common::OpDispatcher<Var_Mean::schema> dispatcher_;
    return dispatcher_;
} // note: removed stray ';' after the function body

/// Runs the fused variance+mean kernel registered for the current device.
/// @throws std::runtime_error if no implementation is registered.
void Var_Mean::execute(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    INFINICORE_ASSERT_TENSORS_SAME_DEVICE(var_output, mean_output, input);
    infinicore::context::setDevice(input->device());
    auto device_type = context::getDevice().getType();
    auto func = dispatcher().lookup(device_type);
    if (func == nullptr) {
        throw std::runtime_error("No Var_Mean implementation found for device type: " + std::to_string(static_cast<int>(device_type)));
    }
    func(var_output, mean_output, input, dim, unbiased, keepdim);
}

/// Out-of-place fused variance+mean; returns {variance, mean}.
/// An empty `dim` reduces over every dimension; keepdim keeps reduced axes as 1.
/// @throws std::runtime_error if a dim index is out of range.
std::pair<Tensor, Tensor> var_mean(Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    auto in_shape = input->shape();
    // Validate the requested axes before doing any allocation.
    for (size_t d : dim) {
        if (d >= in_shape.size()) {
            throw std::runtime_error("var_mean: dim index out of range");
        }
    }
    if (dim.empty()) {
        for (size_t i = 0; i < in_shape.size(); i++) {
            dim.push_back(i);
        }
    }
    std::sort(dim.begin(), dim.end());
    std::vector<size_t> out_shape;
    if (dim.size() == in_shape.size() && !keepdim) {
        out_shape = {}; // full reduction -> scalar
    } else {
        // Single pass: reduced axes are kept as 1 (keepdim) or dropped.
        size_t j = 0;
        for (size_t i = 0; i < in_shape.size(); i++) {
            if (j < dim.size() && dim[j] == i) {
                j++;
                if (keepdim) {
                    out_shape.push_back(1);
                }
            } else {
                out_shape.push_back(in_shape[i]);
            }
        }
    }
    auto var_output = Tensor::empty(out_shape, input->dtype(), input->device());
    auto mean_output = Tensor::empty(out_shape, input->dtype(), input->device());
    var_mean_(var_output, mean_output, input, dim, unbiased, keepdim);
    return {var_output, mean_output};
}

/// In-place variant writing into caller-provided tensors.
void var_mean_(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    Var_Mean::execute(var_output, mean_output, input, dim, unbiased, keepdim);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/common/cache.hpp"
#include "infinicore/ops/var_mean.hpp"
#include <infiniop.h>
// TODO: the calculate function implementation needs to be revised
namespace infinicore::op::var_mean_impl::infiniop {

// Per-thread descriptor cache; the callback destroys evicted descriptors.
thread_local common::OpCache<size_t, infiniopVarMeanDescriptor_t> caches(
    100, // capacity
    [](infiniopVarMeanDescriptor_t &desc) {
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyVarMeanDescriptor(desc));
            desc = nullptr;
        }
    });

// Creates or reuses a VarMean descriptor and launches the kernel.
void calculate(Tensor var_output, Tensor mean_output, Tensor input, std::vector<size_t> dim, bool unbiased, bool keepdim) {
    // Fold every reduced axis index into the cache key. Hashing only
    // dim.size() (as before) let two different dim sets whose outputs have
    // the same shape collide and reuse the wrong descriptor.
    size_t dims_hash = dim.size();
    for (size_t d : dim) {
        dims_hash = dims_hash * 131 + d;
    }
    size_t seed = hash_combine(var_output, mean_output, input, dims_hash, unbiased, keepdim);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopVarMeanDescriptor_t desc = nullptr;
    if (!desc_opt) {
        INFINICORE_CHECK_ERROR(infiniopCreateVarMeanDescriptor(
            context::getInfiniopHandle(var_output->device()), &desc,
            var_output->desc(), mean_output->desc(), input->desc(), dim.data(), dim.size(), unbiased, keepdim));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetVarMeanWorkspaceSize(desc, &workspace_size));
    // Allocate scratch space only when the kernel requires it.
    std::shared_ptr<Memory> workspace;
    void *workspace_ptr = nullptr;
    if (workspace_size != 0) {
        workspace = context::allocateMemory(workspace_size);
        workspace_ptr = workspace->data();
    }
    INFINICORE_CHECK_ERROR(infiniopVarMean(
        desc, workspace_ptr, workspace_size,
        var_output->data(), mean_output->data(), input->data(), dim.data(), dim.size(), unbiased, keepdim, context::getStream()));
}

// Register for the devices this backend supports.
static bool registered = []() {
    Var_Mean::dispatcher().registerDevice({Device::Type::CPU,
                                           Device::Type::NVIDIA,
                                           Device::Type::METAX,
                                           Device::Type::MOORE,
                                           Device::Type::ILUVATAR},
                                          &calculate, false);
    return true;
}();

} // namespace infinicore::op::var_mean_impl::infiniop
......@@ -4,16 +4,10 @@
#include "ops/add.hpp"
#include "ops/add_rms_norm.hpp"
#include "ops/all.hpp"
#include "ops/attention.hpp"
#include "ops/avg_pool1d.hpp"
#include "ops/causal_softmax.hpp"
#include "ops/cross_entropy.hpp"
#include "ops/embedding.hpp"
#include "ops/equal.hpp"
#include "ops/flash_attention.hpp"
#include "ops/hardswish.hpp"
#include "ops/hardtanh.hpp"
#include "ops/kv_caching.hpp"
#include "ops/linear.hpp"
#include "ops/linear_w8a8i8.hpp"
......@@ -30,11 +24,7 @@
#include "ops/rope.hpp"
#include "ops/silu.hpp"
#include "ops/silu_and_mul.hpp"
#include "ops/sum.hpp"
#include "ops/swiglu.hpp"
#include "ops/topk.hpp"
#include "ops/var.hpp"
#include "ops/var_mean.hpp"
namespace py = pybind11;
......@@ -52,28 +42,18 @@ inline void bind(py::module &m) {
bind_mul(m);
bind_mha_kvcache(m);
bind_mha_varlen(m);
bind_hardswish(m);
bind_hardtanh(m);
bind_paged_attention(m);
bind_paged_attention_prefill(m);
bind_paged_caching(m);
bind_random_sample(m);
bind_cross_entropy(m);
bind_rearrange(m);
bind_rms_norm(m);
bind_avg_pool1d(m);
bind_silu(m);
bind_swiglu(m);
bind_rope(m);
bind_embedding(m);
bind_linear_w8a8i8(m);
bind_silu_and_mul(m);
bind_sum(m);
bind_var_mean(m);
bind_var(m);
bind_topk(m);
bind_all(m);
bind_equal(m);
}
} // namespace infinicore::ops
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/all.hpp"
namespace py = pybind11;
namespace infinicore::ops {
// Python-facing wrapper for op::all.
// `dim` may be None (reduce over all dimensions), an int, or a tuple/list of ints.
// @throws std::invalid_argument for any other dim type.
Tensor py_all(Tensor input, py::object dim, bool keepdim) {
    if (dim.is_none()) {
        // None -> reduce across every dimension.
        std::vector<size_t> dim_vec;
        const size_t rank = input->shape().size();
        dim_vec.reserve(rank);
        for (size_t i = 0; i < rank; i++) { // size_t index: no signed/unsigned mix
            dim_vec.push_back(i);
        }
        return op::all(input, dim_vec, keepdim);
    } else if (py::isinstance<py::tuple>(dim) || py::isinstance<py::list>(dim)) {
        return op::all(input, dim.cast<std::vector<size_t>>(), keepdim);
    } else if (py::isinstance<py::int_>(dim)) {
        return op::all(input, std::vector<size_t>(1, dim.cast<size_t>()), keepdim);
    } else {
        // Message now matches the accepted types (lists were accepted but unmentioned).
        throw std::invalid_argument("dim must be a tuple, a list, or an integer");
    }
}
// Python-facing in-place wrapper for op::all_, writing into `output`.
// `dim` may be None (reduce over all dimensions), an int, or a tuple/list of ints.
// @throws std::invalid_argument for any other dim type.
void py_all_(Tensor output, Tensor input, py::object dim, bool keepdim) {
    if (dim.is_none()) {
        // None -> reduce across every dimension.
        std::vector<size_t> dim_vec;
        const size_t rank = input->shape().size();
        dim_vec.reserve(rank);
        for (size_t i = 0; i < rank; i++) { // size_t index: no signed/unsigned mix
            dim_vec.push_back(i);
        }
        op::all_(output, input, dim_vec, keepdim);
    } else if (py::isinstance<py::tuple>(dim) || py::isinstance<py::list>(dim)) {
        op::all_(output, input, dim.cast<std::vector<size_t>>(), keepdim);
    } else if (py::isinstance<py::int_>(dim)) {
        op::all_(output, input, std::vector<size_t>(1, dim.cast<size_t>()), keepdim);
    } else {
        // Message now matches the accepted types (lists were accepted but unmentioned).
        throw std::invalid_argument("dim must be a tuple, a list, or an integer");
    }
}
// Registers the `all` reduction entry points on the Python module.
inline void bind_all(py::module &m) {
// Out-of-place form: allocates and returns the result tensor.
m.def("all",
&py_all,
py::arg("input"),
py::arg("dim"),
py::arg("keepdim"),
R"doc(All of input tensor along the given dimensions.)doc");
// In-place form: writes into a caller-provided output tensor.
m.def("all_",
&py_all_,
py::arg("output"),
py::arg("input"),
py::arg("dim"),
py::arg("keepdim"),
R"doc(In-place tensor all.)doc");
}
} // namespace infinicore::ops
#pragma once
#include <optional>
#include <pybind11/pybind11.h>
#include "infinicore/ops/avg_pool1d.hpp"
namespace py = pybind11;
namespace infinicore::ops {
// Registers the AvgPool1d bindings on the Python module.
// NOTE(review): a missing `stride` is forwarded as 0 — presumably a sentinel
// that op::avg_pool1d maps to a default (e.g. kernel_size); confirm against
// the op implementation.
inline void bind_avg_pool1d(py::module &m) {
// Out-of-place form: allocates and returns the pooled tensor.
m.def(
"avg_pool1d",
[](::infinicore::Tensor input, size_t kernel_size, std::optional<size_t> stride, size_t padding) {
return op::avg_pool1d(input, kernel_size, stride.value_or(0), padding);
},
py::arg("input"),
py::arg("kernel_size"),
py::arg("stride") = py::none(),
py::arg("padding") = 0,
R"doc(AvgPool1d out-of-place.)doc");
// In-place form: writes into a caller-provided output tensor.
m.def(
"avg_pool1d_",
[](::infinicore::Tensor output, ::infinicore::Tensor input, size_t kernel_size, std::optional<size_t> stride, size_t padding) {
op::avg_pool1d_(output, input, kernel_size, stride.value_or(0), padding);
},
py::arg("output"),
py::arg("input"),
py::arg("kernel_size"),
py::arg("stride") = py::none(),
py::arg("padding") = 0,
R"doc(AvgPool1d in-place variant writing to provided output tensor.)doc");
}
} // namespace infinicore::ops
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/ops/cross_entropy.hpp"
namespace py = pybind11;
namespace infinicore::ops {
// Registers the cross-entropy bindings on the Python module.
inline void bind_cross_entropy(py::module &m) {
// Out-of-place form: returns the per-token loss tensor.
m.def("cross_entropy",
&op::cross_entropy,
py::arg("logits"),
py::arg("target"),
R"doc(Token-wise cross entropy loss without reduction.)doc");
// In-place form: writes the loss into a caller-provided tensor.
m.def("cross_entropy_",
&op::cross_entropy_,
py::arg("loss"),
py::arg("logits"),
py::arg("target"),
R"doc(Write cross entropy loss into a provided tensor.)doc");
}
} // namespace infinicore::ops
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment