Unverified Commit 9a05446f authored by PanZezhong1725, committed by GitHub

issue/461 InfiniCore inference runtime

Co-authored-by: Jiacheng Huang <huangjiacheng0709@outlook.com>
Co-authored-by: wooway777 <wooway777@gmail.com>
parent 37411f6d
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <infinicore.hpp>
namespace py = pybind11;
namespace infinicore {
PYBIND11_MODULE(infinicore, m) {
py::enum_<DataType>(m, "dtype")
.value("bfloat16", DataType::bfloat16)
.value("float16", DataType::float16)
.value("float32", DataType::float32)
.value("float64", DataType::float64)
.value("int32", DataType::int32)
.value("int64", DataType::int64)
.value("uint8", DataType::uint8)
.export_values();
py::class_<Device>(m, "Device")
.def(py::init<const Device::Type &, const Device::Index &>(),
py::arg("type"), py::arg("index") = 0)
.def_property_readonly("type", &Device::get_type)
.def_property_readonly("index", &Device::get_index)
.def("__repr__", static_cast<std::string (Device::*)() const>(&Device::to_string));
py::class_<Tensor>(m, "Tensor")
.def(py::init<const Tensor::Shape &, const DataType &, const Device &>(),
py::arg("shape"), py::arg("dtype") = DataType::float32, py::arg("device") = Device{Device::Type::cpu})
.def_property_readonly("shape", &Tensor::get_shape)
.def_property_readonly("dtype", &Tensor::get_dtype)
.def_property_readonly("device", &Tensor::get_device);
}
} // namespace infinicore
#include "infinicore/memory.hpp"
namespace infinicore {
Memory::Memory(std::byte *data,
size_t size,
Device device,
Memory::Deleter deleter,
bool pin_memory)
    : data_{data}, size_{size}, device_{device}, deleter_{deleter}, is_pinned_{pin_memory} {}
Memory::~Memory() {
if (deleter_) {
deleter_(data_);
}
}
std::byte *Memory::data() {
return data_;
}
Device Memory::device() const {
return device_;
}
size_t Memory::size() const {
return size_;
}
bool Memory::is_pinned() const {
return is_pinned_;
}
} // namespace infinicore
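// --- Illustrative example (not part of the commit) ---
// A minimal sketch of the Memory ownership contract above: the deleter runs
// exactly once, when the last reference to the Memory object is dropped.
// Assumes Memory::Deleter is callable as void(std::byte *); the buffer size
// and function name are hypothetical.
#include "infinicore/memory.hpp"
#include <cstdlib>
#include <memory>
inline void memory_usage_sketch() {
    auto *buf = static_cast<std::byte *>(std::malloc(1024));
    auto mem = std::make_shared<infinicore::Memory>(
        buf, 1024, infinicore::Device{infinicore::Device::Type::CPU, 0},
        [](std::byte *p) { std::free(p); }, /*pin_memory=*/false);
    // mem->data() == buf; std::free(buf) runs when the last reference is dropped.
}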
#include "infinicore/op/matmul.hpp"
namespace infinicore::op {
common::OpDispatcher<Matmul::schema> &Matmul::dispatcher() {
static common::OpDispatcher<Matmul::schema> dispatcher_;
return dispatcher_;
}
void Matmul::execute(Tensor c, Tensor a, Tensor b) {
dispatcher().lookup(context::getDevice().getType())(c, a, b);
}
Tensor matmul(Tensor a, Tensor b) {
    // Output shape: a's shape with the last dimension replaced by b's last.
    Shape shape = a->shape();
Size size = a->ndim();
shape[size - 1] = b->size(size - 1);
auto c = Tensor::empty(shape, a->dtype(), a->device());
matmul_(c, a, b);
return c;
}
void matmul_(Tensor c, Tensor a, Tensor b) {
Matmul::execute(c, a, b);
}
} // namespace infinicore::op
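// --- Illustrative example (not part of the commit) ---
// A minimal usage sketch of the two entry points above, assuming a configured
// device context; shapes, dtype, and device are illustrative.
#include "infinicore/op/matmul.hpp"
namespace infinicore::op::example {
inline void matmul_usage_sketch() {
    auto a = Tensor::empty({4, 8}, DataType::F32, Device{Device::Type::CPU, 0});
    auto b = Tensor::empty({8, 16}, DataType::F32, Device{Device::Type::CPU, 0});
    auto c = matmul(a, b); // allocates the {4, 16} result
    matmul_(c, a, b);      // writes into a caller-provided output
}
} // namespace infinicore::op::example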
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/op/common/cache.hpp"
#include "infinicore/op/matmul.hpp"
#include <infiniop.h>
namespace infinicore::op::matmul_impl::infiniop {
// Per-thread cache of gemm descriptors keyed by tensor configuration; the
// callback destroys a descriptor when it is evicted from the cache.
thread_local common::OpCache<size_t, infiniopGemmDescriptor_t> caches(
    100, // capacity
[](infiniopGemmDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyGemmDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor c, Tensor a, Tensor b) {
    // Look up (or create and cache) a gemm descriptor for this configuration.
    size_t seed = hash_combine(c, b, a);
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopGemmDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateGemmDescriptor(context::getInfiniopHandle(), &desc, c->desc(), a->desc(), b->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetGemmWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopGemm(
desc, workspace->data(), workspace_size,
c->data(), a->data(), b->data(), 1.f, 0.f, context::getStream()));
}
static bool registered = []() {
Matmul::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::matmul_impl::infiniop
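// --- Illustrative example (not part of the commit) ---
// The `static bool registered` idiom above runs at program load time. A
// hypothetical alternative backend would self-register the same way; the
// `false` flag is assumed to mean "do not override an existing registration".
#include "infinicore/op/matmul.hpp"
namespace infinicore::op::matmul_impl::reference {
void calculate(Tensor c, Tensor a, Tensor b) {
    // A scalar reference kernel would go here; parameters unused in this sketch.
    (void)c; (void)a; (void)b;
}
static bool registered = []() {
    Matmul::dispatcher().registerAll(&calculate, false);
    return true;
}();
} // namespace infinicore::op::matmul_impl::reference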
#include "infinicore/op/ones.hpp"
namespace infinicore::op {
common::OpDispatcher<Ones::schema> &Ones::dispatcher() {
static common::OpDispatcher<Ones::schema> dispatcher_;
return dispatcher_;
}
void Ones::execute(Tensor output) {
    // TODO: dispatch to a registered kernel; ones is not implemented yet
    // (see the TensorImpl::ones TODO elsewhere in this commit).
}
} // namespace infinicore::op
#include "infinicore/op/rearrange.hpp"
namespace infinicore::op {
common::OpDispatcher<Rearrange::schema> &Rearrange::dispatcher() {
static common::OpDispatcher<Rearrange::schema> dispatcher_;
return dispatcher_;
}
void Rearrange::execute(Tensor y, Tensor x) {
dispatcher().lookup(context::getDevice().getType())(y, x);
}
Tensor rearrange(Tensor x) {
auto y = Tensor::empty(x->shape(), x->dtype(), x->device());
rearrange_(y, x);
return y;
}
void rearrange_(Tensor y, Tensor x) {
Rearrange::execute(y, x);
}
} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/op/common/cache.hpp"
#include "infinicore/op/rearrange.hpp"
#include <infiniop.h>
namespace infinicore::op::rearrange_impl::infiniop {
thread_local common::OpCache<size_t, infiniopRearrangeDescriptor_t> caches(
100, // capacity
[](infiniopRearrangeDescriptor_t &desc) {
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyRearrangeDescriptor(desc));
desc = nullptr;
}
});
void calculate(Tensor y, Tensor x) {
size_t seed = hash_combine(y, x);
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopRearrangeDescriptor_t desc = nullptr;
if (!desc_opt) {
INFINICORE_CHECK_ERROR(infiniopCreateRearrangeDescriptor(context::getInfiniopHandle(), &desc, y->desc(), x->desc()));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
INFINICORE_CHECK_ERROR(
infiniopRearrange(
desc,
y->data(),
x->data(),
context::getStream()));
}
static bool registered = []() {
Rearrange::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::rearrange_impl::infiniop
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore.hpp"
namespace py = pybind11;
namespace infinicore::context {
inline void bind(py::module &m) {
m.def("get_device", &getDevice);
m.def("get_device_count", &getDeviceCount);
}
} // namespace infinicore::context
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore.hpp"
namespace py = pybind11;
namespace infinicore::device {
inline void bind(py::module &m) {
py::class_<Device> device(m, "Device");
py::enum_<Device::Type>(device, "Type")
.value("CPU", Device::Type::CPU)
.value("NVIDIA", Device::Type::NVIDIA)
.value("CAMBRICON", Device::Type::CAMBRICON)
.value("ASCEND", Device::Type::ASCEND)
.value("METAX", Device::Type::METAX)
.value("MOORE", Device::Type::MOORE)
.value("ILUVATAR", Device::Type::ILUVATAR)
.value("KUNLUN", Device::Type::KUNLUN)
.value("SUGON", Device::Type::SUGON)
.value("COUNT", Device::Type::COUNT);
device
.def(py::init<const Device::Type &, const Device::Index &>(),
py::arg("type") = Device::Type::CPU, py::arg("index") = 0)
.def_property_readonly("type", &Device::getType)
.def_property_readonly("index", &Device::getIndex)
.def("__str__", static_cast<std::string (Device::*)() const>(&Device::toString));
}
} // namespace infinicore::device
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore.hpp"
namespace py = pybind11;
namespace infinicore::dtype {
inline void bind(py::module &m) {
py::enum_<DataType>(m, "DataType")
.value("BYTE", DataType::BYTE)
.value("BOOL", DataType::BOOL)
.value("I8", DataType::I8)
.value("I16", DataType::I16)
.value("I32", DataType::I32)
.value("I64", DataType::I64)
.value("U8", DataType::U8)
.value("U16", DataType::U16)
.value("U32", DataType::U32)
.value("U64", DataType::U64)
.value("F8", DataType::F8)
.value("F16", DataType::F16)
.value("F32", DataType::F32)
.value("F64", DataType::F64)
.value("C16", DataType::C16)
.value("C32", DataType::C32)
.value("C64", DataType::C64)
.value("C128", DataType::C128)
.value("BF16", DataType::BF16);
}
} // namespace infinicore::dtype
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "context.hpp"
#include "device.hpp"
#include "dtype.hpp"
#include "op.hpp"
#include "tensor.hpp"
namespace infinicore {
PYBIND11_MODULE(_infinicore, m) {
context::bind(m);
device::bind(m);
dtype::bind(m);
op::bind(m);
tensor::bind(m);
}
} // namespace infinicore
#pragma once
#include <pybind11/pybind11.h>
#include "op/matmul.hpp"
#include "op/rearrange.hpp"
namespace py = pybind11;
namespace infinicore::op {
inline void bind(py::module &m) {
bind_matmul(m);
bind_rearrange(m);
}
} // namespace infinicore::op
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/op/matmul.hpp"
namespace py = pybind11;
namespace infinicore::op {
inline void bind_matmul(py::module &m) {
m.def("matmul",
&op::matmul,
py::arg("a"),
py::arg("b"),
R"doc(Matrix multiplication of two tensors.)doc");
m.def("matmul_",
&op::matmul_,
py::arg("c"),
py::arg("a"),
py::arg("b"),
R"doc(In-place matrix multiplication.)doc");
}
} // namespace infinicore::op
#pragma once
#include <pybind11/pybind11.h>
#include "infinicore/op/rearrange.hpp"
namespace py = pybind11;
namespace infinicore::op {
inline void bind_rearrange(py::module &m) {
    m.def("rearrange",
          &op::rearrange,
          py::arg("x"),
          R"doc(Return a contiguous, rearranged copy of a tensor.)doc");
    m.def("rearrange_",
          &op::rearrange_,
          py::arg("y"),
          py::arg("x"),
          R"doc(Copy the elements of x into y, rearranging between the two layouts.)doc");
}
} // namespace infinicore::op
#pragma once
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "infinicore.hpp"
namespace py = pybind11;
namespace infinicore::tensor {
inline void bind(py::module &m) {
py::class_<Tensor>(m, "Tensor")
.def_property_readonly("shape", [](const Tensor &tensor) { return tensor->shape(); })
.def_property_readonly("strides", [](const Tensor &tensor) { return tensor->strides(); })
.def_property_readonly("ndim", [](const Tensor &tensor) { return tensor->ndim(); })
.def_property_readonly("dtype", [](const Tensor &tensor) { return tensor->dtype(); })
.def("data_ptr", [](const Tensor &tensor) { return tensor->data(); })
.def("size", [](const Tensor &tensor, std::size_t dim) { return tensor->size(dim); })
.def("stride", [](const Tensor &tensor, std::size_t dim) { return tensor->stride(dim); })
.def("numel", [](const Tensor &tensor) { return tensor->numel(); })
.def("is_contiguous", [](const Tensor &tensor) { return tensor->is_contiguous(); })
.def("is_pinned", [](const Tensor &tensor) { return tensor->is_pinned(); })
.def("info", [](const Tensor &tensor) { return tensor->info(); })
.def("copy_", [](Tensor &tensor, const Tensor &other) { tensor->copy_from(other); })
.def("to", [](const Tensor &tensor, const Device &device) { return tensor->to(device); })
.def("as_strided", [](const Tensor &tensor, const Shape &shape, const Strides &strides) { return tensor->as_strided(shape, strides); })
.def("contiguous", [](const Tensor &tensor) { return tensor->contiguous(); })
.def("permute", [](const Tensor &tensor, const Shape &dims) { return tensor->permute(dims); })
.def("view", [](const Tensor &tensor, const Shape &shape) { return tensor->view(shape); });
m.def("empty", &Tensor::empty,
py::arg("shape"),
py::arg("dtype"),
py::arg("device"),
py::arg("pin_memory") = false);
m.def("strided_empty", &Tensor::strided_empty,
py::arg("shape"),
py::arg("strides"),
py::arg("dtype"),
py::arg("device"),
py::arg("pin_memory") = false);
m.def("zeros", &Tensor::zeros,
py::arg("shape"),
py::arg("dtype"),
py::arg("device"),
py::arg("pin_memory") = false);
m.def("ones", &Tensor::ones,
py::arg("shape"),
py::arg("dtype"),
py::arg("device"),
py::arg("pin_memory") = false);
    m.def(
        "from_blob", [](uintptr_t raw_ptr, const Shape &shape, const DataType &dtype, const Device &device) {
            return Tensor{infinicore::Tensor::from_blob(reinterpret_cast<void *>(raw_ptr), shape, dtype, device)};
        },
        py::arg("raw_ptr"), py::arg("shape"), py::arg("dtype"), py::arg("device"));
    m.def(
        "strided_from_blob", [](uintptr_t raw_ptr, const Shape &shape, const Strides &strides, const DataType &dtype, const Device &device) {
            return Tensor{infinicore::Tensor::strided_from_blob(reinterpret_cast<void *>(raw_ptr), shape, strides, dtype, device)};
        },
        py::arg("raw_ptr"), py::arg("shape"), py::arg("strides"), py::arg("dtype"), py::arg("device"));
}
} // namespace infinicore::tensor
#include <infinicore.hpp>
namespace infinicore {
Tensor::Tensor(const Shape &shape, const DataType &dtype, const Device &device) : shape_{shape}, dtype_{dtype}, device_{device} {}
const Tensor::Shape &Tensor::get_shape() const {
return shape_;
}
const DataType &Tensor::get_dtype() const {
return dtype_;
}
const Device &Tensor::get_device() const {
return device_;
}
} // namespace infinicore
#include "infinicore/context/context.hpp"
#include "infinicore/dtype.hpp"
#include "infinicore/ops.hpp"
#include "infinicore/tensor.hpp"
#include <spdlog/spdlog.h>
namespace infinicore {
Tensor TensorImpl::to(Device device) const {
if (device == data_.memory->device()) {
return Tensor(const_cast<TensorImpl *>(this)->shared_from_this());
} else {
std::shared_ptr<TensorImpl> _t = empty(meta_.shape, meta_.dtype, device, true);
_t->copy_from(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()));
return Tensor(_t);
}
}
void TensorImpl::copy_from(Tensor src) {
    if (src->shape() != this->shape()) {
        throw std::runtime_error("Cannot copy from tensor with different shape");
    }
    if (this->device().getType() == src->device().getType()) {
        // Same device type: a strided rearrange handles any layout mismatch.
        op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), src);
    } else {
        if (!src->is_contiguous()) {
            src = src->contiguous();
        }
        // Byte count of the contiguous payload being moved across devices.
        // Using numel * dsize (rather than the backing allocation size) keeps
        // the copy correct for views whose memory block is larger than the
        // tensor itself.
        const size_t nbytes = this->numel() * dsize(this->dtype());
        if (this->device().getType() == Device::Type::CPU) {
            if (this->is_contiguous()) {
                context::memcpyD2H(this->data(), src->data(), nbytes);
            } else {
                // Stage into a contiguous host tensor, then rearrange into place.
                auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
                context::memcpyD2H(local_src->data(), src->data(), nbytes);
                op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
            }
        } else if (src->device().getType() == Device::Type::CPU) {
            if (this->is_contiguous()) {
                context::memcpyH2D(this->data(), src->data(), nbytes);
            } else {
                // Stage into a contiguous device tensor, then rearrange on device.
                auto local_src = Tensor::empty(this->shape(), this->dtype(), this->device());
                context::memcpyH2D(local_src->data(), src->data(), nbytes);
                op::rearrange_(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()), local_src);
            }
        } else {
            throw std::runtime_error("Copy between two different non-CPU device types is not supported");
        }
    }
}
Tensor TensorImpl::contiguous() const {
if (is_contiguous()) {
return Tensor(const_cast<TensorImpl *>(this)->shared_from_this());
} else {
return op::rearrange(Tensor(const_cast<TensorImpl *>(this)->shared_from_this()));
}
}
} // namespace infinicore
#include "infinicore/tensor.hpp"
#include "../utils.hpp"
#include "infinicore/context/context.hpp"
#include "infinicore/dtype.hpp"
#include <spdlog/spdlog.h>
namespace {
// Helper function to calculate contiguous strides
inline infinicore::Strides calculate_contiguous_strides(const infinicore::Shape &shape) {
infinicore::Strides strides(shape.size());
infinicore::Stride stride = 1;
for (int i = shape.size() - 1; i >= 0; --i) {
strides[i] = stride;
stride *= shape[i];
}
return strides;
}
} // namespace
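// --- Illustrative example (not part of the commit) ---
// Worked example of the helper above: for a row-major shape {2, 3, 4} the
// strides are {12, 4, 1}, i.e. strides[i] is the product of shape[i+1..n-1].
// Assumes <cassert> is available in this translation unit.
inline void contiguous_strides_sketch() {
    assert(calculate_contiguous_strides(infinicore::Shape{2, 3, 4})
           == (infinicore::Strides{12, 4, 1}));
}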
namespace infinicore {
TensorImpl *Tensor::operator->() { return impl_.get(); }
const TensorImpl *Tensor::operator->() const { return impl_.get(); }
Tensor Tensor::empty(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
return Tensor{TensorImpl::empty(shape, dtype, device, pin_memory)};
}
Tensor Tensor::strided_empty(const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device,
bool pin_memory) {
return Tensor{TensorImpl::strided_empty(shape, strides, dtype, device, pin_memory)};
}
Tensor Tensor::zeros(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
return Tensor{TensorImpl::zeros(shape, dtype, device, pin_memory)};
}
Tensor Tensor::ones(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
return Tensor{TensorImpl::ones(shape, dtype, device, pin_memory)};
}
Tensor Tensor::from_blob(void *raw_ptr, const Shape &shape, const DataType &dtype, const Device &device) {
return Tensor{TensorImpl::from_blob(raw_ptr, shape, dtype, device)};
}
Tensor Tensor::strided_from_blob(void *raw_ptr, const Shape &shape, const Strides &strides, const DataType &dtype, const Device &device) {
return Tensor{TensorImpl::strided_from_blob(raw_ptr, shape, strides, dtype, device)};
}
TensorMetaData::TensorMetaData(const Shape &_shape, const Strides &_strides, const DataType &_dtype)
: shape(_shape), strides(_strides), dtype(_dtype) {
INFINICORE_CHECK_ERROR(infiniopCreateTensorDescriptor(&desc, shape.size(), shape.data(), strides.data(), (infiniDtype_t)dtype));
}
TensorImpl::TensorImpl(const Shape &shape, const DataType &dtype)
: meta_(TensorMetaData(shape, calculate_contiguous_strides(shape), dtype)) {}
TensorImpl::TensorImpl(const Shape &shape, const Strides &strides, const DataType &dtype)
: meta_(TensorMetaData(shape, strides, dtype)) {}
std::byte *TensorImpl::data() {
return data_.memory->data() + data_.offset;
}
const std::byte *TensorImpl::data() const {
return data_.memory->data() + data_.offset;
}
const Shape &TensorImpl::shape() const {
return meta_.shape;
}
const Strides &TensorImpl::strides() const {
return meta_.strides;
}
Size TensorImpl::ndim() const {
return meta_.shape.size();
}
bool TensorImpl::is_contiguous() const {
Stride expected_stride = 1;
for (int i = meta_.shape.size() - 1; i >= 0; --i) {
if (meta_.strides[i] != expected_stride) {
return false;
}
expected_stride *= meta_.shape[i];
}
return true;
}
Size TensorImpl::numel() const {
Size total = 1;
for (const auto &dim : meta_.shape) {
total *= dim;
}
return total;
}
Size TensorImpl::size(size_t dim) const {
return meta_.shape[dim];
}
Stride TensorImpl::stride(size_t dim) const {
return meta_.strides[dim];
}
DataType TensorImpl::dtype() const {
return meta_.dtype;
}
Device TensorImpl::device() const {
return data_.memory->device();
}
infiniopTensorDescriptor_t TensorImpl::desc() const {
return meta_.desc;
}
bool TensorImpl::is_pinned() const {
return data_.memory->is_pinned();
}
std::string TensorImpl::info() const {
std::stringstream ss;
ss << "Tensor: "
<< "shape[ ";
for (auto s : this->shape()) {
ss << s << " ";
}
ss << "] strides[ ";
for (auto s : this->strides()) {
ss << s << " ";
}
ss << "] dtype=" << toString(this->dtype());
return ss.str();
}
std::shared_ptr<TensorImpl> TensorImpl::empty(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
auto t = std::shared_ptr<TensorImpl>(new TensorImpl(shape, dtype));
t->data_.offset = 0;
context::setDevice(device);
if (device == Device::Type::CPU) {
if (pin_memory) {
if (context::getDevice() == Device::Type::CPU) {
spdlog::warn("Tensor memory is not pinned by any device with CPU runtime.");
t->data_.memory = context::allocateHostMemory(t->numel() * dsize(dtype));
} else {
t->data_.memory = context::allocatePinnedHostMemory(t->numel() * dsize(dtype));
}
} else {
t->data_.memory = context::allocateHostMemory(t->numel() * dsize(dtype));
}
} else {
t->data_.memory = context::allocateMemory(t->numel() * dsize(dtype));
}
return t;
}
std::shared_ptr<TensorImpl> TensorImpl::strided_empty(
const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device,
bool pin_memory) {
auto impl = std::shared_ptr<TensorImpl>(new TensorImpl(shape, strides, dtype));
impl->data_.offset = 0;
context::setDevice(device);
// Size the allocation from the farthest element offset reachable under the
// given strides, plus one element.
size_t max_offset = 0;
for (size_t i = 0; i < shape.size(); ++i) {
    if (shape[i] > 0) {
        max_offset += (shape[i] - 1) * strides[i];
    }
}
size_t required_elements = max_offset + 1;
size_t required_bytes = required_elements * dsize(dtype);
if (device == Device::Type::CPU) {
if (pin_memory) {
if (context::getDevice() == Device::Type::CPU) {
spdlog::warn("Tensor memory is not pinned by any device with CPU runtime.");
impl->data_.memory = context::allocateHostMemory(required_bytes);
} else {
impl->data_.memory = context::allocatePinnedHostMemory(required_bytes);
}
} else {
impl->data_.memory = context::allocateHostMemory(required_bytes);
}
} else {
impl->data_.memory = context::allocateMemory(required_bytes);
}
return impl;
}
std::shared_ptr<TensorImpl> TensorImpl::zeros(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
// TODO: fill with zeros; for now this returns uninitialized memory.
return empty(shape, dtype, device, pin_memory);
}
std::shared_ptr<TensorImpl> TensorImpl::ones(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory) {
// TODO: fill with ones; for now this returns uninitialized memory.
return empty(shape, dtype, device, pin_memory);
}
std::shared_ptr<TensorImpl> TensorImpl::from_blob(
void *raw_ptr,
const Shape &shape,
const DataType &dtype,
const Device &device) {
auto t = std::shared_ptr<TensorImpl>(new TensorImpl(shape, dtype));
t->data_.offset = 0;
// Non-owning: the null deleter leaves ownership of raw_ptr with the caller.
t->data_.memory = std::make_shared<Memory>((std::byte *)raw_ptr, t->numel() * dsize(dtype), device, nullptr);
return t;
}
std::shared_ptr<TensorImpl> TensorImpl::strided_from_blob(
void *raw_ptr,
const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device) {
auto t = std::shared_ptr<TensorImpl>(new TensorImpl(shape, strides, dtype));
t->data_.offset = 0;
t->data_.memory = std::make_shared<Memory>((std::byte *)raw_ptr, t->numel() * dsize(dtype), device, nullptr);
return t;
}
} // namespace infinicore
#include "infinicore/context/context.hpp"
#include "infinicore/dtype.hpp"
#include "infinicore/tensor.hpp"
#include <spdlog/spdlog.h>
namespace infinicore {
Tensor TensorImpl::narrow(const std::vector<TensorSliceParams> &slices) const {
// Create new shape and calculate offset
Shape new_shape = meta_.shape;
size_t offset = data_.offset;
for (const auto &slice : slices) {
assert(slice.len > 0);
assert(meta_.shape[slice.dim] >= slice.start + slice.len);
new_shape[slice.dim] = slice.len;
offset += slice.start * meta_.strides[slice.dim] * dsize(meta_.dtype);
}
// Create new tensor with the same strides but narrowed shape
auto tensor_impl = std::make_shared<TensorImpl>(new_shape, meta_.strides, meta_.dtype);
tensor_impl->data_.offset = offset;
tensor_impl->data_.memory = data_.memory;
return Tensor(tensor_impl);
}
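// --- Illustrative example (not part of the commit) ---
// Sketch: narrowing dim 0 of a {4, 5} tensor to rows [1, 3) yields a {2, 5}
// view sharing the same storage, with the byte offset advanced by
// 1 * stride(0) * dsize(dtype). The TensorSliceParams field order
// (dim, start, len) is assumed from the usage above.
inline void narrow_sketch() {
    auto t = Tensor::empty({4, 5}, DataType::F32, Device{Device::Type::CPU, 0});
    auto v = t->narrow({TensorSliceParams{/*dim=*/0, /*start=*/1, /*len=*/2}});
    assert(v->shape() == (Shape{2, 5}));
}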
Tensor TensorImpl::permute(const Shape &order) const {
// Validate input
assert(meta_.shape.size() == order.size());
// Check that order contains all indices from 0 to n-1 exactly once
for (size_t i = 0; i < order.size(); i++) {
assert(std::find(order.begin(), order.end(), i) != order.end());
}
// Permute shape and strides
Shape new_shape(order.size());
Strides new_strides(order.size());
for (size_t i = 0; i < order.size(); i++) {
new_shape[i] = meta_.shape[order[i]];
new_strides[i] = meta_.strides[order[i]];
}
auto tensor_impl = std::make_shared<TensorImpl>(new_shape, new_strides, meta_.dtype);
tensor_impl->data_ = data_;
return Tensor(tensor_impl);
}
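// --- Illustrative example (not part of the commit) ---
// Sketch: permuting a contiguous {2, 3} tensor with order {1, 0} gives a
// {3, 2} view with strides {1, 3}. The result shares storage and is no longer
// contiguous; contiguous()/rearrange exist to repair exactly this.
inline void permute_sketch() {
    auto t = Tensor::empty({2, 3}, DataType::F32, Device{Device::Type::CPU, 0});
    auto p = t->permute({1, 0});
    assert(p->strides() == (Strides{1, 3}));
    assert(!p->is_contiguous());
}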
Tensor TensorImpl::view(const Shape &new_shape) const {
// Step 1: Validate total size
Size numel = 1;
for (Size dim : meta_.shape) {
numel *= dim;
}
Size new_numel = 1;
for (Size dim : new_shape) {
new_numel *= dim;
}
assert(numel == new_numel);
// Step 2: Get current shape and strides
const Shape &old_shape = meta_.shape;
const Strides &old_strides = meta_.strides;
// Step 3: Create merged shape and strides
Shape merged_shape;
Strides merged_strides;
if (!old_shape.empty()) {
merged_shape.push_back(old_shape[0]);
merged_strides.push_back(old_strides[0]);
for (size_t i = 1; i < old_shape.size(); ++i) {
if (old_strides[i] * static_cast<Stride>(old_shape[i]) == merged_strides.back()) {
merged_shape.back() *= old_shape[i];
merged_strides.back() = old_strides[i];
} else {
merged_shape.push_back(old_shape[i]);
merged_strides.push_back(old_strides[i]);
}
}
}
// Step 4: Compute new strides by splitting merged dimensions
Strides new_strides(new_shape.size());
size_t merged_idx = 0;
Stride current_stride = merged_strides[0];
Size remaining_size = merged_shape[0];
for (size_t i = 0; i < new_shape.size(); ++i) {
// Find which merged dimension contains this new dimension
while (new_shape[i] > remaining_size) {
assert(++merged_idx < merged_shape.size());
current_stride = merged_strides[merged_idx];
remaining_size = merged_shape[merged_idx];
}
assert(remaining_size % new_shape[i] == 0);
new_strides[i] = current_stride * (remaining_size / new_shape[i]);
remaining_size /= new_shape[i];
}
return this->as_strided(new_shape, new_strides);
}
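// --- Illustrative example (not part of the commit) ---
// Sketch of the merge/split logic above: viewing a contiguous {2, 3, 4} tensor
// as {6, 4} merges all three dims into {24} with stride 1, then splits that
// extent so the result carries strides {4, 1}.
inline void view_sketch() {
    auto t = Tensor::empty({2, 3, 4}, DataType::F32, Device{Device::Type::CPU, 0});
    auto v = t->view({6, 4});
    assert(v->strides() == (Strides{4, 1}));
}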
Tensor TensorImpl::as_strided(const Shape &new_shape, const Strides &new_strides) const {
auto tensor_impl = std::make_shared<TensorImpl>(new_shape, new_strides, meta_.dtype);
tensor_impl->data_ = data_;
return Tensor(tensor_impl);
}
} // namespace infinicore
#pragma once
#include "../utils/infini_status_string.h"
#include <spdlog/cfg/env.h>
#include <spdlog/spdlog.h>
#include <stdexcept>
inline struct SpdlogInitializer {
SpdlogInitializer() {
if (!std::getenv("INFINICORE_LOG_LEVEL")) {
spdlog::set_level(spdlog::level::off);
} else {
spdlog::cfg::load_env_levels("INFINICORE_LOG_LEVEL");
}
}
} spdlog_initializer;
#define STRINGIZE_(x) #x
#define STRINGIZE(x) STRINGIZE_(x)
#define INFINICORE_CHECK_ERROR(call) \
do { \
spdlog::info("Entering `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
infiniStatus_t ret = (call); \
spdlog::info("Exiting `" #call "` at `" __FILE__ ":" STRINGIZE(__LINE__) "`."); \
if (ret != INFINI_STATUS_SUCCESS) { \
throw std::runtime_error(#call " failed with error: " + std::string(infini_status_string(ret))); \
} \
} while (false)