Unverified Commit 3cbe5e02 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #84 from PanZezhong1725/issue/82

issue/82 重构tensor描述
parents 9874946c 0a9029f8
......@@ -71,51 +71,5 @@ typedef enum {
INFINI_DTYPE_BF16 = 19,
} infiniDtype_t;
// Size in bytes of one element of the given dtype.
// INVALID and unrecognized dtypes report a size of 0.
inline size_t infiniSizeof(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_BYTE:
    case INFINI_DTYPE_BOOL:
    case INFINI_DTYPE_I8:
    case INFINI_DTYPE_U8:
    case INFINI_DTYPE_F8:
        return 1;
    case INFINI_DTYPE_I16:
    case INFINI_DTYPE_U16:
    case INFINI_DTYPE_F16:
    case INFINI_DTYPE_C8:
    case INFINI_DTYPE_BF16:
        return 2;
    case INFINI_DTYPE_I32:
    case INFINI_DTYPE_U32:
    case INFINI_DTYPE_F32:
    case INFINI_DTYPE_C16:
        return 4;
    case INFINI_DTYPE_I64:
    case INFINI_DTYPE_U64:
    case INFINI_DTYPE_F64:
    case INFINI_DTYPE_C32:
        return 8;
    case INFINI_DTYPE_C64:
        return 16;
    case INFINI_DTYPE_INVALID:
    default:
        return 0;
    }
}
#endif // __INFINICORE_H__
......@@ -3,16 +3,7 @@
#include "../infinicore.h"
// The tensor descriptor is opaque to API users; its definition lives in the
// C++ implementation (src/infiniop/tensor.h). Callers interact only through
// the infiniopTensorDescriptor_t handle.
// NOTE(review): the scraped diff left both the old full struct definition and
// this forward declaration in place, which would not compile; only the
// forward declaration belongs in this header.
struct InfiniopTensorDescriptor;

typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;
......
#include "common_ascend.h"
// Product of the first `num` entries of `shape`; 1 for an empty shape.
int64_t numElements(const int64_t *shape, int64_t num) {
    int64_t count = 1;
    for (int64_t d = 0; d < num; ++d) {
        count *= shape[d];
    }
    return count;
}
infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize) {
*workspaceAddr = nullptr;
if (workspaceSize > 0) {
......
......@@ -26,7 +26,6 @@ struct InfiniopAscendHandle {
int device_id;
};
// Product of the first `num` entries of `shape` (total element count).
int64_t numElements(const int64_t *shape, int64_t num);
// Human-readable name of an ACL data type (for logging/debugging).
const char *dataTypeToString(aclDataType dtype);
// Human-readable name of an ACL memory format (for logging/debugging).
const char *formatToString(aclFormat format);
// Allocates `workspaceSize` bytes of workspace into *workspaceAddr;
// *workspaceAddr is set to nullptr first and stays null when size is 0.
infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize);
......
#include "tensor_aclnn.h"
#include "../../ops/utils.h"
#include "../../../utils.h"
#include <algorithm>
infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides) {
......@@ -16,7 +16,7 @@ infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std
aclFormat format = aclFormat::ACL_FORMAT_ND;
this->format = format;
CHECK_STATUS(this->inferStorageShape(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(this->inferStorageShape());
return INFINI_STATUS_SUCCESS;
}
......
#ifndef __COMMON_BANG_H__
#define __COMMON_BANG_H__
#include "../../../utils.h"
#include "../pool.h"
#include "bang_handle.h"
#include "cnnl.h"
......
#ifndef __INFINIOP_COMMON_CPU_H__
#define __INFINIOP_COMMON_CPU_H__
#include "../../../utils.h"
#include <cmath>
#include <cstddef>
#include <cstdint>
......
#ifndef __INFINIOP_COMMON_KUNLUN_H__
#define __INFINIOP_COMMON_KUNLUN_H__
#include "../../../utils.h"
#include "../pool.h"
#include "infinicore.h"
#include "kunlun_handle.h"
......@@ -13,15 +14,7 @@ namespace xdnn = baidu::xpu::api;
typedef xdnn::Context *xdnnHandle_t;
typedef XPUStream KunlunStream_t;
// Wraps a Kunlun runtime call: any status other than XPU_SUCCESS makes the
// enclosing function return INFINI_STATUS_INTERNAL_ERROR (via CHECK_INTERNAL
// from utils/check.h).
// NOTE(review): the scraped diff kept both the old hand-rolled definition and
// this one, redefining the macro; only this definition is intended.
#define CHECK_KUNLUN(call) CHECK_INTERNAL(call, XPU_SUCCESS)
struct InfiniopKunlunHandle {
infiniDevice_t device;
......
#include "matmul_ascend.h"
#include "../../../devices/ascend/ascend_handle.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../utils.h"
#include <acl/acl_base.h>
#include <aclnn/acl_meta.h>
#include <aclnnop/aclnn_matmul.h>
......@@ -55,27 +54,24 @@ infiniStatus_t Descriptor::create(
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation
CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}));
CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}));
CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}));
CHECK_STATUS(c->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(a->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(b->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(c->createTensor());
CHECK_STATUS(a->createTensor());
CHECK_STATUS(b->createTensor());
auto tc = c->t,
ta = a->t,
......@@ -127,7 +123,7 @@ infiniStatus_t Descriptor::calculate(
}
aclSetAclOpExecutorRepeatable(_opaque->executor);
auto unit = infiniSizeof(_dtype);
auto unit = infiniSizeOf(_dtype);
for (size_t i = 0; i < _info.batch; ++i) {
AclSetTensorAddr(_opaque->executor, 0, ta, ((char *)a) + i * _info.a_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit);
......
#include "matmul_bang.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h"
#include "../../utils.h"
#include <cnnl_extra.h>
namespace matmul::bang {
......
#ifndef __BLAS_H__
#define __BLAS_H__
#include "../../tensor.h"
#include "infiniop/operator.h"
#include <algorithm>
......@@ -17,22 +18,22 @@ struct BlasMatrix {
BlasMatrix() = default;
BlasMatrix(infiniopTensorDescriptor_t layout, infiniStatus_t *status) {
if (layout->ndim == 2) {
if (layout->ndim() == 2) {
ndim = 2;
batch = 1;
stride = 0;
rows = layout->shape[0];
cols = layout->shape[1];
row_stride = layout->strides[0];
col_stride = layout->strides[1];
} else if (layout->ndim == 3) {
rows = layout->dim(0);
cols = layout->dim(1);
row_stride = layout->stride(0);
col_stride = layout->stride(1);
} else if (layout->ndim() == 3) {
ndim = 3;
batch = layout->shape[0];
stride = batch == 1 ? 0 : layout->strides[0];
rows = layout->shape[1];
cols = layout->shape[2];
row_stride = layout->strides[1];
col_stride = layout->strides[2];
batch = layout->dim(0);
stride = batch == 1 ? 0 : layout->stride(0);
rows = layout->dim(1);
cols = layout->dim(2);
row_stride = layout->stride(1);
col_stride = layout->stride(2);
} else {
*status = INFINI_STATUS_BAD_TENSOR_SHAPE;
return;
......
......@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopCpuHandle_t>(handle_);
auto dtype = c_desc->dtype;
auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE;
......
#include "../../../devices/cuda/common_cuda.cuh"
#include "../../utils.h"
#include "matmul_cuda.cuh"
namespace matmul::cuda {
......@@ -19,7 +18,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopCudaHandle_t>(handle_);
auto dtype = c_desc->dtype;
auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE;
......
......@@ -56,7 +56,7 @@ void calculate(
auto transA = info.a_matrix.col_stride == 1 ? false : true;
auto transB = info.b_matrix.col_stride == 1 ? false : true;
auto unit = infiniSizeof(dtype);
auto unit = infiniSizeOf(dtype);
use_xdnn(xdnn_handle_pool,
(KunlunStream_t)stream,
......
#ifndef __UTILS_H__
#define __UTILS_H__
#include "infiniop/tensor_descriptor.h"
#include <algorithm>
#include <iostream>
#include <numeric>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
/* This file contains some useful macros and helper functions */
// Ceiling integer division of x by y.
// NOTE(review): the arguments are not parenthesized in the expansion, so only
// simple expressions should be passed (ROUND_UP_DIV(a + b, c) would misexpand).
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)

// Evaluates `call` once; if the result EQUALS `target` (the designated
// failure value), logs file/function/line to stderr and returns `errCode`
// from the enclosing function.
#define CHECK_ERROR(call, target, errCode)                             \
    do {                                                               \
        if (auto value = (call); value == (target)) {                  \
            std::cerr << "Error: expected " << (target) << " but got " \
                      << value << " in file " << __FILE__ << ", function " \
                      << __func__ << ", line " << __LINE__ << std::endl; \
            return (errCode);                                          \
        }                                                              \
    } while (0)

// Runs `expr` (typically a declaration producing `value`), then applies
// CHECK_ERROR to `value`.
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
    expr;                                                \
    CHECK_ERROR(value, target, errCode)

// Evaluates `call` once; if the result is NOT `target` (the expected success
// value), logs the location to stderr and returns the observed value from
// the enclosing function.
#define CHECK_STATUS(call, target)                                     \
    do {                                                               \
        if (auto value = (call); value != (target)) {                  \
            std::cerr << "Error: expected " << (target) << " but got " \
                      << value << " in file " << __FILE__ << ", function " \
                      << __func__ << ", line " << __LINE__ << std::endl; \
            return value;                                              \
        }                                                              \
    } while (0)
// Converts the descriptor's per-dimension element strides into byte strides
// by scaling each one with the element size of the tensor's dtype.
inline std::vector<ptrdiff_t> getByteStrides(infiniopTensorDescriptor_t desc) {
    std::vector<ptrdiff_t> strides(desc->ndim);
    for (size_t i = 0; i < desc->ndim; i++) {
        strides[i] = desc->strides[i] * infiniSizeof(desc->dtype);
    }
    return strides;
}
// Total size of the tensor's data in bytes: element count times element size.
inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
    const size_t numel = std::accumulate(desc->shape, desc->shape + desc->ndim,
                                         (size_t)1, std::multiplies<size_t>());
    return numel * infiniSizeof(desc->dtype);
}
// calculate the broadcasted shape for two tensors
// Computes the numpy-style broadcast of two shapes. Both inputs are
// right-aligned into `max_rank` dimensions (missing leading dims become 1)
// and written to padded_shape1/padded_shape2. On success broadcast_shape[i]
// is max(padded1[i], padded2[i]); returns false as soon as a dimension pair
// is incompatible (unequal and neither is 1).
inline bool getBroadcastShape(const size_t *shape1, size_t ndim1,
                              const size_t *shape2, size_t ndim2,
                              size_t *broadcast_shape,
                              size_t *padded_shape1, size_t *padded_shape2,
                              size_t max_rank) {
    // Right-align both shapes into max_rank slots, padding with 1s.
    std::fill(padded_shape1, padded_shape1 + max_rank, 1);
    std::fill(padded_shape2, padded_shape2 + max_rank, 1);
    std::copy(shape1, shape1 + ndim1, padded_shape1 + max_rank - ndim1);
    std::copy(shape2, shape2 + ndim2, padded_shape2 + max_rank - ndim2);

    for (size_t d = 0; d < max_rank; ++d) {
        const size_t a = padded_shape1[d];
        const size_t b = padded_shape2[d];
        if (a != b && a != 1 && b != 1) {
            return false; // incompatible dimension pair
        }
        broadcast_shape[d] = std::max(a, b);
    }
    return true;
}
// Returns true iff broadcasting tensors a and b succeeds and the resulting
// shape equals tensor c's shape exactly. `broadcast_ndim` must equal c->ndim
// (it is also used as the padded rank for the broadcast computation).
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c,
                                  size_t broadcast_ndim) {
    // Scratch buffers for the broadcast result and the right-aligned inputs.
    std::vector<size_t> broadcast_shape_(broadcast_ndim),
        padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim);
    auto broadcast_shape = broadcast_shape_.data(),
         padded_shape1 = padded_shape1_.data(),
         padded_shape2 = padded_shape2_.data();
    if (broadcast_ndim != c->ndim || !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim, broadcast_shape, padded_shape1, padded_shape2, broadcast_ndim)) {
        return false;
    }
    // The computed broadcast shape must match c's shape element-wise.
    return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
                      c->shape);
}
// Returns true iff src's shape can be broadcast to dst's shape: src is
// right-aligned against dst, and each src dimension must either equal the
// corresponding dst dimension or be 1.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t dst,
                                  infiniopTensorDescriptor_t src) {
    if (dst->ndim < src->ndim) {
        return false;
    }
    // Right-align src's shape into dst's rank, padding leading dims with 1.
    std::vector<size_t> padded_shape_(dst->ndim);
    auto padded_shape = padded_shape_.data();
    std::fill(padded_shape, padded_shape + dst->ndim, 1);
    std::copy(src->shape, src->shape + src->ndim,
              padded_shape + dst->ndim - src->ndim);
    for (size_t i = 0; i < dst->ndim; ++i) {
        if (padded_shape[i] != dst->shape[i] && padded_shape[i] != 1) {
            return false;
        }
    }
    return true;
}
// Returns true iff broadcasting a and b yields exactly c's shape; the
// broadcast rank used is the larger of a's and b's ranks.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c) {
    return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}
// permute the dimensions of a tensor descriptor
// Returns a new descriptor whose dimension i takes the shape/stride of the
// source's dimension order[i]. Returns nullptr when `order` is not a
// permutation of [0, ndim).
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
                                          const std::vector<size_t> &order) {
    size_t ndim = desc->ndim;
    if (order.size() != ndim) {
        return nullptr;
    }
    // Validate `order` BEFORE allocating: the original allocated the arrays
    // first and leaked them on the in-loop nullptr return.
    for (size_t i = 0; i < ndim; i++) {
        // Every index must appear; combined with the length check this
        // rejects duplicates and out-of-range entries.
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
    }
    size_t *shape = new size_t[ndim];
    ptrdiff_t *strides = new ptrdiff_t[ndim];
    for (size_t i = 0; i < ndim; i++) {
        shape[i] = desc->shape[order[i]];
        strides[i] = desc->strides[order[i]];
    }
    return new InfiniopTensorDescriptor{desc->dtype, ndim, shape, strides};
}
// Returns true iff dimensions [dim_start, dim_end] of `desc` are contiguous
// in memory, i.e. stride[i-1] == shape[i] * stride[i] for each adjacent pair.
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
                         size_t dim_start, size_t dim_end) {
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
        if (desc->strides[i - 1] != static_cast<ptrdiff_t>(desc->shape[i]) * desc->strides[i]) {
            return false;
        }
    }
    return true;
}
// Whole-tensor contiguity; rank-0 tensors are trivially contiguous (the
// early return also avoids the unsigned underflow of ndim - 1).
inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
    if (desc->ndim == 0) {
        return true;
    }
    return isContiguous(desc, 0, desc->ndim - 1);
}
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
// Merges dimensions [dim_start, dim_end] of `desc` into one dimension whose
// extent is the product of the merged extents and whose stride is the
// innermost (dim_end) stride. Returns nullptr when the range is invalid or
// the dimensions are not contiguous in memory.
inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
                                           size_t dim_start, size_t dim_end) {
    size_t ndim = desc->ndim;
    if (dim_start > dim_end || dim_end >= ndim) {
        return nullptr;
    }
    // Check mergeability BEFORE allocating: the original allocated the new
    // arrays first and leaked them on this failure path.
    if (!isContiguous(desc, dim_start, dim_end)) {
        return nullptr;
    }
    size_t new_ndim = ndim - (dim_end - dim_start);
    size_t *new_shape = new size_t[new_ndim];
    ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
    size_t index = 0;
    // Dimensions before the merged range are copied unchanged.
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    // The merged dimension: product of extents, stride of the innermost dim.
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= desc->shape[i];
    }
    new_strides[index] = desc->strides[dim_end];
    index++;
    // Dimensions after the merged range are copied unchanged.
    for (size_t i = dim_end + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
}
// Splits dimension `dim` of `desc` into multiple dimensions with extents
// `dims`. Returns nullptr when the new extents do not multiply back to the
// original extent of that dimension.
inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
                                           size_t dim,
                                           const std::vector<size_t> &dims) {
    size_t ndim = desc->ndim;
    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies{})) {
        return nullptr;
    }
    size_t new_ndim = ndim + dims.size() - 1;
    size_t *new_shape = new size_t[new_ndim];
    ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
    size_t index = 0;
    // Dimensions before the split point are copied unchanged.
    for (size_t i = 0; i < dim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    // The i-th split dimension's stride is the original stride scaled by the
    // product of the split extents inner to it:
    //   strides[dim] * shape[dim] / prod(dims[0..i]) == strides[dim] * prod(dims[i+1..]).
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
        new_strides[index] = desc->strides[dim] * desc->shape[dim] / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
        index++;
    }
    // Dimensions after the split point are copied unchanged.
    for (size_t i = dim + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
}
#endif // __UTILS_H__
#ifndef __INFINIOP_TENSOR_H__
#define __INFINIOP_TENSOR_H__
#include "infiniop/tensor_descriptor.h"
#include <string>
#include <vector>
// Implementation of the opaque infiniopTensorDescriptor_t handle: an element
// dtype plus per-dimension shape and element strides.
struct InfiniopTensorDescriptor {
private:
    // Element datatype
    infiniDtype_t _dtype;
    // Shape of the tensor, one extent per dimension
    std::vector<size_t> _shape;
    // Stride of each dimension, measured in elements (not bytes)
    std::vector<ptrdiff_t> _strides;

public:
    // Copies `ndim` entries from shape/strides into owned storage.
    InfiniopTensorDescriptor(infiniDtype_t dtype, size_t ndim, const size_t *shape, const ptrdiff_t *strides);
    ~InfiniopTensorDescriptor() = default;
    infiniDtype_t dtype() const;            // element datatype
    std::vector<size_t> shape() const;      // copy of the full shape
    size_t dim(size_t i) const;             // extent of dimension i
    size_t ndim() const;                    // number of dimensions (rank)
    std::vector<ptrdiff_t> strides() const; // copy of the element strides
    ptrdiff_t stride(size_t i) const;       // element stride of dimension i
    std::vector<ptrdiff_t> getByteStrides() const; // strides scaled to bytes
    // True iff dimensions [dim_start, dim_end] are contiguous in memory.
    bool isContiguous(size_t dim_start, size_t dim_end) const;
    bool isContiguous() const;
    // Total number of elements (product of all extents).
    size_t numel() const;
    // The following return a newly allocated descriptor, or nullptr when the
    // requested transformation is invalid.
    infiniopTensorDescriptor_t dimMerge(size_t dim_start, size_t dim_end) const;
    infiniopTensorDescriptor_t dimSplit(size_t axis, const std::vector<size_t> &dims) const;
    infiniopTensorDescriptor_t dimPermute(const std::vector<size_t> &order) const;
    // Debug string: dtype, shape and strides.
    std::string toString() const;
};
#endif // __INFINIOP_TENSOR_H__
#include "infiniop/tensor_descriptor.h"
#include "../utils.h"
#include "tensor.h"
#include <cstring>
#include <functional>
#include <numeric>
// Creates a tensor descriptor from a dtype, shape and optional strides.
// When `strides_` is null, contiguous row-major strides are synthesized.
// Always returns INFINI_STATUS_SUCCESS.
__C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, ptrdiff_t const *strides_, infiniDtype_t datatype) {
    if (strides_ != nullptr) {
        *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides_);
    } else {
        // No strides given: synthesize contiguous (row-major) strides.
        std::vector<ptrdiff_t> strides(ndim);
        ptrdiff_t dsize = 1;
        // Iterate from the innermost dimension outwards. `i` is unsigned, so
        // the original `for (size_t i = ndim - 1; i >= 0; i--)` condition was
        // always true and underflowed; count down with `i-- > 0` instead.
        for (size_t i = ndim; i-- > 0;) {
            strides[i] = dsize;
            dsize *= shape_[i];
        }
        *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides.data());
    }
    return INFINI_STATUS_SUCCESS;
}
// Destroys a descriptor created by infiniopCreateTensorDescriptor.
// Shape/stride storage is owned by the descriptor's std::vector members, so
// deleting the object releases everything; the old `delete[] desc->shape` /
// `delete[] desc->strides` lines targeted raw-array members that no longer
// exist (and are private in the refactored class).
__C __export infiniStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc) {
    delete desc;
    return INFINI_STATUS_SUCCESS;
}
// Copies `ndim` entries from the caller's shape/stride arrays into the
// descriptor's own storage.
InfiniopTensorDescriptor::InfiniopTensorDescriptor(infiniDtype_t dtype, size_t ndim, const size_t *shape, const ptrdiff_t *strides)
    : _dtype(dtype),
      _shape(shape, shape + ndim),
      _strides(strides, strides + ndim) {}
// Element datatype of the tensor.
infiniDtype_t InfiniopTensorDescriptor::dtype() const {
    return _dtype;
}
// Returns a copy of the tensor's shape.
std::vector<size_t> InfiniopTensorDescriptor::shape() const {
    // Returning the member by value already yields a copy; rebuilding the
    // vector from an iterator pair was redundant.
    return _shape;
}
// Extent of dimension i. NOTE(review): unchecked access — callers must
// ensure i < ndim().
size_t InfiniopTensorDescriptor::dim(size_t i) const {
    return _shape[i];
}
// Number of dimensions (rank) of the tensor.
size_t InfiniopTensorDescriptor::ndim() const {
    return _shape.size();
}
// Returns a copy of the per-dimension element strides.
std::vector<ptrdiff_t> InfiniopTensorDescriptor::strides() const {
    // Returning the member by value already yields a copy; rebuilding the
    // vector from an iterator pair was redundant.
    return _strides;
}
// Element stride of dimension i. NOTE(review): unchecked access — callers
// must ensure i < ndim().
ptrdiff_t InfiniopTensorDescriptor::stride(size_t i) const {
    return _strides[i];
}
// Total number of elements: product of all extents (1 for rank-0 tensors).
size_t InfiniopTensorDescriptor::numel() const {
    return std::accumulate(_shape.begin(), _shape.end(), (size_t)1, std::multiplies<size_t>());
}
// Per-dimension strides converted from elements to bytes.
std::vector<ptrdiff_t> InfiniopTensorDescriptor::getByteStrides() const {
    const ptrdiff_t elem_size = infiniSizeOf(_dtype);
    std::vector<ptrdiff_t> byte_strides;
    byte_strides.reserve(_strides.size());
    for (ptrdiff_t s : _strides) {
        byte_strides.push_back(s * elem_size);
    }
    return byte_strides;
}
// True iff dimensions [dim_start, dim_end] are laid out contiguously, i.e.
// each stride equals the next-inner extent times the next-inner stride.
bool InfiniopTensorDescriptor::isContiguous(size_t dim_start, size_t dim_end) const {
    // Rank-0 tensors are trivially contiguous.
    if (ndim() == 0) {
        return true;
    }
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
        if (stride(i - 1) != static_cast<ptrdiff_t>(dim(i)) * stride(i)) {
            return false;
        }
    }
    return true;
}
// Whole-tensor contiguity. For ndim() == 0 the expression ndim() - 1 wraps,
// but the two-argument overload returns early on rank 0, so this is safe.
bool InfiniopTensorDescriptor::isContiguous() const {
    return isContiguous(0, ndim() - 1);
}
// Merges dimensions [dim_start, dim_end] into one dimension whose extent is
// the product of the merged extents and whose stride is the innermost
// (dim_end) stride. Returns a newly allocated descriptor, or nullptr when
// the range is invalid or the dimensions are not contiguous in memory.
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimMerge(size_t dim_start, size_t dim_end) const {
    if (dim_start > dim_end || dim_end >= ndim()) {
        return nullptr;
    }
    // Check mergeability up front, before allocating or copying anything
    // (the original only checked after filling the leading dimensions).
    if (!isContiguous(dim_start, dim_end)) {
        return nullptr;
    }
    size_t new_ndim = ndim() - (dim_end - dim_start);
    std::vector<size_t> new_shape(new_ndim);
    std::vector<ptrdiff_t> new_strides(new_ndim);
    size_t index = 0;
    // Dimensions before the merged range are copied unchanged.
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    // Merged dimension: product of extents, stride of the innermost dim.
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= dim(i);
    }
    new_strides[index] = stride(dim_end);
    index++;
    // Dimensions after the merged range are copied unchanged.
    for (size_t i = dim_end + 1; i < ndim(); i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    return new InfiniopTensorDescriptor(_dtype, new_ndim, new_shape.data(), new_strides.data());
}
// Splits dimension `axis` into multiple dimensions with extents `dims`.
// Returns a newly allocated descriptor, or nullptr when the new extents do
// not multiply back to the original extent of that dimension.
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimSplit(size_t axis, const std::vector<size_t> &dims) const {
    size_t ndim_ = ndim();
    if (dim(axis) != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies<size_t>())) {
        return nullptr;
    }
    size_t new_ndim = ndim_ + dims.size() - 1;
    std::vector<size_t> new_shape(new_ndim);
    std::vector<ptrdiff_t> new_strides(new_ndim);
    size_t index = 0;
    // Dimensions before the split point are copied unchanged.
    for (size_t i = 0; i < axis; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    // The i-th split dimension's stride is the original stride scaled by the
    // product of the split extents inner to it:
    //   stride(axis) * dim(axis) / prod(dims[0..i]) == stride(axis) * prod(dims[i+1..]).
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
        new_strides[index] = stride(axis) * dim(axis) / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
        index++;
    }
    // Dimensions after the split point are copied unchanged.
    for (size_t i = axis + 1; i < ndim_; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    return new InfiniopTensorDescriptor(_dtype, new_ndim, new_shape.data(), new_strides.data());
}
// Returns a newly allocated descriptor whose dimension i takes the extent
// and stride of source dimension order[i]; nullptr when `order` is not a
// permutation of [0, ndim()).
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimPermute(const std::vector<size_t> &order) const {
    auto ndim_ = ndim();
    if (order.size() != ndim_) {
        return nullptr;
    }
    std::vector<size_t> new_shape(ndim_);
    std::vector<ptrdiff_t> new_strides(ndim_);
    for (size_t i = 0; i < ndim_; i++) {
        // Every index in [0, ndim_) must appear in `order`; together with the
        // length check this rejects duplicates and out-of-range entries.
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
        new_shape[i] = dim(order[i]);
        new_strides[i] = stride(order[i]);
    }
    return new InfiniopTensorDescriptor(_dtype, ndim_, new_shape.data(), new_strides.data());
}
std::string InfiniopTensorDescriptor::toString() const {
std::string str = "dtype: " + infiniDtypeToString(_dtype) + ", shape: [";
for (size_t i = 0; i < ndim(); i++) {
str += std::to_string(dim(i)) + (i == ndim() - 1 ? "" : ", ");
}
str += "], strides: [";
for (size_t i = 0; i < ndim(); i++) {
str += std::to_string(stride(i)) + (i == ndim() - 1 ? "" : ", ");
}
str += "]";
return str;
}
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "infinicore.h"
#include "utils/check.h"
#include "utils/rearrange.h"
// Size in bytes of one element of the given dtype.
// INVALID and unrecognized dtypes report a size of 0.
inline size_t infiniSizeOf(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_BYTE:
    case INFINI_DTYPE_BOOL:
    case INFINI_DTYPE_I8:
    case INFINI_DTYPE_U8:
    case INFINI_DTYPE_F8:
        return 1;
    case INFINI_DTYPE_I16:
    case INFINI_DTYPE_U16:
    case INFINI_DTYPE_F16:
    case INFINI_DTYPE_C8:
    case INFINI_DTYPE_BF16:
        return 2;
    case INFINI_DTYPE_I32:
    case INFINI_DTYPE_U32:
    case INFINI_DTYPE_F32:
    case INFINI_DTYPE_C16:
        return 4;
    case INFINI_DTYPE_I64:
    case INFINI_DTYPE_U64:
    case INFINI_DTYPE_F64:
    case INFINI_DTYPE_C32:
        return 8;
    case INFINI_DTYPE_C64:
        return 16;
    case INFINI_DTYPE_INVALID:
    default:
        return 0;
    }
}
// Human-readable name of a dtype (for logging / toString()). Unrecognized
// values map to "INVALID", the same string as INFINI_DTYPE_INVALID.
inline std::string infiniDtypeToString(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_INVALID:
        return "INVALID";
    case INFINI_DTYPE_BYTE:
        return "BYTE";
    case INFINI_DTYPE_BOOL:
        return "BOOL";
    case INFINI_DTYPE_I8:
        return "I8";
    case INFINI_DTYPE_I16:
        return "I16";
    case INFINI_DTYPE_I32:
        return "I32";
    case INFINI_DTYPE_I64:
        return "I64";
    case INFINI_DTYPE_U8:
        return "U8";
    case INFINI_DTYPE_U16:
        return "U16";
    case INFINI_DTYPE_U32:
        return "U32";
    case INFINI_DTYPE_U64:
        return "U64";
    case INFINI_DTYPE_F8:
        return "F8";
    case INFINI_DTYPE_F16:
        return "F16";
    case INFINI_DTYPE_F32:
        return "F32";
    case INFINI_DTYPE_F64:
        return "F64";
    case INFINI_DTYPE_C8:
        return "C8";
    case INFINI_DTYPE_C16:
        return "C16";
    case INFINI_DTYPE_C32:
        return "C32";
    case INFINI_DTYPE_C64:
        return "C64";
    case INFINI_DTYPE_BF16:
        return "BF16";
    default:
        return "INVALID";
    }
}
#endif
......@@ -15,4 +15,6 @@
// Returns INFINI_STATUS_INTERNAL_ERROR from the caller when API's result is
// not EXPECT (for wrapping vendor-runtime calls).
#define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR)
// Propagates a non-success status from the caller. NOTE(review): relies on
// `api_result_` being the result variable bound inside CHECK_API_OR — verify
// against that macro's definition.
#define CHECK_STATUS(API) CHECK_API_OR(API, INFINI_STATUS_SUCCESS, return api_result_)
#endif // INFINIUTILS_CHECK_H
from calendar import c
import os
import platform
import ctypes
from ctypes import c_int, c_int64, c_uint64, Structure, POINTER, c_size_t
from ctypes import c_int, c_int64, c_uint64, Structure, POINTER
from .datatypes import *
from .devices import *
from pathlib import Path
......@@ -14,17 +13,7 @@ INFINI_ROOT = os.getenv("INFINI_ROOT") or str(Path.home() / ".infini")
class TensorDescriptor(Structure):
    # The descriptor is opaque on the C side after the refactor (its layout
    # lives in the C++ implementation), so no fields are mirrored here;
    # Python only passes pointers to it around.
    # NOTE(review): the scraped diff kept the old field list and the
    # `invalidate` helper alongside the new empty `_fields_`; only the opaque
    # form is intended.
    _fields_ = []


infiniopTensorDescriptor_t = ctypes.POINTER(TensorDescriptor)
......@@ -35,6 +24,10 @@ class CTensor:
self.descriptor = desc
self.torch_tensor_ = torch_tensor
self.data = torch_tensor.data_ptr()
def destroyDesc(self, lib_):
    # Release the C-side descriptor through the library handle, then drop the
    # Python reference so the descriptor cannot be used or freed twice.
    lib_.infiniopDestroyTensorDescriptor(self.descriptor)
    self.descriptor = None
class Handle(Structure):
......@@ -73,6 +66,9 @@ def open_lib():
POINTER(c_int64),
c_int,
]
lib.infiniopCreateTensorDescriptor.restype = c_int
lib.infiniopDestroyTensorDescriptor.argtypes = [infiniopTensorDescriptor_t]
lib.infiniopDestroyTensorDescriptor.restype = c_int
lib.infiniopCreateHandle.argtypes = [POINTER(infiniopHandle_t), c_int, c_int]
lib.infiniopCreateHandle.restype = c_int
lib.infiniopDestroyHandle.argtypes = [infiniopHandle_t]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment