Unverified Commit 3cbe5e02 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #84 from PanZezhong1725/issue/82

issue/82 重构tensor描述
parents 9874946c 0a9029f8
......@@ -71,51 +71,5 @@ typedef enum {
INFINI_DTYPE_BF16 = 19,
} infiniDtype_t;
// Size in bytes of one element of the given dtype.
// INVALID and unrecognized dtypes report a size of 0.
inline size_t infiniSizeof(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_BYTE:
    case INFINI_DTYPE_BOOL:
    case INFINI_DTYPE_I8:
    case INFINI_DTYPE_U8:
    case INFINI_DTYPE_F8:
        return 1;
    case INFINI_DTYPE_I16:
    case INFINI_DTYPE_U16:
    case INFINI_DTYPE_F16:
    case INFINI_DTYPE_C8:
    case INFINI_DTYPE_BF16:
        return 2;
    case INFINI_DTYPE_I32:
    case INFINI_DTYPE_U32:
    case INFINI_DTYPE_F32:
    case INFINI_DTYPE_C16:
        return 4;
    case INFINI_DTYPE_I64:
    case INFINI_DTYPE_U64:
    case INFINI_DTYPE_F64:
    case INFINI_DTYPE_C32:
        return 8;
    case INFINI_DTYPE_C64:
        return 16;
    case INFINI_DTYPE_INVALID:
    default:
        return 0;
    }
}
#endif // __INFINICORE_H__
......@@ -3,16 +3,7 @@
#include "../infinicore.h"
// The tensor descriptor is opaque to API users; its definition lives in the
// C++ implementation (src/infiniop/tensor.h). Callers interact only through
// the infiniopTensorDescriptor_t handle.
// NOTE(review): the scraped diff left both the old full struct definition and
// this forward declaration in place, which would not compile; only the
// forward declaration belongs in this header.
struct InfiniopTensorDescriptor;

typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;
......
#include "common_ascend.h"
// Product of the first `num` entries of `shape`; 1 for an empty shape.
int64_t numElements(const int64_t *shape, int64_t num) {
    int64_t count = 1;
    for (int64_t d = 0; d < num; ++d) {
        count *= shape[d];
    }
    return count;
}
infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize) {
*workspaceAddr = nullptr;
if (workspaceSize > 0) {
......
......@@ -26,7 +26,6 @@ struct InfiniopAscendHandle {
int device_id;
};
// Product of the first `num` entries of `shape` (total element count).
int64_t numElements(const int64_t *shape, int64_t num);
// Human-readable name of an ACL data type (for logging/debugging).
const char *dataTypeToString(aclDataType dtype);
// Human-readable name of an ACL memory format (for logging/debugging).
const char *formatToString(aclFormat format);
// Allocates `workspaceSize` bytes of workspace into *workspaceAddr;
// *workspaceAddr is set to nullptr first and stays null when size is 0.
infiniStatus_t mallocWorkspace(void **workspaceAddr, size_t workspaceSize);
......
#include "tensor_aclnn.h"
#include "../../ops/utils.h"
#include "../../../utils.h"
#include <algorithm>
infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std::vector<int64_t> &shape, const std::vector<int64_t> &strides) {
......@@ -16,7 +16,7 @@ infiniStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const std
aclFormat format = aclFormat::ACL_FORMAT_ND;
this->format = format;
CHECK_STATUS(this->inferStorageShape(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(this->inferStorageShape());
return INFINI_STATUS_SUCCESS;
}
......
#ifndef __COMMON_BANG_H__
#define __COMMON_BANG_H__
#include "../../../utils.h"
#include "../pool.h"
#include "bang_handle.h"
#include "cnnl.h"
......
#ifndef __INFINIOP_COMMON_CPU_H__
#define __INFINIOP_COMMON_CPU_H__
#include "../../../utils.h"
#include <cmath>
#include <cstddef>
#include <cstdint>
......
#ifndef __INFINIOP_COMMON_KUNLUN_H__
#define __INFINIOP_COMMON_KUNLUN_H__
#include "../../../utils.h"
#include "../pool.h"
#include "infinicore.h"
#include "kunlun_handle.h"
......@@ -13,15 +14,7 @@ namespace xdnn = baidu::xpu::api;
typedef xdnn::Context *xdnnHandle_t;
typedef XPUStream KunlunStream_t;
// Wraps a Kunlun runtime call: any status other than XPU_SUCCESS makes the
// enclosing function return INFINI_STATUS_INTERNAL_ERROR (via CHECK_INTERNAL
// from utils/check.h).
// NOTE(review): the scraped diff kept both the old hand-rolled definition and
// this one, redefining the macro; only this definition is intended.
#define CHECK_KUNLUN(call) CHECK_INTERNAL(call, XPU_SUCCESS)
struct InfiniopKunlunHandle {
infiniDevice_t device;
......
#include "matmul_ascend.h"
#include "../../../devices/ascend/ascend_handle.h"
#include "../../../devices/ascend/tensor_aclnn.h"
#include "../../utils.h"
#include <acl/acl_base.h>
#include <aclnn/acl_meta.h>
#include <aclnnop/aclnn_matmul.h>
......@@ -55,27 +54,24 @@ infiniStatus_t Descriptor::create(
// Treat A, B, C as 2D matrix, reuse aclnnTensorDescriptor for batched
// operation
CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}));
CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}));
CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}),
INFINI_STATUS_SUCCESS);
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}));
CHECK_STATUS(c->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(a->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(b->createTensor(), INFINI_STATUS_SUCCESS);
CHECK_STATUS(c->createTensor());
CHECK_STATUS(a->createTensor());
CHECK_STATUS(b->createTensor());
auto tc = c->t,
ta = a->t,
......@@ -127,7 +123,7 @@ infiniStatus_t Descriptor::calculate(
}
aclSetAclOpExecutorRepeatable(_opaque->executor);
auto unit = infiniSizeof(_dtype);
auto unit = infiniSizeOf(_dtype);
for (size_t i = 0; i < _info.batch; ++i) {
AclSetTensorAddr(_opaque->executor, 0, ta, ((char *)a) + i * _info.a_matrix.stride * unit);
AclSetTensorAddr(_opaque->executor, 1, tb, ((char *)b) + i * _info.b_matrix.stride * unit);
......
#include "matmul_bang.h"
#include "../../../devices/bang/bang_handle.h"
#include "../../../devices/bang/common_bang.h"
#include "../../utils.h"
#include <cnnl_extra.h>
namespace matmul::bang {
......
#ifndef __BLAS_H__
#define __BLAS_H__
#include "../../tensor.h"
#include "infiniop/operator.h"
#include <algorithm>
......@@ -17,22 +18,22 @@ struct BlasMatrix {
BlasMatrix() = default;
BlasMatrix(infiniopTensorDescriptor_t layout, infiniStatus_t *status) {
if (layout->ndim == 2) {
if (layout->ndim() == 2) {
ndim = 2;
batch = 1;
stride = 0;
rows = layout->shape[0];
cols = layout->shape[1];
row_stride = layout->strides[0];
col_stride = layout->strides[1];
} else if (layout->ndim == 3) {
rows = layout->dim(0);
cols = layout->dim(1);
row_stride = layout->stride(0);
col_stride = layout->stride(1);
} else if (layout->ndim() == 3) {
ndim = 3;
batch = layout->shape[0];
stride = batch == 1 ? 0 : layout->strides[0];
rows = layout->shape[1];
cols = layout->shape[2];
row_stride = layout->strides[1];
col_stride = layout->strides[2];
batch = layout->dim(0);
stride = batch == 1 ? 0 : layout->stride(0);
rows = layout->dim(1);
cols = layout->dim(2);
row_stride = layout->stride(1);
col_stride = layout->stride(2);
} else {
*status = INFINI_STATUS_BAD_TENSOR_SHAPE;
return;
......
......@@ -13,7 +13,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopCpuHandle_t>(handle_);
auto dtype = c_desc->dtype;
auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE;
......
#include "../../../devices/cuda/common_cuda.cuh"
#include "../../utils.h"
#include "matmul_cuda.cuh"
namespace matmul::cuda {
......@@ -19,7 +18,7 @@ infiniStatus_t Descriptor::create(
infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc) {
auto handle = reinterpret_cast<infiniopCudaHandle_t>(handle_);
auto dtype = c_desc->dtype;
auto dtype = c_desc->dtype();
if (dtype != INFINI_DTYPE_F16 && dtype != INFINI_DTYPE_F32) {
return INFINI_STATUS_BAD_TENSOR_DTYPE;
......
......@@ -56,7 +56,7 @@ void calculate(
auto transA = info.a_matrix.col_stride == 1 ? false : true;
auto transB = info.b_matrix.col_stride == 1 ? false : true;
auto unit = infiniSizeof(dtype);
auto unit = infiniSizeOf(dtype);
use_xdnn(xdnn_handle_pool,
(KunlunStream_t)stream,
......
#ifndef __UTILS_H__
#define __UTILS_H__
#include "infiniop/tensor_descriptor.h"
#include <algorithm>
#include <iostream>
#include <numeric>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
/* This file contains some useful macros and helper functions */
// Ceiling integer division of x by y.
// NOTE(review): the arguments are not parenthesized in the expansion, so only
// simple expressions should be passed (ROUND_UP_DIV(a + b, c) would misexpand).
#define ROUND_UP_DIV(x, y) ((x + y - 1) / y)

// Evaluates `call` once; if the result EQUALS `target` (the designated
// failure value), logs file/function/line to stderr and returns `errCode`
// from the enclosing function.
#define CHECK_ERROR(call, target, errCode)                             \
    do {                                                               \
        if (auto value = (call); value == (target)) {                  \
            std::cerr << "Error: expected " << (target) << " but got " \
                      << value << " in file " << __FILE__ << ", function " \
                      << __func__ << ", line " << __LINE__ << std::endl; \
            return (errCode);                                          \
        }                                                              \
    } while (0)

// Runs `expr` (typically a declaration producing `value`), then applies
// CHECK_ERROR to `value`.
#define CREATE_CHECK_ERROR(expr, value, target, errCode) \
    expr;                                                \
    CHECK_ERROR(value, target, errCode)

// Evaluates `call` once; if the result is NOT `target` (the expected success
// value), logs the location to stderr and returns the observed value from
// the enclosing function.
#define CHECK_STATUS(call, target)                                     \
    do {                                                               \
        if (auto value = (call); value != (target)) {                  \
            std::cerr << "Error: expected " << (target) << " but got " \
                      << value << " in file " << __FILE__ << ", function " \
                      << __func__ << ", line " << __LINE__ << std::endl; \
            return value;                                              \
        }                                                              \
    } while (0)
// Converts the descriptor's per-dimension element strides into byte strides
// by scaling each one with the element size of the tensor's dtype.
inline std::vector<ptrdiff_t> getByteStrides(infiniopTensorDescriptor_t desc) {
    std::vector<ptrdiff_t> strides(desc->ndim);
    for (size_t i = 0; i < desc->ndim; i++) {
        strides[i] = desc->strides[i] * infiniSizeof(desc->dtype);
    }
    return strides;
}
// Total size of the tensor's data in bytes: element count times element size.
inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
    const size_t numel = std::accumulate(desc->shape, desc->shape + desc->ndim,
                                         (size_t)1, std::multiplies<size_t>());
    return numel * infiniSizeof(desc->dtype);
}
// calculate the broadcasted shape for two tensors
// Computes the numpy-style broadcast of two shapes. Both inputs are
// right-aligned into `max_rank` dimensions (missing leading dims become 1)
// and written to padded_shape1/padded_shape2. On success broadcast_shape[i]
// is max(padded1[i], padded2[i]); returns false as soon as a dimension pair
// is incompatible (unequal and neither is 1).
inline bool getBroadcastShape(const size_t *shape1, size_t ndim1,
                              const size_t *shape2, size_t ndim2,
                              size_t *broadcast_shape,
                              size_t *padded_shape1, size_t *padded_shape2,
                              size_t max_rank) {
    // Right-align both shapes into max_rank slots, padding with 1s.
    std::fill(padded_shape1, padded_shape1 + max_rank, 1);
    std::fill(padded_shape2, padded_shape2 + max_rank, 1);
    std::copy(shape1, shape1 + ndim1, padded_shape1 + max_rank - ndim1);
    std::copy(shape2, shape2 + ndim2, padded_shape2 + max_rank - ndim2);

    for (size_t d = 0; d < max_rank; ++d) {
        const size_t a = padded_shape1[d];
        const size_t b = padded_shape2[d];
        if (a != b && a != 1 && b != 1) {
            return false; // incompatible dimension pair
        }
        broadcast_shape[d] = std::max(a, b);
    }
    return true;
}
// Returns true iff broadcasting tensors a and b succeeds and the resulting
// shape equals tensor c's shape exactly. `broadcast_ndim` must equal c->ndim
// (it is also used as the padded rank for the broadcast computation).
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c,
                                  size_t broadcast_ndim) {
    // Scratch buffers for the broadcast result and the right-aligned inputs.
    std::vector<size_t> broadcast_shape_(broadcast_ndim),
        padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim);
    auto broadcast_shape = broadcast_shape_.data(),
         padded_shape1 = padded_shape1_.data(),
         padded_shape2 = padded_shape2_.data();
    if (broadcast_ndim != c->ndim || !getBroadcastShape(a->shape, a->ndim, b->shape, b->ndim, broadcast_shape, padded_shape1, padded_shape2, broadcast_ndim)) {
        return false;
    }
    // The computed broadcast shape must match c's shape element-wise.
    return std::equal(broadcast_shape, broadcast_shape + broadcast_ndim,
                      c->shape);
}
// Returns true iff src's shape can be broadcast to dst's shape: src is
// right-aligned against dst, and each src dimension must either equal the
// corresponding dst dimension or be 1.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t dst,
                                  infiniopTensorDescriptor_t src) {
    if (dst->ndim < src->ndim) {
        return false;
    }
    // Right-align src's shape into dst's rank, padding leading dims with 1.
    std::vector<size_t> padded_shape_(dst->ndim);
    auto padded_shape = padded_shape_.data();
    std::fill(padded_shape, padded_shape + dst->ndim, 1);
    std::copy(src->shape, src->shape + src->ndim,
              padded_shape + dst->ndim - src->ndim);
    for (size_t i = 0; i < dst->ndim; ++i) {
        if (padded_shape[i] != dst->shape[i] && padded_shape[i] != 1) {
            return false;
        }
    }
    return true;
}
// Returns true iff broadcasting a and b yields exactly c's shape; the
// broadcast rank used is the larger of a's and b's ranks.
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
                                  infiniopTensorDescriptor_t b,
                                  infiniopTensorDescriptor_t c) {
    return isValidBroadcastShape(a, b, c, std::max(a->ndim, b->ndim));
}
// permute the dimensions of a tensor descriptor
// Returns a new descriptor whose dimension i takes the shape/stride of the
// source's dimension order[i]. Returns nullptr when `order` is not a
// permutation of [0, ndim).
inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
                                          const std::vector<size_t> &order) {
    size_t ndim = desc->ndim;
    if (order.size() != ndim) {
        return nullptr;
    }
    // Validate `order` BEFORE allocating: the original allocated the arrays
    // first and leaked them on the in-loop nullptr return.
    for (size_t i = 0; i < ndim; i++) {
        // Every index must appear; combined with the length check this
        // rejects duplicates and out-of-range entries.
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
    }
    size_t *shape = new size_t[ndim];
    ptrdiff_t *strides = new ptrdiff_t[ndim];
    for (size_t i = 0; i < ndim; i++) {
        shape[i] = desc->shape[order[i]];
        strides[i] = desc->strides[order[i]];
    }
    return new InfiniopTensorDescriptor{desc->dtype, ndim, shape, strides};
}
// Returns true iff dimensions [dim_start, dim_end] of `desc` are contiguous
// in memory, i.e. stride[i-1] == shape[i] * stride[i] for each adjacent pair.
inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
                         size_t dim_start, size_t dim_end) {
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
        if (desc->strides[i - 1] != static_cast<ptrdiff_t>(desc->shape[i]) * desc->strides[i]) {
            return false;
        }
    }
    return true;
}
// Whole-tensor contiguity; rank-0 tensors are trivially contiguous (the
// early return also avoids the unsigned underflow of ndim - 1).
inline bool isContiguous(const infiniopTensorDescriptor_t &desc) {
    if (desc->ndim == 0) {
        return true;
    }
    return isContiguous(desc, 0, desc->ndim - 1);
}
// merge the dimensions [dim_start, dim_end] of a tensor descriptor
// Merges dimensions [dim_start, dim_end] of `desc` into one dimension whose
// extent is the product of the merged extents and whose stride is the
// innermost (dim_end) stride. Returns nullptr when the range is invalid or
// the dimensions are not contiguous in memory.
inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
                                           size_t dim_start, size_t dim_end) {
    size_t ndim = desc->ndim;
    if (dim_start > dim_end || dim_end >= ndim) {
        return nullptr;
    }
    // Check mergeability BEFORE allocating: the original allocated the new
    // arrays first and leaked them on this failure path.
    if (!isContiguous(desc, dim_start, dim_end)) {
        return nullptr;
    }
    size_t new_ndim = ndim - (dim_end - dim_start);
    size_t *new_shape = new size_t[new_ndim];
    ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
    size_t index = 0;
    // Dimensions before the merged range are copied unchanged.
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    // The merged dimension: product of extents, stride of the innermost dim.
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= desc->shape[i];
    }
    new_strides[index] = desc->strides[dim_end];
    index++;
    // Dimensions after the merged range are copied unchanged.
    for (size_t i = dim_end + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
}
// Splits dimension `dim` of `desc` into multiple dimensions with extents
// `dims`. Returns nullptr when the new extents do not multiply back to the
// original extent of that dimension.
inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
                                           size_t dim,
                                           const std::vector<size_t> &dims) {
    size_t ndim = desc->ndim;
    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies{})) {
        return nullptr;
    }
    size_t new_ndim = ndim + dims.size() - 1;
    size_t *new_shape = new size_t[new_ndim];
    ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
    size_t index = 0;
    // Dimensions before the split point are copied unchanged.
    for (size_t i = 0; i < dim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    // The i-th split dimension's stride is the original stride scaled by the
    // product of the split extents inner to it:
    //   strides[dim] * shape[dim] / prod(dims[0..i]) == strides[dim] * prod(dims[i+1..]).
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
        new_strides[index] = desc->strides[dim] * desc->shape[dim] / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
        index++;
    }
    // Dimensions after the split point are copied unchanged.
    for (size_t i = dim + 1; i < ndim; i++) {
        new_shape[index] = desc->shape[i];
        new_strides[index] = desc->strides[i];
        index++;
    }
    return new InfiniopTensorDescriptor{desc->dtype, new_ndim, new_shape,
                                        new_strides};
}
#endif // __UTILS_H__
#ifndef __INFINIOP_TENSOR_H__
#define __INFINIOP_TENSOR_H__
#include "infiniop/tensor_descriptor.h"
#include <string>
#include <vector>
// Implementation of the opaque infiniopTensorDescriptor_t handle: an element
// dtype plus per-dimension shape and element strides.
struct InfiniopTensorDescriptor {
private:
    // Element datatype
    infiniDtype_t _dtype;
    // Shape of the tensor, one extent per dimension
    std::vector<size_t> _shape;
    // Stride of each dimension, measured in elements (not bytes)
    std::vector<ptrdiff_t> _strides;

public:
    // Copies `ndim` entries from shape/strides into owned storage.
    InfiniopTensorDescriptor(infiniDtype_t dtype, size_t ndim, const size_t *shape, const ptrdiff_t *strides);
    ~InfiniopTensorDescriptor() = default;
    infiniDtype_t dtype() const;            // element datatype
    std::vector<size_t> shape() const;      // copy of the full shape
    size_t dim(size_t i) const;             // extent of dimension i
    size_t ndim() const;                    // number of dimensions (rank)
    std::vector<ptrdiff_t> strides() const; // copy of the element strides
    ptrdiff_t stride(size_t i) const;       // element stride of dimension i
    std::vector<ptrdiff_t> getByteStrides() const; // strides scaled to bytes
    // True iff dimensions [dim_start, dim_end] are contiguous in memory.
    bool isContiguous(size_t dim_start, size_t dim_end) const;
    bool isContiguous() const;
    // Total number of elements (product of all extents).
    size_t numel() const;
    // The following return a newly allocated descriptor, or nullptr when the
    // requested transformation is invalid.
    infiniopTensorDescriptor_t dimMerge(size_t dim_start, size_t dim_end) const;
    infiniopTensorDescriptor_t dimSplit(size_t axis, const std::vector<size_t> &dims) const;
    infiniopTensorDescriptor_t dimPermute(const std::vector<size_t> &order) const;
    // Debug string: dtype, shape and strides.
    std::string toString() const;
};
#endif // __INFINIOP_TENSOR_H__
#include "infiniop/tensor_descriptor.h"
#include "../utils.h"
#include "tensor.h"
#include <cstring>
#include <functional>
#include <numeric>
// Creates a tensor descriptor from a dtype, shape and optional strides.
// When `strides_` is null, contiguous row-major strides are synthesized.
// Always returns INFINI_STATUS_SUCCESS.
__C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, ptrdiff_t const *strides_, infiniDtype_t datatype) {
    if (strides_ != nullptr) {
        *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides_);
    } else {
        // No strides given: synthesize contiguous (row-major) strides.
        std::vector<ptrdiff_t> strides(ndim);
        ptrdiff_t dsize = 1;
        // Iterate from the innermost dimension outwards. `i` is unsigned, so
        // the original `for (size_t i = ndim - 1; i >= 0; i--)` condition was
        // always true and underflowed; count down with `i-- > 0` instead.
        for (size_t i = ndim; i-- > 0;) {
            strides[i] = dsize;
            dsize *= shape_[i];
        }
        *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides.data());
    }
    return INFINI_STATUS_SUCCESS;
}
// Destroys a descriptor created by infiniopCreateTensorDescriptor.
// Shape/stride storage is owned by the descriptor's std::vector members, so
// deleting the object releases everything; the old `delete[] desc->shape` /
// `delete[] desc->strides` lines targeted raw-array members that no longer
// exist (and are private in the refactored class).
__C __export infiniStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc) {
    delete desc;
    return INFINI_STATUS_SUCCESS;
}
// Copies `ndim` entries from the caller's shape/stride arrays into the
// descriptor's own storage.
InfiniopTensorDescriptor::InfiniopTensorDescriptor(infiniDtype_t dtype, size_t ndim, const size_t *shape, const ptrdiff_t *strides)
    : _dtype(dtype),
      _shape(shape, shape + ndim),
      _strides(strides, strides + ndim) {}
// Element datatype of the tensor.
infiniDtype_t InfiniopTensorDescriptor::dtype() const {
    return _dtype;
}
// Returns a copy of the tensor's shape.
std::vector<size_t> InfiniopTensorDescriptor::shape() const {
    // Returning the member by value already yields a copy; rebuilding the
    // vector from an iterator pair was redundant.
    return _shape;
}
// Extent of dimension i. NOTE(review): unchecked access — callers must
// ensure i < ndim().
size_t InfiniopTensorDescriptor::dim(size_t i) const {
    return _shape[i];
}
// Number of dimensions (rank) of the tensor.
size_t InfiniopTensorDescriptor::ndim() const {
    return _shape.size();
}
// Returns a copy of the per-dimension element strides.
std::vector<ptrdiff_t> InfiniopTensorDescriptor::strides() const {
    // Returning the member by value already yields a copy; rebuilding the
    // vector from an iterator pair was redundant.
    return _strides;
}
// Element stride of dimension i. NOTE(review): unchecked access — callers
// must ensure i < ndim().
ptrdiff_t InfiniopTensorDescriptor::stride(size_t i) const {
    return _strides[i];
}
// Total number of elements: product of all extents (1 for rank-0 tensors).
size_t InfiniopTensorDescriptor::numel() const {
    return std::accumulate(_shape.begin(), _shape.end(), (size_t)1, std::multiplies<size_t>());
}
// Per-dimension strides converted from elements to bytes.
std::vector<ptrdiff_t> InfiniopTensorDescriptor::getByteStrides() const {
    const ptrdiff_t elem_size = infiniSizeOf(_dtype);
    std::vector<ptrdiff_t> byte_strides;
    byte_strides.reserve(_strides.size());
    for (ptrdiff_t s : _strides) {
        byte_strides.push_back(s * elem_size);
    }
    return byte_strides;
}
// True iff dimensions [dim_start, dim_end] are laid out contiguously, i.e.
// each stride equals the next-inner extent times the next-inner stride.
bool InfiniopTensorDescriptor::isContiguous(size_t dim_start, size_t dim_end) const {
    // Rank-0 tensors are trivially contiguous.
    if (ndim() == 0) {
        return true;
    }
    for (size_t i = dim_start + 1; i <= dim_end; i++) {
        if (stride(i - 1) != static_cast<ptrdiff_t>(dim(i)) * stride(i)) {
            return false;
        }
    }
    return true;
}
// Whole-tensor contiguity. For ndim() == 0 the expression ndim() - 1 wraps,
// but the two-argument overload returns early on rank 0, so this is safe.
bool InfiniopTensorDescriptor::isContiguous() const {
    return isContiguous(0, ndim() - 1);
}
// Merges dimensions [dim_start, dim_end] into one dimension whose extent is
// the product of the merged extents and whose stride is the innermost
// (dim_end) stride. Returns a newly allocated descriptor, or nullptr when
// the range is invalid or the dimensions are not contiguous in memory.
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimMerge(size_t dim_start, size_t dim_end) const {
    if (dim_start > dim_end || dim_end >= ndim()) {
        return nullptr;
    }
    // Check mergeability up front, before allocating or copying anything
    // (the original only checked after filling the leading dimensions).
    if (!isContiguous(dim_start, dim_end)) {
        return nullptr;
    }
    size_t new_ndim = ndim() - (dim_end - dim_start);
    std::vector<size_t> new_shape(new_ndim);
    std::vector<ptrdiff_t> new_strides(new_ndim);
    size_t index = 0;
    // Dimensions before the merged range are copied unchanged.
    for (size_t i = 0; i < dim_start; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    // Merged dimension: product of extents, stride of the innermost dim.
    new_shape[index] = 1;
    for (size_t i = dim_start; i <= dim_end; i++) {
        new_shape[index] *= dim(i);
    }
    new_strides[index] = stride(dim_end);
    index++;
    // Dimensions after the merged range are copied unchanged.
    for (size_t i = dim_end + 1; i < ndim(); i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    return new InfiniopTensorDescriptor(_dtype, new_ndim, new_shape.data(), new_strides.data());
}
// Splits dimension `axis` into multiple dimensions with extents `dims`.
// Returns a newly allocated descriptor, or nullptr when the new extents do
// not multiply back to the original extent of that dimension.
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimSplit(size_t axis, const std::vector<size_t> &dims) const {
    size_t ndim_ = ndim();
    if (dim(axis) != std::accumulate(dims.begin(), dims.end(), (size_t)1, std::multiplies<size_t>())) {
        return nullptr;
    }
    size_t new_ndim = ndim_ + dims.size() - 1;
    std::vector<size_t> new_shape(new_ndim);
    std::vector<ptrdiff_t> new_strides(new_ndim);
    size_t index = 0;
    // Dimensions before the split point are copied unchanged.
    for (size_t i = 0; i < axis; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    // The i-th split dimension's stride is the original stride scaled by the
    // product of the split extents inner to it:
    //   stride(axis) * dim(axis) / prod(dims[0..i]) == stride(axis) * prod(dims[i+1..]).
    for (size_t i = 0; i < dims.size(); i++) {
        new_shape[index] = dims[i];
        new_strides[index] = stride(axis) * dim(axis) / std::accumulate(dims.begin(), dims.begin() + i + 1, (size_t)1, std::multiplies<size_t>());
        index++;
    }
    // Dimensions after the split point are copied unchanged.
    for (size_t i = axis + 1; i < ndim_; i++) {
        new_shape[index] = dim(i);
        new_strides[index] = stride(i);
        index++;
    }
    return new InfiniopTensorDescriptor(_dtype, new_ndim, new_shape.data(), new_strides.data());
}
// Returns a newly allocated descriptor whose dimension i takes the extent
// and stride of source dimension order[i]; nullptr when `order` is not a
// permutation of [0, ndim()).
infiniopTensorDescriptor_t InfiniopTensorDescriptor::dimPermute(const std::vector<size_t> &order) const {
    auto ndim_ = ndim();
    if (order.size() != ndim_) {
        return nullptr;
    }
    std::vector<size_t> new_shape(ndim_);
    std::vector<ptrdiff_t> new_strides(ndim_);
    for (size_t i = 0; i < ndim_; i++) {
        // Every index in [0, ndim_) must appear in `order`; together with the
        // length check this rejects duplicates and out-of-range entries.
        if (std::find(order.begin(), order.end(), i) == order.end()) {
            return nullptr;
        }
        new_shape[i] = dim(order[i]);
        new_strides[i] = stride(order[i]);
    }
    return new InfiniopTensorDescriptor(_dtype, ndim_, new_shape.data(), new_strides.data());
}
std::string InfiniopTensorDescriptor::toString() const {
std::string str = "dtype: " + infiniDtypeToString(_dtype) + ", shape: [";
for (size_t i = 0; i < ndim(); i++) {
str += std::to_string(dim(i)) + (i == ndim() - 1 ? "" : ", ");
}
str += "], strides: [";
for (size_t i = 0; i < ndim(); i++) {
str += std::to_string(stride(i)) + (i == ndim() - 1 ? "" : ", ");
}
str += "]";
return str;
}
#ifndef INFINIUTILS_H
#define INFINIUTILS_H
#include "infinicore.h"
#include "utils/check.h"
#include "utils/rearrange.h"
// Size in bytes of one element of the given dtype.
// INVALID and unrecognized dtypes report a size of 0.
inline size_t infiniSizeOf(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_BYTE:
    case INFINI_DTYPE_BOOL:
    case INFINI_DTYPE_I8:
    case INFINI_DTYPE_U8:
    case INFINI_DTYPE_F8:
        return 1;
    case INFINI_DTYPE_I16:
    case INFINI_DTYPE_U16:
    case INFINI_DTYPE_F16:
    case INFINI_DTYPE_C8:
    case INFINI_DTYPE_BF16:
        return 2;
    case INFINI_DTYPE_I32:
    case INFINI_DTYPE_U32:
    case INFINI_DTYPE_F32:
    case INFINI_DTYPE_C16:
        return 4;
    case INFINI_DTYPE_I64:
    case INFINI_DTYPE_U64:
    case INFINI_DTYPE_F64:
    case INFINI_DTYPE_C32:
        return 8;
    case INFINI_DTYPE_C64:
        return 16;
    case INFINI_DTYPE_INVALID:
    default:
        return 0;
    }
}
// Human-readable name of a dtype (for logging / toString()). Unrecognized
// values map to "INVALID", the same string as INFINI_DTYPE_INVALID.
inline std::string infiniDtypeToString(infiniDtype_t dtype) {
    switch (dtype) {
    case INFINI_DTYPE_INVALID:
        return "INVALID";
    case INFINI_DTYPE_BYTE:
        return "BYTE";
    case INFINI_DTYPE_BOOL:
        return "BOOL";
    case INFINI_DTYPE_I8:
        return "I8";
    case INFINI_DTYPE_I16:
        return "I16";
    case INFINI_DTYPE_I32:
        return "I32";
    case INFINI_DTYPE_I64:
        return "I64";
    case INFINI_DTYPE_U8:
        return "U8";
    case INFINI_DTYPE_U16:
        return "U16";
    case INFINI_DTYPE_U32:
        return "U32";
    case INFINI_DTYPE_U64:
        return "U64";
    case INFINI_DTYPE_F8:
        return "F8";
    case INFINI_DTYPE_F16:
        return "F16";
    case INFINI_DTYPE_F32:
        return "F32";
    case INFINI_DTYPE_F64:
        return "F64";
    case INFINI_DTYPE_C8:
        return "C8";
    case INFINI_DTYPE_C16:
        return "C16";
    case INFINI_DTYPE_C32:
        return "C32";
    case INFINI_DTYPE_C64:
        return "C64";
    case INFINI_DTYPE_BF16:
        return "BF16";
    default:
        return "INVALID";
    }
}
#endif
......@@ -15,4 +15,6 @@
// Returns INFINI_STATUS_INTERNAL_ERROR from the caller when API's result is
// not EXPECT (for wrapping vendor-runtime calls).
#define CHECK_INTERNAL(API, EXPECT) CHECK_API_OR(API, EXPECT, return INFINI_STATUS_INTERNAL_ERROR)
// Propagates a non-success status from the caller. NOTE(review): relies on
// `api_result_` being the result variable bound inside CHECK_API_OR — verify
// against that macro's definition.
#define CHECK_STATUS(API) CHECK_API_OR(API, INFINI_STATUS_SUCCESS, return api_result_)
#endif // INFINIUTILS_CHECK_H
from calendar import c
import os
import platform
import ctypes
from ctypes import c_int, c_int64, c_uint64, Structure, POINTER, c_size_t
from ctypes import c_int, c_int64, c_uint64, Structure, POINTER
from .datatypes import *
from .devices import *
from pathlib import Path
......@@ -14,17 +13,7 @@ INFINI_ROOT = os.getenv("INFINI_ROOT") or str(Path.home() / ".infini")
class TensorDescriptor(Structure):
    # The descriptor is opaque on the C side after the refactor (its layout
    # lives in the C++ implementation), so no fields are mirrored here;
    # Python only passes pointers to it around.
    # NOTE(review): the scraped diff kept the old field list and the
    # `invalidate` helper alongside the new empty `_fields_`; only the opaque
    # form is intended.
    _fields_ = []


infiniopTensorDescriptor_t = ctypes.POINTER(TensorDescriptor)
......@@ -35,6 +24,10 @@ class CTensor:
self.descriptor = desc
self.torch_tensor_ = torch_tensor
self.data = torch_tensor.data_ptr()
def destroyDesc(self, lib_):
    # Release the C-side descriptor through the library handle, then drop the
    # Python reference so the descriptor cannot be used or freed twice.
    lib_.infiniopDestroyTensorDescriptor(self.descriptor)
    self.descriptor = None
class Handle(Structure):
......@@ -73,6 +66,9 @@ def open_lib():
POINTER(c_int64),
c_int,
]
lib.infiniopCreateTensorDescriptor.restype = c_int
lib.infiniopDestroyTensorDescriptor.argtypes = [infiniopTensorDescriptor_t]
lib.infiniopDestroyTensorDescriptor.restype = c_int
lib.infiniopCreateHandle.argtypes = [POINTER(infiniopHandle_t), c_int, c_int]
lib.infiniopCreateHandle.restype = c_int
lib.infiniopDestroyHandle.argtypes = [infiniopHandle_t]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment