#ifndef INFER_TENSOR_H #define INFER_TENSOR_H #include "infinicore_infer.h" #include "utils.hpp" #include #include #include struct Storage { void *memory; size_t size; infiniDevice_t device_type; int device_id; static std::shared_ptr create(size_t size); static std::shared_ptr createAsync(size_t size, infinirtStream_t stream = nullptr); static std::shared_ptr createHost(size_t size); ~Storage(); }; struct SliceParams { size_t dim; size_t start; size_t len; }; class TensorDesc { private: infiniopTensorDescriptor_t _desc; public: static std::shared_ptr create(infiniDtype_t dtype, const std::vector &shape, const std::vector &strides); infiniopTensorDescriptor_t get() const { return _desc; }; ~TensorDesc(); }; class Tensor : public std::enable_shared_from_this { private: infiniDtype_t _dtype; std::vector _shape; std::vector _strides; void *_data; ptrdiff_t _offset; size_t _size; std::shared_ptr storage; infiniopTensorDescriptor_t _desc; void *data_impl(ptrdiff_t offset) const; std::shared_ptr slice_impl(const std::vector &slices) const; public: static std::shared_ptr buffer(infiniDtype_t dtype, const std::vector &shape, infinirtStream_t stream = nullptr); static std::shared_ptr weight(void *host_data, infiniDtype_t dtype, const std::vector &shape); std::shared_ptr slice(size_t dim, size_t start, size_t len); std::shared_ptr slice(size_t dim, size_t start, size_t len) const; std::shared_ptr slice(const std::vector &slices); std::shared_ptr slice(const std::vector &slices) const; std::shared_ptr dim_merge(size_t dim_start, size_t dim_end); std::shared_ptr dim_split(size_t dim, const std::vector &dims); std::shared_ptr permute(const std::vector &order); void *data(ptrdiff_t offset = 0); void const *data(ptrdiff_t offset = 0) const; void copy_from(std::shared_ptr src, infiniopHandle_t handle, infinirtStream_t stream = nullptr); const std::vector &shape() const; const std::vector &strides() const; size_t ndim() const; infiniDtype_t dtype() const; std::shared_ptr desc() const; size_t byte_size() const; ptrdiff_t data_offset() const; infiniDevice_t device_type() const; int device_id() const; bool is_contigous() const; void debug(const std::string &filename) const; void debug() const; ~Tensor(); }; inline size_t dsize(infiniDtype_t dtype) { switch (dtype) { case INFINI_DTYPE_INVALID: return 0; case INFINI_DTYPE_BYTE: return 1; case INFINI_DTYPE_BOOL: return 1; case INFINI_DTYPE_I8: return 1; case INFINI_DTYPE_I16: return 2; case INFINI_DTYPE_I32: return 4; case INFINI_DTYPE_I64: return 8; case INFINI_DTYPE_U8: return 1; case INFINI_DTYPE_U16: return 2; case INFINI_DTYPE_U32: return 4; case INFINI_DTYPE_U64: return 8; case INFINI_DTYPE_F8: return 1; case INFINI_DTYPE_F16: return 2; case INFINI_DTYPE_F32: return 4; case INFINI_DTYPE_F64: return 8; case INFINI_DTYPE_C16: return 2; case INFINI_DTYPE_C32: return 4; case INFINI_DTYPE_C64: return 8; case INFINI_DTYPE_C128: return 16; case INFINI_DTYPE_BF16: return 2; default: return 0; } } #endif