"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "02671abb44c8c34a940d79e777fc7c3b3ebae50e"
Commit 64849b43 authored by PanZezhong's avatar PanZezhong
Browse files

issue/53: 形状统一为size_t,步长统一为ptrdiff_t

parent 3c31dc6c
...@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t ...@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateAttentionDescriptor(infiniopHandle_t
infiniopTensorDescriptor_t v_desc, infiniopTensorDescriptor_t v_desc,
infiniopTensorDescriptor_t k_cache_desc, infiniopTensorDescriptor_t k_cache_desc,
infiniopTensorDescriptor_t v_cache_desc, infiniopTensorDescriptor_t v_cache_desc,
uint64_t pos); size_t pos);
__C __export infiniopStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size); __C __export infiniopStatus_t infiniopGetAttentionWorkspaceSize(infiniopAttentionDescriptor_t desc, size_t *size);
......
...@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h ...@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateAvgPoolDescriptor(infiniopHandle_t h
infiniopAvgPoolDescriptor_t *desc_ptr, infiniopAvgPoolDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y, infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x, infiniopTensorDescriptor_t x,
uint64_t const *kernel_shape, size_t const *kernel_shape,
uint64_t const *pads, size_t const *pads,
int64_t const *strides, ptrdiff_t const *strides,
uint64_t n); size_t n);
__C __export infiniopStatus_t infiniopGetAvgPoolWorkspaceSize(infiniopAvgPoolDescriptor_t desc, size_t *size); __C __export infiniopStatus_t infiniopGetAvgPoolWorkspaceSize(infiniopAvgPoolDescriptor_t desc, size_t *size);
......
...@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h ...@@ -9,10 +9,10 @@ __C __export infiniopStatus_t infiniopCreateMaxPoolDescriptor(infiniopHandle_t h
infiniopMaxPoolDescriptor_t *desc_ptr, infiniopMaxPoolDescriptor_t *desc_ptr,
infiniopTensorDescriptor_t y, infiniopTensorDescriptor_t y,
infiniopTensorDescriptor_t x, infiniopTensorDescriptor_t x,
uint64_t const *kernel_shape, size_t const *kernel_shape,
uint64_t const *pads, size_t const *pads,
int64_t const *strides, ptrdiff_t const *strides,
uint64_t n); size_t n);
__C __export infiniopStatus_t infiniopGetMaxPoolWorkspaceSize(infiniopMaxPoolDescriptor_t desc, size_t *size); __C __export infiniopStatus_t infiniopGetMaxPoolWorkspaceSize(infiniopMaxPoolDescriptor_t desc, size_t *size);
......
...@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor { ...@@ -12,12 +12,12 @@ struct InfiniopTensorDescriptor {
// Shape of the tensor, ndim elements // Shape of the tensor, ndim elements
size_t *shape; size_t *shape;
// Stride of each dimension in elements, ndim elements // Stride of each dimension in elements, ndim elements
int64_t *strides; ptrdiff_t *strides;
}; };
typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t; typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape, int64_t const *strides, infiniDtype_t dtype); __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape, ptrdiff_t const *strides, infiniDtype_t dtype);
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
......
...@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) { ...@@ -60,8 +60,8 @@ uint16_t f32_to_f16(float val) {
} }
size_t indexToReducedOffset(size_t flat_index, size_t ndim, size_t indexToReducedOffset(size_t flat_index, size_t ndim,
int64_t const *broadcasted_strides, ptrdiff_t const *broadcasted_strides,
int64_t const *target_strides) { ptrdiff_t const *target_strides) {
size_t res = 0; size_t res = 0;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i]; res += flat_index / broadcasted_strides[i] * target_strides[i];
...@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, ...@@ -71,7 +71,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
} }
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
int64_t const *strides) { ptrdiff_t const *strides) {
size_t res = 0; size_t res = 0;
for (size_t i = ndim; i-- >= 0;) { for (size_t i = ndim; i-- >= 0;) {
res += (flat_index % shape[i]) * strides[i]; res += (flat_index % shape[i]) * strides[i];
...@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, ...@@ -81,7 +81,7 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
} }
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) { size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
uint64_t total_size = 1; size_t total_size = 1;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
total_size *= shape[i] + (i < 2 ? 0 : 2 * pads[i - 2]); total_size *= shape[i] + (i < 2 ? 0 : 2 * pads[i - 2]);
} }
......
...@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code); ...@@ -13,10 +13,10 @@ float f16_to_f32(uint16_t code);
uint16_t f32_to_f16(float val); uint16_t f32_to_f16(float val);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor // return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broadcasted_strides, int64_t const *target_strides); size_t indexToReducedOffset(size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides, ptrdiff_t const *target_strides);
// return the memory offset a tensor given flattened index // return the memory offset a tensor given flattened index
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, int64_t const *strides); size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, ptrdiff_t const *strides);
/** /**
* get the total array size (element count) after applying padding for a * get the total array size (element count) after applying padding for a
......
...@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) { ...@@ -96,8 +96,8 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
// return the memory offset of original tensor, given the flattened index of // return the memory offset of original tensor, given the flattened index of
// broadcasted tensor // broadcasted tensor
inline __device__ __host__ size_t indexToReducedOffset( inline __device__ __host__ size_t indexToReducedOffset(
size_t flat_index, size_t ndim, int64_t const *broadcasted_strides, size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides,
int64_t const *target_strides) { ptrdiff_t const *target_strides) {
size_t res = 0; size_t res = 0;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i]; res += flat_index / broadcasted_strides[i] * target_strides[i];
...@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset( ...@@ -109,7 +109,7 @@ inline __device__ __host__ size_t indexToReducedOffset(
// get the memory offset of the given element in a tensor given its flat index // get the memory offset of the given element in a tensor given its flat index
inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim, inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
size_t const *shape, size_t const *shape,
int64_t const *strides) { ptrdiff_t const *strides) {
size_t res = 0; size_t res = 0;
for (size_t i = ndim; i-- > 0;) { for (size_t i = ndim; i-- > 0;) {
res += (flat_index % shape[i]) * strides[i]; res += (flat_index % shape[i]) * strides[i];
......
...@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor( ...@@ -40,7 +40,7 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, uint64_t *size) { __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, size_t *size) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
...@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax ...@@ -78,7 +78,7 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t desc, void *workspace, uint64_t workspace_size, void *data, void *stream) { __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t desc, void *workspace, size_t workspace_size, void *data, void *stream) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
......
...@@ -9,11 +9,11 @@ ...@@ -9,11 +9,11 @@
typedef struct BlasMatrix { typedef struct BlasMatrix {
size_t ndim; size_t ndim;
size_t batch; size_t batch;
int64_t stride; ptrdiff_t stride;
size_t rows; size_t rows;
size_t cols; size_t cols;
int64_t row_stride; ptrdiff_t row_stride;
int64_t col_stride; ptrdiff_t col_stride;
BlasMatrix() {} BlasMatrix() {}
...@@ -56,7 +56,7 @@ typedef struct BlasMatrix { ...@@ -56,7 +56,7 @@ typedef struct BlasMatrix {
std::swap(row_stride, col_stride); std::swap(row_stride, col_stride);
} }
int64_t ld() const { ptrdiff_t ld() const {
if (this->row_stride == 1) { if (this->row_stride == 1) {
return this->col_stride; return this->col_stride;
} else { } else {
......
...@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor( ...@@ -25,7 +25,7 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
} }
infiniopStatus_t cpuGetMatmulWorkspaceSize(infiniopMatmulCpuDescriptor_t desc, infiniopStatus_t cpuGetMatmulWorkspaceSize(infiniopMatmulCpuDescriptor_t desc,
uint64_t *size) { size_t *size) {
*size = 0; *size = 0;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
...@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c, ...@@ -76,7 +76,7 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
} }
infiniopStatus_t cpuMatmul(infiniopMatmulCpuDescriptor_t desc, void *workspace, infiniopStatus_t cpuMatmul(infiniopMatmulCpuDescriptor_t desc, void *workspace,
uint64_t workspace_size, void *c, void const *a, size_t workspace_size, void *c, void const *a,
void const *b, float alpha, float beta) { void const *b, float alpha, float beta) {
if (desc->dtype == INFINI_DTYPE_F16) { if (desc->dtype == INFINI_DTYPE_F16) {
return cpuCalculateMatmul<uint16_t>(desc, c, beta, a, b, alpha); return cpuCalculateMatmul<uint16_t>(desc, c, beta, a, b, alpha);
......
...@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor( ...@@ -14,10 +14,10 @@ infiniopStatus_t cpuCreateMatmulDescriptor(
infiniopTensorDescriptor_t b_desc); infiniopTensorDescriptor_t b_desc);
infiniopStatus_t cpuGetMatmulWorkspaceSize(infiniopMatmulCpuDescriptor_t desc, infiniopStatus_t cpuGetMatmulWorkspaceSize(infiniopMatmulCpuDescriptor_t desc,
uint64_t *size); size_t *size);
infiniopStatus_t cpuMatmul(infiniopMatmulCpuDescriptor_t desc, void *workspace, infiniopStatus_t cpuMatmul(infiniopMatmulCpuDescriptor_t desc, void *workspace,
uint64_t workspace_size, void *c, void const *a, size_t workspace_size, void *c, void const *a,
void const *b, float alpha, float beta); void const *b, float alpha, float beta);
infiniopStatus_t cpuDestroyMatmulDescriptor(infiniopMatmulCpuDescriptor_t desc); infiniopStatus_t cpuDestroyMatmulDescriptor(infiniopMatmulCpuDescriptor_t desc);
......
...@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle, ...@@ -27,7 +27,7 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc, uint64_t *size) { infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc, size_t *size) {
*size = 0; *size = 0;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
......
...@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle, ...@@ -13,11 +13,11 @@ infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
infiniopTensorDescriptor_t a_desc, infiniopTensorDescriptor_t a_desc,
infiniopTensorDescriptor_t b_desc); infiniopTensorDescriptor_t b_desc);
infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc, uint64_t *size); infiniopStatus_t cudaGetMatmulWorkspaceSize(infiniopMatmulCudaDescriptor_t desc, size_t *size);
infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc, infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc,
void *workspace, void *workspace,
uint64_t workspace_size, size_t workspace_size,
void *c, void *c,
void const *a, void const *a,
void const *b, void const *b,
......
...@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c, ...@@ -56,7 +56,7 @@ infiniopStatus_t cudaMatmulCublas(infiniopMatmulCudaDescriptor_t desc, void *c,
infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc, infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc,
void *workspace, void *workspace,
uint64_t workspace_size, size_t workspace_size,
void *c, void *c,
void const *a, void const *a,
void const *b, void const *b,
......
...@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl ...@@ -38,7 +38,7 @@ __C infiniopStatus_t infiniopCreateRandomSampleDescriptor(infiniopHandle_t handl
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}; };
__C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDescriptor_t desc, uint64_t *size) { __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDescriptor_t desc, size_t *size) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
...@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe ...@@ -77,7 +77,7 @@ __C infiniopStatus_t infiniopGetRandomSampleWorkspaceSize(infiniopRandomSampleDe
__C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc, __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void *workspace, void *workspace,
uint64_t workspace_size, size_t workspace_size,
void *result, void *result,
void const *probs, void const *probs,
float random_val, float random_val,
......
...@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor( ...@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateRMSNormDescriptor(
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t desc, uint64_t *size) { __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t desc, size_t *size) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
...@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t ...@@ -83,7 +83,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, uint64_t workspace_size, __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, void const *x, void const *w, void *stream) { void *y, void const *x, void const *w, void *stream) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
......
...@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor( ...@@ -53,7 +53,7 @@ __C infiniopStatus_t infiniopCreateRoPEDescriptor(
} }
__C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
uint64_t *size) { size_t *size) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
...@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, ...@@ -90,7 +90,7 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
} }
__C infiniopStatus_t infiniopRoPE(infiniopRoPEDescriptor_t desc, __C infiniopStatus_t infiniopRoPE(infiniopRoPEDescriptor_t desc,
void *workspace, uint64_t workspace_size, void *workspace, size_t workspace_size,
void *t, void const *pos_ids, void *t, void const *pos_ids,
void const *sin_table, void const *cos_table, void const *sin_table, void const *cos_table,
void *stream) { void *stream) {
......
...@@ -37,9 +37,9 @@ ...@@ -37,9 +37,9 @@
} \ } \
} while (0) } while (0)
inline std::vector<int64_t> getByteStrides(infiniopTensorDescriptor_t desc) { inline std::vector<ptrdiff_t> getByteStrides(infiniopTensorDescriptor_t desc) {
std::vector<int64_t> strides(desc->ndim); std::vector<ptrdiff_t> strides(desc->ndim);
for (uint64_t i = 0; i < desc->ndim; i++) { for (size_t i = 0; i < desc->ndim; i++) {
strides[i] = desc->strides[i] * infiniSizeof(desc->dtype); strides[i] = desc->strides[i] * infiniSizeof(desc->dtype);
} }
return strides; return strides;
...@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) { ...@@ -54,11 +54,11 @@ inline size_t getByteSize(infiniopTensorDescriptor_t desc) {
} }
// calculate the broadcasted shape for two tensors // calculate the broadcasted shape for two tensors
inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1, inline bool getBroadcastShape(const size_t *shape1, size_t ndim1,
const uint64_t *shape2, uint64_t ndim2, const size_t *shape2, size_t ndim2,
uint64_t *broadcast_shape, size_t *broadcast_shape,
uint64_t *padded_shape1, uint64_t *padded_shape2, size_t *padded_shape1, size_t *padded_shape2,
uint64_t max_rank) { size_t max_rank) {
// prepending and initializing // prepending and initializing
std::fill(padded_shape1, padded_shape1 + max_rank, 1); std::fill(padded_shape1, padded_shape1 + max_rank, 1);
std::fill(padded_shape2, padded_shape2 + max_rank, 1); std::fill(padded_shape2, padded_shape2 + max_rank, 1);
...@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1, ...@@ -82,8 +82,8 @@ inline bool getBroadcastShape(const uint64_t *shape1, uint64_t ndim1,
inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a, inline bool isValidBroadcastShape(infiniopTensorDescriptor_t a,
infiniopTensorDescriptor_t b, infiniopTensorDescriptor_t b,
infiniopTensorDescriptor_t c, infiniopTensorDescriptor_t c,
uint64_t broadcast_ndim) { size_t broadcast_ndim) {
std::vector<uint64_t> broadcast_shape_(broadcast_ndim), std::vector<size_t> broadcast_shape_(broadcast_ndim),
padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim); padded_shape1_(broadcast_ndim), padded_shape2_(broadcast_ndim);
auto broadcast_shape = broadcast_shape_.data(), auto broadcast_shape = broadcast_shape_.data(),
padded_shape1 = padded_shape1_.data(), padded_shape1 = padded_shape1_.data(),
...@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc, ...@@ -130,7 +130,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
return nullptr; return nullptr;
} }
size_t *shape = new size_t[ndim]; size_t *shape = new size_t[ndim];
int64_t *strides = new int64_t[ndim]; ptrdiff_t *strides = new ptrdiff_t[ndim];
for (size_t i = 0; i < ndim; i++) { for (size_t i = 0; i < ndim; i++) {
if (std::find(order.begin(), order.end(), i) == order.end()) { if (std::find(order.begin(), order.end(), i) == order.end()) {
return nullptr; return nullptr;
...@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc, ...@@ -146,7 +146,7 @@ inline infiniopTensorDescriptor_t permute(infiniopTensorDescriptor_t desc,
inline bool isContiguous(const infiniopTensorDescriptor_t &desc, inline bool isContiguous(const infiniopTensorDescriptor_t &desc,
size_t dim_start, size_t dim_end) { size_t dim_start, size_t dim_end) {
for (size_t i = dim_start + 1; i <= dim_end; i++) { for (size_t i = dim_start + 1; i <= dim_end; i++) {
if (desc->strides[i - 1] != static_cast<int64_t>(desc->shape[i]) * desc->strides[i]) { if (desc->strides[i - 1] != static_cast<ptrdiff_t>(desc->shape[i]) * desc->strides[i]) {
return false; return false;
} }
} }
...@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc, ...@@ -170,7 +170,7 @@ inline infiniopTensorDescriptor_t dimMerge(infiniopTensorDescriptor_t desc,
size_t new_ndim = ndim - (dim_end - dim_start); size_t new_ndim = ndim - (dim_end - dim_start);
size_t *new_shape = new size_t[new_ndim]; size_t *new_shape = new size_t[new_ndim];
int64_t *new_strides = new int64_t[new_ndim]; ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
size_t index = 0; size_t index = 0;
for (size_t i = 0; i < dim_start; i++) { for (size_t i = 0; i < dim_start; i++) {
new_shape[index] = desc->shape[i]; new_shape[index] = desc->shape[i];
...@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc, ...@@ -205,7 +205,7 @@ inline infiniopTensorDescriptor_t dimSplit(infiniopTensorDescriptor_t desc,
} }
size_t new_ndim = ndim + dims.size() - 1; size_t new_ndim = ndim + dims.size() - 1;
size_t *new_shape = new size_t[new_ndim]; size_t *new_shape = new size_t[new_ndim];
int64_t *new_strides = new int64_t[new_ndim]; ptrdiff_t *new_strides = new ptrdiff_t[new_ndim];
size_t index = 0; size_t index = 0;
for (size_t i = 0; i < dim; i++) { for (size_t i = 0; i < dim; i++) {
new_shape[index] = desc->shape[i]; new_shape[index] = desc->shape[i];
......
#include "infiniop/tensor_descriptor.h" #include "infiniop/tensor_descriptor.h"
#include <cstring> #include <cstring>
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, int64_t const *strides_, infiniDtype_t datatype) { __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape_, ptrdiff_t const *strides_, infiniDtype_t datatype) {
size_t *shape = new size_t[ndim]; size_t *shape = new size_t[ndim];
int64_t *strides = new int64_t[ndim]; ptrdiff_t *strides = new ptrdiff_t[ndim];
std::memcpy(shape, shape_, ndim * sizeof(size_t)); std::memcpy(shape, shape_, ndim * sizeof(size_t));
if (strides_) { if (strides_) {
std::memcpy(strides, strides_, ndim * sizeof(int64_t)); std::memcpy(strides, strides_, ndim * sizeof(ptrdiff_t));
} else { } else {
int64_t dsize = 1; ptrdiff_t dsize = 1;
for (int i = ndim - 1; i >= 0; i--) { for (size_t i = ndim - 1; i >= 0; i--) {
strides[i] = dsize; strides[i] = dsize;
dsize *= shape[i]; dsize *= shape[i];
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment