Commit 3144cc9c authored by YdrMaster's avatar YdrMaster
Browse files

issue/63/style: 尽量将 cv 修饰符移动到类型前


Signed-off-by: default avatarYdrMaster <ydrml@hotmail.com>
parent b461d520
......@@ -10,7 +10,7 @@ typedef struct InfiniopDescriptor {
int device_id;
} InfiniopDescriptor;
__C __export infiniopStatus_t infiniopGetDescriptorDeviceType(InfiniopDescriptor const *desc_ptr, infiniDevice_t *device_type);
__C __export infiniopStatus_t infiniopGetDescriptorDeviceId(InfiniopDescriptor const *desc_ptr, int *device_id);
__C __export infiniopStatus_t infiniopGetDescriptorDeviceType(const InfiniopDescriptor *desc_ptr, infiniDevice_t *device_type);
__C __export infiniopStatus_t infiniopGetDescriptorDeviceId(const InfiniopDescriptor *desc_ptr, int *device_id);
#endif //__INFINIOP_OPERATOR___
......@@ -23,9 +23,9 @@ __C __export infiniopStatus_t infiniopAttention(infiniopAttentionDescriptor_t de
void *workspace,
size_t workspace_size,
void *out,
void const *q,
void const *k,
void const *v,
const void *q,
const void *k,
const void *v,
void *k_cache,
void *v_cache,
void *stream);
......
......@@ -22,9 +22,9 @@ __C __export infiniopStatus_t infiniopMLP(infiniopMLPDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *y,
void const *x,
void const *w12,
void const *w3,
const void *x,
const void *w12,
const void *w3,
void *stream);
__C __export infiniopStatus_t infiniopDestroyMLPDescriptor(infiniopMLPDescriptor_t desc);
......
......@@ -17,7 +17,7 @@ struct InfiniopTensorDescriptor {
typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape, ptrdiff_t const *strides, infiniDtype_t dtype);
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, const size_t *shape, const ptrdiff_t *strides, infiniDtype_t dtype);
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
......
......@@ -59,9 +59,11 @@ uint16_t f32_to_f16(float val) {
}
}
size_t indexToReducedOffset(size_t flat_index, size_t ndim,
ptrdiff_t const *broadcasted_strides,
ptrdiff_t const *target_strides) {
size_t indexToReducedOffset(
size_t flat_index,
size_t ndim,
const ptrdiff_t *broadcasted_strides,
const ptrdiff_t *target_strides) {
size_t res = 0;
for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i];
......@@ -70,8 +72,11 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
return res;
}
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
ptrdiff_t const *strides) {
size_t indexToOffset(
size_t flat_index,
size_t ndim,
const size_t *shape,
const ptrdiff_t *strides) {
size_t res = 0;
for (size_t i = ndim; i-- >= 0;) {
res += (flat_index % shape[i]) * strides[i];
......@@ -80,7 +85,10 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
return res;
}
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
size_t getPaddedSize(
size_t ndim,
size_t *shape,
const size_t *pads) {
size_t total_size = 1;
for (size_t i = 0; i < ndim; ++i) {
total_size *= shape[i] + (i < 2 ? 0 : 2 * pads[i - 2]);
......@@ -88,8 +96,10 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
return total_size;
}
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape,
size_t const *pads) {
std::vector<size_t> getPaddedShape(
size_t ndim,
const size_t *shape,
const size_t *pads) {
std::vector<size_t> padded_shape(ndim);
memcpy(padded_shape.data(), shape, ndim * sizeof(size_t));
for (size_t i = 2; i < ndim; ++i) {
......
#ifndef __INFINIOP__COMMON_CPU_H__
#define __INFINIOP__COMMON_CPU_H__
#ifndef __INFINIOP_COMMON_CPU_H__
#define __INFINIOP_COMMON_CPU_H__
#include <cmath>
#include <cstddef>
......@@ -14,18 +14,18 @@ float f16_to_f32(uint16_t code);
uint16_t f32_to_f16(float val);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t indexToReducedOffset(size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides, ptrdiff_t const *target_strides);
size_t indexToReducedOffset(size_t flat_index, size_t ndim, const ptrdiff_t *broadcasted_strides, const ptrdiff_t *target_strides);
// return the memory offset a tensor given flattened index
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, ptrdiff_t const *strides);
size_t indexToOffset(size_t flat_index, size_t ndim, const size_t *shape, const ptrdiff_t *strides);
/**
* get the total array size (element count) after applying padding for a
* ndim-ary tensor with the given shape
*/
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
size_t getPaddedSize(size_t ndim, size_t *shape, const size_t *pads);
// calculate the padded shape and store the result in padded_shape
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads);
std::vector<size_t> getPaddedShape(size_t ndim, const size_t *shape, const size_t *pads);
#endif // __INFINIOP__COMMON_CPU_H__
......@@ -47,8 +47,8 @@ struct InfiniopCudaHandle {
int compute_capability_minor;
};
template <class T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream, T const &f) {
template <typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream, const T &f) {
auto handle = pool->pop();
if (!handle) {
cublasCreate(&(*handle));
......@@ -58,8 +58,8 @@ void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream
pool->push(std::move(*handle));
}
template <class T>
void use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> &pool, cudaStream_t stream, T const &f) {
template <typename T>
void use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> &pool, cudaStream_t stream, const T &f) {
auto handle = pool->pop();
if (!handle) {
cudnnCreate(&(*handle));
......@@ -95,8 +95,10 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
// return the memory offset of original tensor, given the flattened index of
// broadcasted tensor
inline __device__ __host__ size_t indexToReducedOffset(
size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides,
ptrdiff_t const *target_strides) {
size_t flat_index,
size_t ndim,
const ptrdiff_t *broadcasted_strides,
const ptrdiff_t *target_strides) {
size_t res = 0;
for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i];
......@@ -106,9 +108,11 @@ inline __device__ __host__ size_t indexToReducedOffset(
}
// get the memory offset of the given element in a tensor given its flat index
inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
size_t const *shape,
ptrdiff_t const *strides) {
inline __device__ __host__ size_t indexToOffset(
size_t flat_index,
size_t ndim,
const size_t *shape,
const ptrdiff_t *strides) {
size_t res = 0;
for (size_t i = ndim; i-- > 0;) {
res += (flat_index % shape[i]) * strides[i];
......
#include "infiniop/operator.h"
infiniopStatus_t infiniopGetDescriptorDeviceType(
InfiniopDescriptor const *desc_ptr,
const InfiniopDescriptor *desc_ptr,
infiniDevice_t *device_type) {
*device_type = desc_ptr->device_type;
return INFINIOP_STATUS_SUCCESS;
}
infiniopStatus_t infiniopGetDescriptorDeviceId(
InfiniopDescriptor const *desc_ptr,
const InfiniopDescriptor *desc_ptr,
int *device_id) {
*device_id = desc_ptr->device_id;
return INFINIOP_STATUS_SUCCESS;
......
......@@ -43,7 +43,7 @@ infiniopStatus_t Descriptor::create(
}
infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
......@@ -56,21 +56,21 @@ infiniopStatus_t Descriptor::create(
// operation
CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype),
{static_cast<int64_t>(_info.c_matrix.rows),
static_cast<int64_t>(_info.c_matrix.cols)},
{_info.c_matrix.row_stride, _info.c_matrix.col_stride}),
{static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(info.c_matrix.cols)},
{info.c_matrix.row_stride, info.c_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype),
{static_cast<int64_t>(_info.a_matrix.rows),
static_cast<int64_t>(_info.a_matrix.cols)},
{_info.a_matrix.row_stride, _info.a_matrix.col_stride}),
{static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(info.a_matrix.cols)},
{info.a_matrix.row_stride, info.a_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype),
{static_cast<int64_t>(_info.b_matrix.rows),
static_cast<int64_t>(_info.b_matrix.cols)},
{_info.b_matrix.row_stride, _info.b_matrix.col_stride}),
{static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(info.b_matrix.cols)},
{info.b_matrix.row_stride, info.b_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(c->createTensor(), INFINIOP_STATUS_SUCCESS);
......@@ -95,7 +95,7 @@ infiniopStatus_t Descriptor::create(
aclSetAclOpExecutorRepeatable(executor);
*desc_ptr = new Descriptor(
dtype, _info, workspace_size,
dtype, info, workspace_size,
new Opaque{
executor,
c,
......@@ -112,8 +112,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspaceSize_,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
void *stream) const {
......
......@@ -73,7 +73,7 @@ infiniopStatus_t Descriptor::create(
}
infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
......@@ -83,9 +83,9 @@ infiniopStatus_t Descriptor::create(
cnnlCreateTensorDescriptor(&b);
cnnlCreateTensorDescriptor(&c);
setMatrixTensorEx(a, _info.a_matrix, a_desc->dtype);
setMatrixTensorEx(b, _info.b_matrix, b_desc->dtype);
setMatrixTensorEx(c, _info.c_matrix, c_desc->dtype);
setMatrixTensorEx(a, info.a_matrix, a_desc->dtype);
setMatrixTensorEx(b, info.b_matrix, b_desc->dtype);
setMatrixTensorEx(c, info.c_matrix, c_desc->dtype);
cnnlMatMulDescriptor_t op;
cnnlMatMulAlgo_t algo;
......@@ -112,7 +112,7 @@ infiniopStatus_t Descriptor::create(
cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size);
*desc_ptr = new Descriptor(
dtype, _info, workspace_size,
dtype, info, workspace_size,
new Opaque{
op,
algo,
......@@ -130,8 +130,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
void *stream) const {
......
......@@ -60,7 +60,7 @@ struct BlasMatrix {
}
};
enum class MatrixLayout : uint8_t {
enum class MatrixLayout : char {
COL_MAJOR,
ROW_MAJOR,
};
......
......@@ -20,13 +20,13 @@ infiniopStatus_t Descriptor::create(
}
infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
*desc_ptr = new Descriptor(
dtype, _info, 0,
dtype, info, 0,
nullptr,
handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS;
......@@ -34,24 +34,24 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata>
void calculate(
MatmulInfo const &_info,
const MatmulInfo &info,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha) {
if (_info.is_transed) {
if (info.is_transed) {
std::swap(a, b);
}
for (size_t i = 0; i < _info.batch; ++i) {
for (size_t m_ = 0; m_ < _info.m; ++m_) {
for (size_t n_ = 0; n_ < _info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + i * _info.c_matrix.stride + m_ * _info.c_matrix.row_stride + n_ * _info.c_matrix.col_stride;
for (size_t i = 0; i < info.batch; ++i) {
for (size_t m_ = 0; m_ < info.m; ++m_) {
for (size_t n_ = 0; n_ < info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride;
float sum = 0;
for (size_t k_ = 0; k_ < _info.k; ++k_) {
auto a_ = reinterpret_cast<Tdata const *>(a) + i * _info.a_matrix.stride + m_ * _info.a_matrix.row_stride + k_ * _info.a_matrix.col_stride;
auto b_ = reinterpret_cast<Tdata const *>(b) + i * _info.b_matrix.stride + n_ * _info.b_matrix.col_stride + k_ * _info.b_matrix.row_stride;
for (size_t k_ = 0; k_ < info.k; ++k_) {
auto a_ = reinterpret_cast<const Tdata *>(a) + i * info.a_matrix.stride + m_ * info.a_matrix.row_stride + k_ * info.a_matrix.col_stride;
auto b_ = reinterpret_cast<const Tdata *>(b) + i * info.b_matrix.stride + n_ * info.b_matrix.col_stride + k_ * info.b_matrix.row_stride;
if constexpr (std::is_same<Tdata, uint16_t>::value) {
sum += f16_to_f32(*a_) * f16_to_f32(*b_);
} else {
......@@ -77,8 +77,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
void *stream) const {
......
......@@ -26,13 +26,13 @@ infiniopStatus_t Descriptor::create(
}
infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) {
return status;
}
*desc_ptr = new Descriptor(
dtype, _info, 0,
dtype, info, 0,
new Opaque{handle->cublas_handle_pool},
handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS;
......@@ -40,16 +40,16 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata>
void calculate(
MatmulInfo const &_info,
const MatmulInfo &info,
std::shared_ptr<Pool<cublasHandle_t>> &cublas_handle_pool,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
cudaStream_t stream) {
if (_info.is_transed) {
if (info.is_transed) {
std::swap(a, b);
}
......@@ -67,8 +67,8 @@ void calculate(
#endif
}
auto op_a = _info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = _info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(cublas_handle_pool,
stream,
......@@ -77,24 +77,24 @@ void calculate(
handle,
op_a,
op_b,
static_cast<int>(_info.m),
static_cast<int>(_info.n),
static_cast<int>(_info.k),
static_cast<int>(info.m),
static_cast<int>(info.n),
static_cast<int>(info.k),
&alpha,
a,
a_type,
static_cast<int>(_info.a_matrix.ld()),
_info.a_matrix.stride,
static_cast<int>(info.a_matrix.ld()),
info.a_matrix.stride,
b,
b_type,
static_cast<int>(_info.b_matrix.ld()),
_info.b_matrix.stride,
static_cast<int>(info.b_matrix.ld()),
info.b_matrix.stride,
&beta,
c,
c_type,
static_cast<int>(_info.c_matrix.ld()),
_info.c_matrix.stride,
static_cast<int>(_info.batch),
static_cast<int>(info.c_matrix.ld()),
info.c_matrix.stride,
static_cast<int>(info.batch),
compute_type,
CUBLAS_GEMM_DEFAULT_TENSOR_OP);
});
......@@ -105,8 +105,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size,
void *c,
float beta,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
void *stream) const {
......
......@@ -83,8 +83,8 @@
size_t workspace_size, \
void *c, \
float beta, \
void const *a, \
void const *b, \
const void *a, \
const void *b, \
float alpha, \
void *stream) const; \
}; \
......
......@@ -58,7 +58,7 @@ infiniopGetMatmulWorkspaceSize(
#define GET(CASE, NAMESPACE) \
case CASE: \
*size = reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc)->workspace_size; \
*size = reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc)->workspace_size; \
return INFINIOP_STATUS_SUCCESS
switch (desc->device_type) {
......@@ -87,15 +87,15 @@ __C infiniopStatus_t infiniopMatmul(
infiniopMatmulDescriptor_t desc,
void *workspace, size_t workspace_size,
void *c,
void const *a,
void const *b,
const void *a,
const void *b,
float alpha,
float beta,
void *stream) {
#define CALCULATE(CASE, NAMESPACE) \
case CASE: \
return reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc) \
return reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, \
c, beta, \
a, b, alpha, \
......@@ -128,7 +128,7 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \
case CASE: \
delete reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc); \
delete reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc); \
return INFINIOP_STATUS_SUCCESS;
switch (desc->device_type) {
......
......@@ -79,7 +79,7 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void *workspace,
size_t workspace_size,
void *result,
void const *probs,
const void *probs,
float random_val,
float topp,
int topk,
......
......@@ -43,7 +43,7 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
}
__C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, void const *src, void *stream) {
__C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, const void *src, void *stream) {
switch (desc->device_type) {
#ifdef ENABLE_CPU
case DevCpu:
......
......@@ -84,7 +84,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
}
__C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, void const *x, void const *w, void *stream) {
void *y, const void *x, const void *w, void *stream) {
switch (desc->device_type) {
#ifdef ENABLE_CPU
case DevCpu:
......
......@@ -91,8 +91,8 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
__C infiniopStatus_t infiniopRoPE(infiniopRoPEDescriptor_t desc,
void *workspace, size_t workspace_size,
void *t, void const *pos_ids,
void const *sin_table, void const *cos_table,
void *t, const void *pos_ids,
const void *sin_table, const void *cos_table,
void *stream) {
switch (desc->device_type) {
#ifdef ENABLE_CPU
......
......@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateSwiGLUDescriptor(
};
__C infiniopStatus_t infiniopSwiGLU(infiniopSwiGLUDescriptor_t desc, void *c,
void const *a, void const *b,
const void *a, const void *b,
void *stream) {
switch (desc->device_type) {
#ifdef ENABLE_CPU
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment