"git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "4879aac74991ad4552c5ed2ec178af511f3feb5e"
Commit 3144cc9c authored by YdrMaster
Browse files

issue/63/style: 尽量将 cv 修饰符移动到类型前


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent b461d520
...@@ -10,7 +10,7 @@ typedef struct InfiniopDescriptor { ...@@ -10,7 +10,7 @@ typedef struct InfiniopDescriptor {
int device_id; int device_id;
} InfiniopDescriptor; } InfiniopDescriptor;
__C __export infiniopStatus_t infiniopGetDescriptorDeviceType(InfiniopDescriptor const *desc_ptr, infiniDevice_t *device_type); __C __export infiniopStatus_t infiniopGetDescriptorDeviceType(const InfiniopDescriptor *desc_ptr, infiniDevice_t *device_type);
__C __export infiniopStatus_t infiniopGetDescriptorDeviceId(InfiniopDescriptor const *desc_ptr, int *device_id); __C __export infiniopStatus_t infiniopGetDescriptorDeviceId(const InfiniopDescriptor *desc_ptr, int *device_id);
#endif //__INFINIOP_OPERATOR___ #endif //__INFINIOP_OPERATOR___
...@@ -23,9 +23,9 @@ __C __export infiniopStatus_t infiniopAttention(infiniopAttentionDescriptor_t de ...@@ -23,9 +23,9 @@ __C __export infiniopStatus_t infiniopAttention(infiniopAttentionDescriptor_t de
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
void *out, void *out,
void const *q, const void *q,
void const *k, const void *k,
void const *v, const void *v,
void *k_cache, void *k_cache,
void *v_cache, void *v_cache,
void *stream); void *stream);
......
...@@ -22,9 +22,9 @@ __C __export infiniopStatus_t infiniopMLP(infiniopMLPDescriptor_t desc, ...@@ -22,9 +22,9 @@ __C __export infiniopStatus_t infiniopMLP(infiniopMLPDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
void *y, void *y,
void const *x, const void *x,
void const *w12, const void *w12,
void const *w3, const void *w3,
void *stream); void *stream);
__C __export infiniopStatus_t infiniopDestroyMLPDescriptor(infiniopMLPDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyMLPDescriptor(infiniopMLPDescriptor_t desc);
......
...@@ -17,7 +17,7 @@ struct InfiniopTensorDescriptor { ...@@ -17,7 +17,7 @@ struct InfiniopTensorDescriptor {
typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t; typedef struct InfiniopTensorDescriptor *infiniopTensorDescriptor_t;
__C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, size_t const *shape, ptrdiff_t const *strides, infiniDtype_t dtype); __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescriptor_t *desc_ptr, size_t ndim, const size_t *shape, const ptrdiff_t *strides, infiniDtype_t dtype);
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
......
...@@ -59,9 +59,11 @@ uint16_t f32_to_f16(float val) { ...@@ -59,9 +59,11 @@ uint16_t f32_to_f16(float val) {
} }
} }
size_t indexToReducedOffset(size_t flat_index, size_t ndim, size_t indexToReducedOffset(
ptrdiff_t const *broadcasted_strides, size_t flat_index,
ptrdiff_t const *target_strides) { size_t ndim,
const ptrdiff_t *broadcasted_strides,
const ptrdiff_t *target_strides) {
size_t res = 0; size_t res = 0;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i]; res += flat_index / broadcasted_strides[i] * target_strides[i];
...@@ -70,8 +72,11 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, ...@@ -70,8 +72,11 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim,
return res; return res;
} }
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, size_t indexToOffset(
ptrdiff_t const *strides) { size_t flat_index,
size_t ndim,
const size_t *shape,
const ptrdiff_t *strides) {
size_t res = 0; size_t res = 0;
for (size_t i = ndim; i-- >= 0;) { for (size_t i = ndim; i-- >= 0;) {
res += (flat_index % shape[i]) * strides[i]; res += (flat_index % shape[i]) * strides[i];
...@@ -80,7 +85,10 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, ...@@ -80,7 +85,10 @@ size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape,
return res; return res;
} }
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) { size_t getPaddedSize(
size_t ndim,
size_t *shape,
const size_t *pads) {
size_t total_size = 1; size_t total_size = 1;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
total_size *= shape[i] + (i < 2 ? 0 : 2 * pads[i - 2]); total_size *= shape[i] + (i < 2 ? 0 : 2 * pads[i - 2]);
...@@ -88,8 +96,10 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) { ...@@ -88,8 +96,10 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads) {
return total_size; return total_size;
} }
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, std::vector<size_t> getPaddedShape(
size_t const *pads) { size_t ndim,
const size_t *shape,
const size_t *pads) {
std::vector<size_t> padded_shape(ndim); std::vector<size_t> padded_shape(ndim);
memcpy(padded_shape.data(), shape, ndim * sizeof(size_t)); memcpy(padded_shape.data(), shape, ndim * sizeof(size_t));
for (size_t i = 2; i < ndim; ++i) { for (size_t i = 2; i < ndim; ++i) {
......
#ifndef __INFINIOP__COMMON_CPU_H__ #ifndef __INFINIOP_COMMON_CPU_H__
#define __INFINIOP__COMMON_CPU_H__ #define __INFINIOP_COMMON_CPU_H__
#include <cmath> #include <cmath>
#include <cstddef> #include <cstddef>
...@@ -14,18 +14,18 @@ float f16_to_f32(uint16_t code); ...@@ -14,18 +14,18 @@ float f16_to_f32(uint16_t code);
uint16_t f32_to_f16(float val); uint16_t f32_to_f16(float val);
// return the memory offset of original tensor, given the flattened index of broadcasted tensor // return the memory offset of original tensor, given the flattened index of broadcasted tensor
size_t indexToReducedOffset(size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides, ptrdiff_t const *target_strides); size_t indexToReducedOffset(size_t flat_index, size_t ndim, const ptrdiff_t *broadcasted_strides, const ptrdiff_t *target_strides);
// return the memory offset a tensor given flattened index // return the memory offset a tensor given flattened index
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, ptrdiff_t const *strides); size_t indexToOffset(size_t flat_index, size_t ndim, const size_t *shape, const ptrdiff_t *strides);
/** /**
* get the total array size (element count) after applying padding for a * get the total array size (element count) after applying padding for a
* ndim-ary tensor with the given shape * ndim-ary tensor with the given shape
*/ */
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads); size_t getPaddedSize(size_t ndim, size_t *shape, const size_t *pads);
// calculate the padded shape and store the result in padded_shape // calculate the padded shape and store the result in padded_shape
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads); std::vector<size_t> getPaddedShape(size_t ndim, const size_t *shape, const size_t *pads);
#endif // __INFINIOP__COMMON_CPU_H__ #endif // __INFINIOP__COMMON_CPU_H__
...@@ -47,8 +47,8 @@ struct InfiniopCudaHandle { ...@@ -47,8 +47,8 @@ struct InfiniopCudaHandle {
int compute_capability_minor; int compute_capability_minor;
}; };
template <class T> template <typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream, T const &f) { void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream, const T &f) {
auto handle = pool->pop(); auto handle = pool->pop();
if (!handle) { if (!handle) {
cublasCreate(&(*handle)); cublasCreate(&(*handle));
...@@ -58,8 +58,8 @@ void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream ...@@ -58,8 +58,8 @@ void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> &pool, cudaStream_t stream
pool->push(std::move(*handle)); pool->push(std::move(*handle));
} }
template <class T> template <typename T>
void use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> &pool, cudaStream_t stream, T const &f) { void use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> &pool, cudaStream_t stream, const T &f) {
auto handle = pool->pop(); auto handle = pool->pop();
if (!handle) { if (!handle) {
cudnnCreate(&(*handle)); cudnnCreate(&(*handle));
...@@ -95,8 +95,10 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) { ...@@ -95,8 +95,10 @@ inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
// return the memory offset of original tensor, given the flattened index of // return the memory offset of original tensor, given the flattened index of
// broadcasted tensor // broadcasted tensor
inline __device__ __host__ size_t indexToReducedOffset( inline __device__ __host__ size_t indexToReducedOffset(
size_t flat_index, size_t ndim, ptrdiff_t const *broadcasted_strides, size_t flat_index,
ptrdiff_t const *target_strides) { size_t ndim,
const ptrdiff_t *broadcasted_strides,
const ptrdiff_t *target_strides) {
size_t res = 0; size_t res = 0;
for (size_t i = 0; i < ndim; ++i) { for (size_t i = 0; i < ndim; ++i) {
res += flat_index / broadcasted_strides[i] * target_strides[i]; res += flat_index / broadcasted_strides[i] * target_strides[i];
...@@ -106,9 +108,11 @@ inline __device__ __host__ size_t indexToReducedOffset( ...@@ -106,9 +108,11 @@ inline __device__ __host__ size_t indexToReducedOffset(
} }
// get the memory offset of the given element in a tensor given its flat index // get the memory offset of the given element in a tensor given its flat index
inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim, inline __device__ __host__ size_t indexToOffset(
size_t const *shape, size_t flat_index,
ptrdiff_t const *strides) { size_t ndim,
const size_t *shape,
const ptrdiff_t *strides) {
size_t res = 0; size_t res = 0;
for (size_t i = ndim; i-- > 0;) { for (size_t i = ndim; i-- > 0;) {
res += (flat_index % shape[i]) * strides[i]; res += (flat_index % shape[i]) * strides[i];
......
#include "infiniop/operator.h" #include "infiniop/operator.h"
infiniopStatus_t infiniopGetDescriptorDeviceType( infiniopStatus_t infiniopGetDescriptorDeviceType(
InfiniopDescriptor const *desc_ptr, const InfiniopDescriptor *desc_ptr,
infiniDevice_t *device_type) { infiniDevice_t *device_type) {
*device_type = desc_ptr->device_type; *device_type = desc_ptr->device_type;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
infiniopStatus_t infiniopGetDescriptorDeviceId( infiniopStatus_t infiniopGetDescriptorDeviceId(
InfiniopDescriptor const *desc_ptr, const InfiniopDescriptor *desc_ptr,
int *device_id) { int *device_id) {
*device_id = desc_ptr->device_id; *device_id = desc_ptr->device_id;
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
......
...@@ -43,7 +43,7 @@ infiniopStatus_t Descriptor::create( ...@@ -43,7 +43,7 @@ infiniopStatus_t Descriptor::create(
} }
infiniopStatus_t status; infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR); auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) { if (status != INFINIOP_STATUS_SUCCESS) {
return status; return status;
} }
...@@ -56,21 +56,21 @@ infiniopStatus_t Descriptor::create( ...@@ -56,21 +56,21 @@ infiniopStatus_t Descriptor::create(
// operation // operation
CHECK_STATUS(c->setDescriptor( CHECK_STATUS(c->setDescriptor(
toAclDataType(c_desc->dtype), toAclDataType(c_desc->dtype),
{static_cast<int64_t>(_info.c_matrix.rows), {static_cast<int64_t>(info.c_matrix.rows),
static_cast<int64_t>(_info.c_matrix.cols)}, static_cast<int64_t>(info.c_matrix.cols)},
{_info.c_matrix.row_stride, _info.c_matrix.col_stride}), {info.c_matrix.row_stride, info.c_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS); INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(a->setDescriptor( CHECK_STATUS(a->setDescriptor(
toAclDataType(a_desc->dtype), toAclDataType(a_desc->dtype),
{static_cast<int64_t>(_info.a_matrix.rows), {static_cast<int64_t>(info.a_matrix.rows),
static_cast<int64_t>(_info.a_matrix.cols)}, static_cast<int64_t>(info.a_matrix.cols)},
{_info.a_matrix.row_stride, _info.a_matrix.col_stride}), {info.a_matrix.row_stride, info.a_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS); INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(b->setDescriptor( CHECK_STATUS(b->setDescriptor(
toAclDataType(b_desc->dtype), toAclDataType(b_desc->dtype),
{static_cast<int64_t>(_info.b_matrix.rows), {static_cast<int64_t>(info.b_matrix.rows),
static_cast<int64_t>(_info.b_matrix.cols)}, static_cast<int64_t>(info.b_matrix.cols)},
{_info.b_matrix.row_stride, _info.b_matrix.col_stride}), {info.b_matrix.row_stride, info.b_matrix.col_stride}),
INFINIOP_STATUS_SUCCESS); INFINIOP_STATUS_SUCCESS);
CHECK_STATUS(c->createTensor(), INFINIOP_STATUS_SUCCESS); CHECK_STATUS(c->createTensor(), INFINIOP_STATUS_SUCCESS);
...@@ -95,7 +95,7 @@ infiniopStatus_t Descriptor::create( ...@@ -95,7 +95,7 @@ infiniopStatus_t Descriptor::create(
aclSetAclOpExecutorRepeatable(executor); aclSetAclOpExecutorRepeatable(executor);
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, _info, workspace_size, dtype, info, workspace_size,
new Opaque{ new Opaque{
executor, executor,
c, c,
...@@ -112,8 +112,8 @@ infiniopStatus_t Descriptor::calculate( ...@@ -112,8 +112,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspaceSize_, size_t workspaceSize_,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
void *stream) const { void *stream) const {
......
...@@ -73,7 +73,7 @@ infiniopStatus_t Descriptor::create( ...@@ -73,7 +73,7 @@ infiniopStatus_t Descriptor::create(
} }
infiniopStatus_t status; infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR); auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::ROW_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) { if (status != INFINIOP_STATUS_SUCCESS) {
return status; return status;
} }
...@@ -83,9 +83,9 @@ infiniopStatus_t Descriptor::create( ...@@ -83,9 +83,9 @@ infiniopStatus_t Descriptor::create(
cnnlCreateTensorDescriptor(&b); cnnlCreateTensorDescriptor(&b);
cnnlCreateTensorDescriptor(&c); cnnlCreateTensorDescriptor(&c);
setMatrixTensorEx(a, _info.a_matrix, a_desc->dtype); setMatrixTensorEx(a, info.a_matrix, a_desc->dtype);
setMatrixTensorEx(b, _info.b_matrix, b_desc->dtype); setMatrixTensorEx(b, info.b_matrix, b_desc->dtype);
setMatrixTensorEx(c, _info.c_matrix, c_desc->dtype); setMatrixTensorEx(c, info.c_matrix, c_desc->dtype);
cnnlMatMulDescriptor_t op; cnnlMatMulDescriptor_t op;
cnnlMatMulAlgo_t algo; cnnlMatMulAlgo_t algo;
...@@ -112,7 +112,7 @@ infiniopStatus_t Descriptor::create( ...@@ -112,7 +112,7 @@ infiniopStatus_t Descriptor::create(
cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size); cnnlGetBatchMatMulHeuristicResult(algoResult, algo, &workspace_size);
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, _info, workspace_size, dtype, info, workspace_size,
new Opaque{ new Opaque{
op, op,
algo, algo,
...@@ -130,8 +130,8 @@ infiniopStatus_t Descriptor::calculate( ...@@ -130,8 +130,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size, size_t workspace_size,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
void *stream) const { void *stream) const {
......
...@@ -60,7 +60,7 @@ struct BlasMatrix { ...@@ -60,7 +60,7 @@ struct BlasMatrix {
} }
}; };
enum class MatrixLayout : uint8_t { enum class MatrixLayout : char {
COL_MAJOR, COL_MAJOR,
ROW_MAJOR, ROW_MAJOR,
}; };
......
...@@ -20,13 +20,13 @@ infiniopStatus_t Descriptor::create( ...@@ -20,13 +20,13 @@ infiniopStatus_t Descriptor::create(
} }
infiniopStatus_t status; infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR); auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) { if (status != INFINIOP_STATUS_SUCCESS) {
return status; return status;
} }
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, _info, 0, dtype, info, 0,
nullptr, nullptr,
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
...@@ -34,24 +34,24 @@ infiniopStatus_t Descriptor::create( ...@@ -34,24 +34,24 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata> template <typename Tdata>
void calculate( void calculate(
MatmulInfo const &_info, const MatmulInfo &info,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha) { float alpha) {
if (_info.is_transed) { if (info.is_transed) {
std::swap(a, b); std::swap(a, b);
} }
for (size_t i = 0; i < _info.batch; ++i) { for (size_t i = 0; i < info.batch; ++i) {
for (size_t m_ = 0; m_ < _info.m; ++m_) { for (size_t m_ = 0; m_ < info.m; ++m_) {
for (size_t n_ = 0; n_ < _info.n; ++n_) { for (size_t n_ = 0; n_ < info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + i * _info.c_matrix.stride + m_ * _info.c_matrix.row_stride + n_ * _info.c_matrix.col_stride; auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride;
float sum = 0; float sum = 0;
for (size_t k_ = 0; k_ < _info.k; ++k_) { for (size_t k_ = 0; k_ < info.k; ++k_) {
auto a_ = reinterpret_cast<Tdata const *>(a) + i * _info.a_matrix.stride + m_ * _info.a_matrix.row_stride + k_ * _info.a_matrix.col_stride; auto a_ = reinterpret_cast<const Tdata *>(a) + i * info.a_matrix.stride + m_ * info.a_matrix.row_stride + k_ * info.a_matrix.col_stride;
auto b_ = reinterpret_cast<Tdata const *>(b) + i * _info.b_matrix.stride + n_ * _info.b_matrix.col_stride + k_ * _info.b_matrix.row_stride; auto b_ = reinterpret_cast<const Tdata *>(b) + i * info.b_matrix.stride + n_ * info.b_matrix.col_stride + k_ * info.b_matrix.row_stride;
if constexpr (std::is_same<Tdata, uint16_t>::value) { if constexpr (std::is_same<Tdata, uint16_t>::value) {
sum += f16_to_f32(*a_) * f16_to_f32(*b_); sum += f16_to_f32(*a_) * f16_to_f32(*b_);
} else { } else {
...@@ -77,8 +77,8 @@ infiniopStatus_t Descriptor::calculate( ...@@ -77,8 +77,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size, size_t workspace_size,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
void *stream) const { void *stream) const {
......
...@@ -26,13 +26,13 @@ infiniopStatus_t Descriptor::create( ...@@ -26,13 +26,13 @@ infiniopStatus_t Descriptor::create(
} }
infiniopStatus_t status; infiniopStatus_t status;
auto _info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR); auto info = MatmulInfo(c_desc, a_desc, b_desc, &status, MatrixLayout::COL_MAJOR);
if (status != INFINIOP_STATUS_SUCCESS) { if (status != INFINIOP_STATUS_SUCCESS) {
return status; return status;
} }
*desc_ptr = new Descriptor( *desc_ptr = new Descriptor(
dtype, _info, 0, dtype, info, 0,
new Opaque{handle->cublas_handle_pool}, new Opaque{handle->cublas_handle_pool},
handle->device, handle->device_id); handle->device, handle->device_id);
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
...@@ -40,16 +40,16 @@ infiniopStatus_t Descriptor::create( ...@@ -40,16 +40,16 @@ infiniopStatus_t Descriptor::create(
template <typename Tdata> template <typename Tdata>
void calculate( void calculate(
MatmulInfo const &_info, const MatmulInfo &info,
std::shared_ptr<Pool<cublasHandle_t>> &cublas_handle_pool, std::shared_ptr<Pool<cublasHandle_t>> &cublas_handle_pool,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
cudaStream_t stream) { cudaStream_t stream) {
if (_info.is_transed) { if (info.is_transed) {
std::swap(a, b); std::swap(a, b);
} }
...@@ -67,8 +67,8 @@ void calculate( ...@@ -67,8 +67,8 @@ void calculate(
#endif #endif
} }
auto op_a = _info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T; auto op_a = info.a_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
auto op_b = _info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T; auto op_b = info.b_matrix.row_stride == 1 ? CUBLAS_OP_N : CUBLAS_OP_T;
use_cublas(cublas_handle_pool, use_cublas(cublas_handle_pool,
stream, stream,
...@@ -77,24 +77,24 @@ void calculate( ...@@ -77,24 +77,24 @@ void calculate(
handle, handle,
op_a, op_a,
op_b, op_b,
static_cast<int>(_info.m), static_cast<int>(info.m),
static_cast<int>(_info.n), static_cast<int>(info.n),
static_cast<int>(_info.k), static_cast<int>(info.k),
&alpha, &alpha,
a, a,
a_type, a_type,
static_cast<int>(_info.a_matrix.ld()), static_cast<int>(info.a_matrix.ld()),
_info.a_matrix.stride, info.a_matrix.stride,
b, b,
b_type, b_type,
static_cast<int>(_info.b_matrix.ld()), static_cast<int>(info.b_matrix.ld()),
_info.b_matrix.stride, info.b_matrix.stride,
&beta, &beta,
c, c,
c_type, c_type,
static_cast<int>(_info.c_matrix.ld()), static_cast<int>(info.c_matrix.ld()),
_info.c_matrix.stride, info.c_matrix.stride,
static_cast<int>(_info.batch), static_cast<int>(info.batch),
compute_type, compute_type,
CUBLAS_GEMM_DEFAULT_TENSOR_OP); CUBLAS_GEMM_DEFAULT_TENSOR_OP);
}); });
...@@ -105,8 +105,8 @@ infiniopStatus_t Descriptor::calculate( ...@@ -105,8 +105,8 @@ infiniopStatus_t Descriptor::calculate(
size_t workspace_size, size_t workspace_size,
void *c, void *c,
float beta, float beta,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
void *stream) const { void *stream) const {
......
...@@ -83,8 +83,8 @@ ...@@ -83,8 +83,8 @@
size_t workspace_size, \ size_t workspace_size, \
void *c, \ void *c, \
float beta, \ float beta, \
void const *a, \ const void *a, \
void const *b, \ const void *b, \
float alpha, \ float alpha, \
void *stream) const; \ void *stream) const; \
}; \ }; \
......
...@@ -58,7 +58,7 @@ infiniopGetMatmulWorkspaceSize( ...@@ -58,7 +58,7 @@ infiniopGetMatmulWorkspaceSize(
#define GET(CASE, NAMESPACE) \ #define GET(CASE, NAMESPACE) \
case CASE: \ case CASE: \
*size = reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc)->workspace_size; \ *size = reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc)->workspace_size; \
return INFINIOP_STATUS_SUCCESS return INFINIOP_STATUS_SUCCESS
switch (desc->device_type) { switch (desc->device_type) {
...@@ -87,15 +87,15 @@ __C infiniopStatus_t infiniopMatmul( ...@@ -87,15 +87,15 @@ __C infiniopStatus_t infiniopMatmul(
infiniopMatmulDescriptor_t desc, infiniopMatmulDescriptor_t desc,
void *workspace, size_t workspace_size, void *workspace, size_t workspace_size,
void *c, void *c,
void const *a, const void *a,
void const *b, const void *b,
float alpha, float alpha,
float beta, float beta,
void *stream) { void *stream) {
#define CALCULATE(CASE, NAMESPACE) \ #define CALCULATE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
return reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc) \ return reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc) \
->calculate(workspace, workspace_size, \ ->calculate(workspace, workspace_size, \
c, beta, \ c, beta, \
a, b, alpha, \ a, b, alpha, \
...@@ -128,7 +128,7 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) { ...@@ -128,7 +128,7 @@ infiniopDestroyMatmulDescriptor(infiniopMatmulDescriptor_t desc) {
#define DELETE(CASE, NAMESPACE) \ #define DELETE(CASE, NAMESPACE) \
case CASE: \ case CASE: \
delete reinterpret_cast<matmul::NAMESPACE::Descriptor const *>(desc); \ delete reinterpret_cast<const matmul::NAMESPACE::Descriptor *>(desc); \
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
switch (desc->device_type) { switch (desc->device_type) {
......
...@@ -79,7 +79,7 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc, ...@@ -79,7 +79,7 @@ __C infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescriptor_t desc,
void *workspace, void *workspace,
size_t workspace_size, size_t workspace_size,
void *result, void *result,
void const *probs, const void *probs,
float random_val, float random_val,
float topp, float topp,
int topk, int topk,
......
...@@ -43,7 +43,7 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor( ...@@ -43,7 +43,7 @@ __C infiniopStatus_t infiniopCreateRearrangeDescriptor(
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
} }
__C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, void const *src, void *stream) { __C infiniopStatus_t infiniopRearrange(infiniopRearrangeDescriptor_t desc, void *dst, const void *src, void *stream) {
switch (desc->device_type) { switch (desc->device_type) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
......
...@@ -84,7 +84,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t ...@@ -84,7 +84,7 @@ __C infiniopStatus_t infiniopGetRMSNormWorkspaceSize(infiniopRMSNormDescriptor_t
} }
__C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size, __C infiniopStatus_t infiniopRMSNorm(infiniopRMSNormDescriptor_t desc, void *workspace, size_t workspace_size,
void *y, void const *x, void const *w, void *stream) { void *y, const void *x, const void *w, void *stream) {
switch (desc->device_type) { switch (desc->device_type) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
......
...@@ -91,8 +91,8 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc, ...@@ -91,8 +91,8 @@ __C infiniopStatus_t infiniopGetRoPEWorkspaceSize(infiniopRoPEDescriptor_t desc,
__C infiniopStatus_t infiniopRoPE(infiniopRoPEDescriptor_t desc, __C infiniopStatus_t infiniopRoPE(infiniopRoPEDescriptor_t desc,
void *workspace, size_t workspace_size, void *workspace, size_t workspace_size,
void *t, void const *pos_ids, void *t, const void *pos_ids,
void const *sin_table, void const *cos_table, const void *sin_table, const void *cos_table,
void *stream) { void *stream) {
switch (desc->device_type) { switch (desc->device_type) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
......
...@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateSwiGLUDescriptor( ...@@ -46,7 +46,7 @@ __C infiniopStatus_t infiniopCreateSwiGLUDescriptor(
}; };
__C infiniopStatus_t infiniopSwiGLU(infiniopSwiGLUDescriptor_t desc, void *c, __C infiniopStatus_t infiniopSwiGLU(infiniopSwiGLUDescriptor_t desc, void *c,
void const *a, void const *b, const void *a, const void *b,
void *stream) { void *stream) {
switch (desc->device_type) { switch (desc->device_type) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment