Commit ec0ff893 authored by YdrMaster's avatar YdrMaster
Browse files

issue/52: 格式化所有 c/c++ 文件


Signed-off-by: default avatarYdrMaster <ydrml@hotmail.com>
parent 27ba98d1
...@@ -6,8 +6,7 @@ ...@@ -6,8 +6,7 @@
#define __INFINICORE_EXPORT_C__ #define __INFINICORE_EXPORT_C__
#if defined(_WIN32) #if defined(_WIN32)
#define __export __declspec(dllexport) #define __export __declspec(dllexport)
#elif defined(__GNUC__) && \ #elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#define __export __attribute__((visibility("default"))) #define __export __attribute__((visibility("default")))
#else #else
#define __export #define __export
......
...@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip ...@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip
__C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);
#endif #endif
...@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void * ...@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *
__C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);
#endif #endif
...@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto ...@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto
__C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc);
#endif #endif
...@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr ...@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
#endif// __INFINIOP_TENSOR_DESCRIPTOR__ #endif // __INFINIOP_TENSOR_DESCRIPTOR__
...@@ -31,35 +31,35 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) { ...@@ -31,35 +31,35 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) {
} }
aclDataType toAclDataType(infiniDtype_t dt) { aclDataType toAclDataType(infiniDtype_t dt) {
if (dt == INFINI_DTYPE_I8) if (dt == INFINI_DTYPE_I8) {
return aclDataType::ACL_INT8; return aclDataType::ACL_INT8;
else if (dt == INFINI_DTYPE_I16) } else if (dt == INFINI_DTYPE_I16) {
return aclDataType::ACL_INT16; return aclDataType::ACL_INT16;
else if (dt == INFINI_DTYPE_I32) } else if (dt == INFINI_DTYPE_I32) {
return aclDataType::ACL_INT32; return aclDataType::ACL_INT32;
else if (dt == INFINI_DTYPE_I64) } else if (dt == INFINI_DTYPE_I64) {
return aclDataType::ACL_INT64; return aclDataType::ACL_INT64;
else if (dt == INFINI_DTYPE_U8) } else if (dt == INFINI_DTYPE_U8) {
return aclDataType::ACL_UINT8; return aclDataType::ACL_UINT8;
else if (dt == INFINI_DTYPE_U16) } else if (dt == INFINI_DTYPE_U16) {
return aclDataType::ACL_UINT16; return aclDataType::ACL_UINT16;
else if (dt == INFINI_DTYPE_U32) } else if (dt == INFINI_DTYPE_U32) {
return aclDataType::ACL_UINT32; return aclDataType::ACL_UINT32;
else if (dt == INFINI_DTYPE_U64) } else if (dt == INFINI_DTYPE_U64) {
return aclDataType::ACL_UINT64; return aclDataType::ACL_UINT64;
else if (dt == INFINI_DTYPE_F16) } else if (dt == INFINI_DTYPE_F16) {
return aclDataType::ACL_FLOAT16; return aclDataType::ACL_FLOAT16;
else if (dt == INFINI_DTYPE_BF16) } else if (dt == INFINI_DTYPE_BF16) {
return aclDataType::ACL_BF16; return aclDataType::ACL_BF16;
else if (dt == INFINI_DTYPE_F32) } else if (dt == INFINI_DTYPE_F32) {
return aclDataType::ACL_FLOAT; return aclDataType::ACL_FLOAT;
else if (dt == INFINI_DTYPE_F64) } else if (dt == INFINI_DTYPE_F64) {
return aclDataType::ACL_DOUBLE; return aclDataType::ACL_DOUBLE;
else } else {
return aclDataType::ACL_DT_UNDEFINED; return aclDataType::ACL_DT_UNDEFINED;
}
} }
const char *dataTypeToString(aclDataType dtype) { const char *dataTypeToString(aclDataType dtype) {
switch (dtype) { switch (dtype) {
case ACL_DT_UNDEFINED: case ACL_DT_UNDEFINED:
......
...@@ -34,7 +34,6 @@ extern "C" { ...@@ -34,7 +34,6 @@ extern "C" {
return INFINIOP_STATUS_INTERNAL_ERROR; \ return INFINIOP_STATUS_INTERNAL_ERROR; \
} while (0) } while (0)
#ifdef __cplusplus #ifdef __cplusplus
}; };
#endif #endif
......
...@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s ...@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
/// @brief Infer storage shape. For now this ruturns a 1D shape of the total tensor storage size. /// @brief Infer storage shape. For now this ruturns a 1D shape of the total tensor storage size.
/// We don't see why higher dimensional storage shape is ever needed. To change if necesary. /// We don't see why higher dimensional storage shape is ever needed. To change if necesary.
infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() { infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() {
...@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() { ...@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() {
// Assume bufferSize // Assume bufferSize
size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40; size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40;
char *buffer = (char *) malloc(bufferSize); char *buffer = (char *)malloc(bufferSize);
if (!buffer) return NULL; if (!buffer) {
return NULL;
}
// Write info into buffer // Write info into buffer
char *ptr = buffer; char *ptr = buffer;
......
...@@ -37,8 +37,7 @@ uint16_t f32_to_f16(float val) { ...@@ -37,8 +37,7 @@ uint16_t f32_to_f16(float val) {
uint32_t f32; uint32_t f32;
memcpy(&f32, &val, sizeof(f32)); // Read the bits of the float32 memcpy(&f32, &val, sizeof(f32)); // Read the bits of the float32
uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit
int32_t exponent = int32_t exponent = ((f32 >> 23) & 0xFF) - 127; // Extract and de-bias the exponent
((f32 >> 23) & 0xFF) - 127; // Extract and de-bias the exponent
uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part) uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part)
if (exponent >= 31) { // Special cases for Inf and NaN if (exponent >= 31) { // Special cases for Inf and NaN
......
...@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads); ...@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
// calculate the padded shape and store the result in padded_shape // calculate the padded shape and store the result in padded_shape
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads); std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads);
#endif// __INFINIOP__COMMON_CPU_H__ #endif // __INFINIOP__COMMON_CPU_H__
...@@ -47,18 +47,18 @@ struct InfiniopCudaHandle { ...@@ -47,18 +47,18 @@ struct InfiniopCudaHandle {
int compute_capability_minor; int compute_capability_minor;
}; };
template<typename T> template <typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) { void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cublas_handle_pool->pop(); auto handle = cublas_handle_pool->pop();
if (!handle) { if (!handle) {
cublasCreate(&(*handle)); cublasCreate(&(*handle));
} }
cublasSetStream(*handle, (cudaStream_t) stream); cublasSetStream(*handle, (cudaStream_t)stream);
f(*handle); f(*handle);
cublas_handle_pool->push(std::move(*handle)); cublas_handle_pool->push(std::move(*handle));
} }
template<typename T> template <typename T>
cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) { cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cudnn_handle_pool->pop(); auto handle = cudnn_handle_pool->pop();
if (!handle) { if (!handle) {
...@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim, ...@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
return res; return res;
} }
#endif// __INFINIOP_COMMON_CUDA_H__ #endif // __INFINIOP_COMMON_CUDA_H__
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <mutex> #include <mutex>
#include <optional> #include <optional>
template<class T> template <class T>
class Pool { class Pool {
public: public:
Pool() : _head(nullptr) {} Pool() : _head(nullptr) {}
...@@ -21,7 +21,7 @@ public: ...@@ -21,7 +21,7 @@ public:
void push(T &&val) const { void push(T &&val) const {
Node<T> *new_node = new Node<T>(std::move(val)); Node<T> *new_node = new Node<T>(std::move(val));
new_node->next = _head.load(); new_node->next = _head.load();
while (!_head.compare_exchange_weak(new_node->next, new_node)); while (!_head.compare_exchange_weak(new_node->next, new_node)) {}
} }
std::optional<T> pop() const { std::optional<T> pop() const {
...@@ -37,7 +37,7 @@ public: ...@@ -37,7 +37,7 @@ public:
} }
private: private:
template<class U> template <class U>
struct Node { struct Node {
U data; U data;
Node<U> *next; Node<U> *next;
......
...@@ -7,33 +7,33 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor( ...@@ -7,33 +7,33 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
switch (handle->device) { switch (handle->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuCreateCausalSoftmaxDescriptor(handle, (CausalSoftmaxCpuDescriptor_t *) desc_ptr, y_desc); return cpuCreateCausalSoftmaxDescriptor(handle, (CausalSoftmaxCpuDescriptor_t *)desc_ptr, y_desc);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaCreateCausalSoftmaxDescriptor((CudaHandle_t)handle, (CausalSoftmaxCudaDescriptor_t *) desc_ptr, y_desc); return cudaCreateCausalSoftmaxDescriptor((CudaHandle_t)handle, (CausalSoftmaxCudaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxBangDescriptor_t *) desc_ptr, y_desc); return bangCreateCausalSoftmaxDescriptor((BangHandle_t)handle, (CausalSoftmaxBangDescriptor_t *)desc_ptr, y_desc);
// return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc); // return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnCreateCausalSoftmaxDescriptor((AscendHandle_t) handle, (CausalSoftmaxAclnnDescriptor_t *) desc_ptr, y_desc); return aclnnCreateCausalSoftmaxDescriptor((AscendHandle_t)handle, (CausalSoftmaxAclnnDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaCreateCausalSoftmaxDescriptor((MacaHandle_t) handle, (CausalSoftmaxMacaDescriptor_t *) desc_ptr, y_desc); return macaCreateCausalSoftmaxDescriptor((MacaHandle_t)handle, (CausalSoftmaxMacaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaCreateCausalSoftmaxDescriptor((MusaHandle_t) handle, (CausalSoftmaxMusaDescriptor_t *) desc_ptr, y_desc); return musaCreateCausalSoftmaxDescriptor((MusaHandle_t)handle, (CausalSoftmaxMusaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
} }
...@@ -44,34 +44,34 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax ...@@ -44,34 +44,34 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCpuDescriptor_t) desc, size); return cpuGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCpuDescriptor_t)desc, size);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCudaDescriptor_t) desc, size); return cudaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCudaDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangGetCausalSoftmaxWorkspaceSize((CausalSoftmaxBangDescriptor_t) desc, size); return bangGetCausalSoftmaxWorkspaceSize((CausalSoftmaxBangDescriptor_t)desc, size);
// return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size); // return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnGetCausalSoftmaxWorkspaceSize((CausalSoftmaxAclnnDescriptor_t) desc, size); return aclnnGetCausalSoftmaxWorkspaceSize((CausalSoftmaxAclnnDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMacaDescriptor_t) desc, size); return macaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMacaDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMusaDescriptor_t) desc, size); return musaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMusaDescriptor_t)desc, size);
} }
#endif #endif
} }
...@@ -82,33 +82,33 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des ...@@ -82,33 +82,33 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuCausalSoftmax((CausalSoftmaxCpuDescriptor_t) desc, workspace, workspace_size, data, stream); return cpuCausalSoftmax((CausalSoftmaxCpuDescriptor_t)desc, workspace, workspace_size, data, stream);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaCausalSoftmax((CausalSoftmaxCudaDescriptor_t) desc, workspace, workspace_size, data, stream); return cudaCausalSoftmax((CausalSoftmaxCudaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangCausalSoftmax((CausalSoftmaxBangDescriptor_t) desc, workspace, workspace_size, data, stream); return bangCausalSoftmax((CausalSoftmaxBangDescriptor_t)desc, workspace, workspace_size, data, stream);
// return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream); // return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnCausalSoftmax((CausalSoftmaxAclnnDescriptor_t) desc, workspace, workspace_size, data, stream); return aclnnCausalSoftmax((CausalSoftmaxAclnnDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaCausalSoftmax((CausalSoftmaxMacaDescriptor_t) desc, workspace, workspace_size, data, stream); return macaCausalSoftmax((CausalSoftmaxMacaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaCausalSoftmax((CausalSoftmaxMusaDescriptor_t) desc, workspace, workspace_size, data, stream); return musaCausalSoftmax((CausalSoftmaxMusaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
} }
...@@ -119,33 +119,33 @@ __C infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftma ...@@ -119,33 +119,33 @@ __C infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftma
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuDestroyCausalSoftmaxDescriptor((CausalSoftmaxCpuDescriptor_t) desc); return cpuDestroyCausalSoftmaxDescriptor((CausalSoftmaxCpuDescriptor_t)desc);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaDestroyCausalSoftmaxDescriptor((CausalSoftmaxCudaDescriptor_t) desc); return cudaDestroyCausalSoftmaxDescriptor((CausalSoftmaxCudaDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangDestroyCausalSoftmaxDescriptor((CausalSoftmaxBangDescriptor_t) desc); return bangDestroyCausalSoftmaxDescriptor((CausalSoftmaxBangDescriptor_t)desc);
// return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc); // return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnDestroyCausalSoftmaxDescriptor((CausalSoftmaxAclnnDescriptor_t) desc); return aclnnDestroyCausalSoftmaxDescriptor((CausalSoftmaxAclnnDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMacaDescriptor_t) desc); return macaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMacaDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: case DevMthreadsGpu:
return musaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMusaDescriptor_t) desc); return musaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMusaDescriptor_t)desc);
#endif #endif
} }
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
...@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace, ...@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace,
for (size_t i = 0; i < batch; i++) { for (size_t i = 0; i < batch; i++) {
AclSetTensorAddr(desc->executor, 0, ta, AclSetTensorAddr(desc->executor, 0, ta,
(char *)(a) + i * desc->info->a_matrix.stride * (char *)(a) + i * desc->info->a_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 1, tb, AclSetTensorAddr(desc->executor, 1, tb,
(char *)(b) + i * desc->info->b_matrix.stride * (char *)(b) + i * desc->info->b_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 2, tc, AclSetTensorAddr(desc->executor, 2, tc,
(char *)(c) + i * desc->info->c_matrix.stride * (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 3, tc, AclSetTensorAddr(desc->executor, 3, tc,
(char *)(c) + i * desc->info->c_matrix.stride * (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
ret = aclnnGemm(workspace, workspaceSize, desc->executor, stream); ret = aclnnGemm(workspace, workspaceSize, desc->executor, stream);
CHECK_RET(ret == ACL_SUCCESS, CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret); LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
......
...@@ -88,7 +88,7 @@ struct MatmulInfo { ...@@ -88,7 +88,7 @@ struct MatmulInfo {
return; return;
} }
if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows){ if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows) {
*status = INFINIOP_STATUS_BAD_TENSOR_SHAPE; *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
return; return;
} }
...@@ -113,4 +113,4 @@ struct MatmulInfo { ...@@ -113,4 +113,4 @@ struct MatmulInfo {
} }
}; };
#endif// __BLAS_H__ #endif // __BLAS_H__
...@@ -49,20 +49,11 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c, ...@@ -49,20 +49,11 @@ infiniopStatus_t cpuCalculateMatmul(infiniopMatmulCpuDescriptor_t desc, void *c,
for (size_t i = 0; i < info.batch; ++i) { for (size_t i = 0; i < info.batch; ++i) {
for (size_t m_ = 0; m_ < info.m; ++m_) { for (size_t m_ = 0; m_ < info.m; ++m_) {
for (size_t n_ = 0; n_ < info.n; ++n_) { for (size_t n_ = 0; n_ < info.n; ++n_) {
auto c_ = reinterpret_cast<Tdata *>(c) + auto c_ = reinterpret_cast<Tdata *>(c) + i * info.c_matrix.stride + m_ * info.c_matrix.row_stride + n_ * info.c_matrix.col_stride;
i * info.c_matrix.stride +
m_ * info.c_matrix.row_stride +
n_ * info.c_matrix.col_stride;
float sum = 0; float sum = 0;
for (size_t k_ = 0; k_ < info.k; ++k_) { for (size_t k_ = 0; k_ < info.k; ++k_) {
auto a_ = reinterpret_cast<Tdata const *>(a) + auto a_ = reinterpret_cast<Tdata const *>(a) + i * info.a_matrix.stride + m_ * info.a_matrix.row_stride + k_ * info.a_matrix.col_stride;
i * info.a_matrix.stride + auto b_ = reinterpret_cast<Tdata const *>(b) + i * info.b_matrix.stride + n_ * info.b_matrix.col_stride + k_ * info.b_matrix.row_stride;
m_ * info.a_matrix.row_stride +
k_ * info.a_matrix.col_stride;
auto b_ = reinterpret_cast<Tdata const *>(b) +
i * info.b_matrix.stride +
n_ * info.b_matrix.col_stride +
k_ * info.b_matrix.row_stride;
if constexpr (std::is_same<Tdata, uint16_t>::value) { if constexpr (std::is_same<Tdata, uint16_t>::value) {
sum += f16_to_f32(*a_) * f16_to_f32(*b_); sum += f16_to_f32(*a_) * f16_to_f32(*b_);
} else { } else {
......
#include "./matmul_cuda.cuh"
#include "../../utils.h" #include "../../utils.h"
#include "./matmul_cuda.cuh"
infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle, infiniopStatus_t cudaCreateMatmulDescriptor(infiniopCudaHandle_t handle,
infiniopMatmulCudaDescriptor_t *desc_ptr, infiniopMatmulCudaDescriptor_t *desc_ptr,
......
#ifndef __INFINIOP_MATMUL_CUDA_H__ #ifndef __INFINIOP_MATMUL_CUDA_H__
#define __INFINIOP_MATMUL_CUDA_H__ #define __INFINIOP_MATMUL_CUDA_H__
#include "matmul_cuda_api.h"
#include "../../../devices/cuda/common_cuda.cuh" #include "../../../devices/cuda/common_cuda.cuh"
#include <memory>
#include "../blas.h" #include "../blas.h"
#include "matmul_cuda_api.h"
#include <memory>
typedef struct InfiniopMatmulCudaDescriptor { typedef struct InfiniopMatmulCudaDescriptor {
infiniDevice_t device; infiniDevice_t device;
...@@ -14,4 +14,4 @@ typedef struct InfiniopMatmulCudaDescriptor { ...@@ -14,4 +14,4 @@ typedef struct InfiniopMatmulCudaDescriptor {
std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool; std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool;
} InfiniopMatmulCudaDescriptor; } InfiniopMatmulCudaDescriptor;
#endif// __INFINIOP_MATMUL_CUDA_H__ #endif // __INFINIOP_MATMUL_CUDA_H__
...@@ -4,7 +4,6 @@ ...@@ -4,7 +4,6 @@
#include "../../../devices/cuda/cuda_handle.h" #include "../../../devices/cuda/cuda_handle.h"
#include "infiniop/operator.h" #include "infiniop/operator.h"
struct InfiniopMatmulCudaDescriptor; struct InfiniopMatmulCudaDescriptor;
typedef struct InfiniopMatmulCudaDescriptor *infiniopMatmulCudaDescriptor_t; typedef struct InfiniopMatmulCudaDescriptor *infiniopMatmulCudaDescriptor_t;
...@@ -28,5 +27,4 @@ infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc, ...@@ -28,5 +27,4 @@ infiniopStatus_t cudaMatmul(infiniopMatmulCudaDescriptor_t desc,
infiniopStatus_t cudaDestroyMatmulDescriptor(infiniopMatmulCudaDescriptor_t desc); infiniopStatus_t cudaDestroyMatmulDescriptor(infiniopMatmulCudaDescriptor_t desc);
#endif // __INFINIOP_MATMUL_CUDA_API_H__ #endif // __INFINIOP_MATMUL_CUDA_API_H__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment