Unverified Commit 3c31dc6c authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #45 from YdrMaster/main

issue/52 代码格式化:机制和效果
parents 16dad776 e5ed9fa1
# Generated from CLion C/C++ Code Style settings ---
BasedOnStyle: LLVM BasedOnStyle: LLVM
AccessModifierOffset: -4 IndentWidth: 4 # 缩进宽度,LLVM 默认值为 2,改为 4
AlignAfterOpenBracket: Align AccessModifierOffset: -4 # public/protected/private 访问控制符相对成员的偏移,与 IndentWidth 配合,LLVM 默认值为 -2
# AlignConsecutiveAssignments: None AlignOperands: AlignAfterOperator # 双目运算符的行间对齐,LLVM 默认值为 Align,改为带符号一起换行
AlignOperands: Align BreakBeforeBinaryOperators: All # 在双目运算符之前换行,LLVM 默认值为 None,改为换行时总是把双目运算符放在行首,包括赋值(=)
AllowAllArgumentsOnNextLine: false ColumnLimit: 0 # 列宽限制,LLVM 默认值为 80,改为不限制
AllowAllConstructorInitializersOnNextLine: false AllowShortBlocksOnASingleLine: Always # 是否允许短块(单个语句的块)不换行,LLVM 默认值为 Never,改为允许
AllowAllParametersOfDeclarationOnNextLine: false AllowShortLoopsOnASingleLine: true # 是否允许短循环不换行,LLVM 默认值为 false,改为允许
AllowShortBlocksOnASingleLine: Always InsertBraces: true # 是否在 if/for/while/switch 等语句后插入大括号,LLVM 默认值为 false,改为允许
AllowShortCaseLabelsOnASingleLine: false BreakBeforeBraces: Custom # 大括号换行配置,LLVM 默认值为 LLVM,改为自定义以使 BraceWrapping 生效
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Always
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: No
BreakBeforeBraces: Custom
BraceWrapping: BraceWrapping:
AfterCaseLabel: false AfterCaseLabel: false
AfterClass: false AfterClass: false
...@@ -23,44 +16,15 @@ BraceWrapping: ...@@ -23,44 +16,15 @@ BraceWrapping:
AfterEnum: false AfterEnum: false
AfterFunction: false AfterFunction: false
AfterNamespace: false AfterNamespace: false
AfterObjCDeclaration: false
AfterStruct: false
AfterUnion: false AfterUnion: false
AfterExternBlock: false
BeforeCatch: false BeforeCatch: false
BeforeElse: false BeforeElse: false
BeforeLambdaBody: false
BeforeWhile: false
IndentBraces: false IndentBraces: false
SplitEmptyFunction: false SplitEmptyFunction: true
SplitEmptyRecord: true SplitEmptyRecord: true
BreakBeforeBinaryOperators: None SplitEmptyNamespace: true
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
ColumnLimit: 0
CompactNamespaces: true
ContinuationIndentWidth: 4
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
KeepEmptyLinesAtTheStartOfBlocks: true
MaxEmptyLinesToKeep: 2
NamespaceIndentation: All
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PointerAlignment: Right
ReflowComments: false
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 0
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
UseTab: Never
name: Build and test
on:
  pull_request:
  push:
    paths-ignore:
      - '**.md'
      - 'LICENSE'
jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        type: [debug, release]
    steps:
      - name: checkout code
        uses: actions/checkout@v4
      - name: install black
        run: pip install black
      - name: check format
        run: python3 scripts/format.py --path src --check
      - name: install xmake
        uses: xmake-io/github-action-setup-xmake@v1
        with:
          xmake-version: latest
      - name: configure xmake
        run: xmake f -cv
      - name: build with xmake
        run: xmake build && xmake install
# InfiniCore # InfiniCore
InfiniCore是一个跨平台统一编程工具集,为不同芯片平台的功能(包括计算、运行时、通信等)提供统一 C 语言接口。目前支持的芯片包括CPU、英伟达GPU、华为昇腾NPU、寒武纪MLU、摩尔线程GPU、天数智芯GPU、沐曦GPU、曙光DCU、昆仑芯。 InfiniCore 是一个跨平台统一编程工具集,为不同芯片平台的功能(包括计算、运行时、通信等)提供统一 C 语言接口。目前支持的硬件和后端包括:
## 一、使用说明 - CPU;
- CUDA
- 英伟达 GPU;
- 摩尔线程 GPU;
- 天数智芯 GPU;
- 沐曦 GPU;
- 曙光 DCU;
- 华为昇腾 NPU;
- 寒武纪 MLU;
- 昆仑芯 XPU;
### 1. 配置 ## 配置和使用
#### 查看当前配置 1. 项目配置
```xmake - 查看当前配置
xmake f -v
```
#### 配置 CPU (默认配置) ```shell
xmake f -v
```
```xmake - 配置 CPU(默认配置)
xmake f -cv
```
#### 配置加速卡 ```shell
xmake f -cv
```
```xmake - 配置加速卡
# 英伟达
# 可以指定 CUDA 路径, 一般环境变量为 `CUDA_HOME` 或者 `CUDA_ROOT`
xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
# 寒武纪 ```shell
xmake f --cambricon-mlu=true -cv # 英伟达
# 可以指定 CUDA 路径, 一般环境变量为 `CUDA_HOME` 或者 `CUDA_ROOT`
xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv
# 华为昇腾 # 寒武纪
xmake f --ascend-npu=true -cv xmake f --cambricon-mlu=true -cv
```
### 2. 编译安装 # 华为昇腾
xmake f --ascend-npu=true -cv
```
```xmake 2. 编译安装
xmake build && xmake install
# 默认安装路径为 $HOME/.infini 默认安装路径为 `$HOME/.infini`
```
```shell
xmake build && xmake install
```
3. 设置环境变量
按输出提示设置 `INFINI_ROOT` 和 `LD_LIBRARY_PATH` 环境变量。
4. 运行算子测试
### 3. 设置环境变量 ```shell
python test/infiniop/[operator].py [--cpu | --nvidia | --cambricon | --ascend]
```
按输出提示设置 `INFINI_ROOT``LD_LIBRARY_PATH` 环境变量。 ## 开发指南
### 4. 运行算子测试 ### 代码格式化
```bash 本项目使用 [`scripts/format.py`](/scripts/format.py) 脚本实现代码格式化检查和操作。
python test/infiniop/[operator].py [--cpu | --nvidia | --cambricon | --ascend]
使用
```shell
python scripts/format.py -h
```
查看脚本帮助信息:
```plaintext
usage: format.py [-h] [--ref REF] [--path [PATH ...]] [--check] [--c C] [--py PY]
options:
-h, --help show this help message and exit
--ref REF Git reference (commit hash) to compare against.
--path [PATH ...] Files to format or check.
--check Check files without modifying them.
--c C C formatter (default: clang-format-16)
--py PY Python formatter (default: black)
``` ```
参数中:
- `ref``path` 控制格式化的文件范围
-`ref``path` 都为空,格式化当前暂存(git added)的文件;
- 否则
-`ref` 非空,将比较指定 commit 和当前代码的差异,只格式化修改过的文件;
-`path` 非空,可传入多个路径(`--path p0 p1 p2`),只格式化指定路径及其子目录中的文件;
- 若设置 `--check`,将检查代码是否需要修改格式,不修改文件内容;
- 通过 `--c` 指定 c/c++ 格式化器,默认为 `clang-format-16`
- 通过 `--py` 指定 python 格式化器,默认为 `black`。
...@@ -6,8 +6,7 @@ ...@@ -6,8 +6,7 @@
#define __INFINICORE_EXPORT_C__ #define __INFINICORE_EXPORT_C__
#if defined(_WIN32) #if defined(_WIN32)
#define __export __declspec(dllexport) #define __export __declspec(dllexport)
#elif defined(__GNUC__) && \ #elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
#define __export __attribute__((visibility("default"))) #define __export __attribute__((visibility("default")))
#else #else
#define __export #define __export
......
...@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip ...@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip
__C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);
#endif #endif
...@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void * ...@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *
__C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);
#endif #endif
...@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto ...@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto
__C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc);
#endif #endif
...@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr ...@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr
__C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc); __C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
#endif// __INFINIOP_TENSOR_DESCRIPTOR__ #endif // __INFINIOP_TENSOR_DESCRIPTOR__
import argparse
import subprocess
import os
from pathlib import Path
from colorama import Fore, Style
# Supported file types: maps a file extension to the formatter family used
# by format_file() — "c" entries go through clang-format, "py" through black.
# Note that CUDA (.cu/.cuh), Cambricon BANG (.mlu) and OpenCL (.cl) sources
# are all formatted with the C/C++ formatter.
SUPPORTED_FILES = {
    ".h": "c",
    ".hh": "c",
    ".hpp": "c",
    ".c": "c",
    ".cc": "c",
    ".cpp": "c",
    ".cxx": "c",
    ".cu": "c",
    ".cuh": "c",
    ".mlu": "c",
    ".cl": "c",
    ".py": "py",
}
def format_file(file: Path, check: bool, formatter) -> bool:
    """Format a single file, or check whether it is already formatted.

    Args:
        file: Path of the file to process.
        check: If True, only report whether the file needs formatting and do
            not modify it.
        formatter: Mapping from file category ("c" / "py") to the formatter
            executable name.

    Returns:
        False only when ``check`` is set and the file needs formatting.
        True otherwise — including unsupported file types, a missing
        formatter executable, and formatter failures (best-effort behavior).
    """
    formatter = formatter.get(SUPPORTED_FILES.get(file.suffix, None), None)
    if not formatter:
        return True  # Unsupported file type, skip.

    # Build the command once, together with the flag (and its position in the
    # argv list) that switches the tool into check-only mode, and the hint
    # shown to the user when the file is not formatted.
    if formatter.startswith("clang-format"):
        cmd = [formatter, "-style=file", "-i", file]
        check_flag, check_pos = "-dry-run", 2
        hint = f"{formatter} -style=file -i {file}"
    elif formatter == "black":
        cmd = [formatter, file]
        check_flag, check_pos = "--check", 1
        hint = f"{formatter} {file}"
    else:
        return True  # Unknown formatter kind, nothing to run.

    try:
        if check:
            cmd.insert(check_pos, check_flag)
            # Do NOT pass check=True here: `black --check` exits non-zero when
            # a file would be reformatted, which previously raised
            # CalledProcessError and made the check silently report success.
            # `clang-format -dry-run` instead exits 0 and reports via stderr,
            # so treat either signal as "needs formatting".
            process = subprocess.run(cmd, capture_output=True, text=True)
            if process.returncode != 0 or process.stderr:
                print(f"{Fore.YELLOW}{file} is not formatted.{Style.RESET_ALL}")
                print(f"Use {Fore.CYAN}{hint}{Style.RESET_ALL} to format it.")
                return False
        else:
            subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                check=True,
            )
            print(f"{Fore.CYAN}Formatted: {file}{Style.RESET_ALL}")
    except FileNotFoundError:
        print(
            f"{Fore.RED}Formatter {formatter} not found, {file} skipped.{Style.RESET_ALL}"
        )
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Formatter {formatter} failed: {e}{Style.RESET_ALL}")
    return True
def git_added_files():
    """Yield the path of every file currently staged in git."""
    # `git diff --cached --name-only` lists everything added to the index.
    cmd = ["git", "diff", "--cached", "--name-only"]
    try:
        output = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
        ).stdout
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Git diff failed: {e}{Style.RESET_ALL}")
        return
    for line in output.splitlines():
        yield Path(line.strip())
def git_modified_since_ref(ref):
    """Yield files added/modified/renamed between git reference `ref` and HEAD/worktree."""
    # --diff-filter=AMR: added, modified, renamed files only (no deletions).
    cmd = ["git", "diff", f"{ref}..", "--diff-filter=AMR", "--name-only"]
    try:
        output = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=True,
        ).stdout
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Git diff failed: {e}{Style.RESET_ALL}")
        return
    for line in output.splitlines():
        yield Path(line.strip())
def list_files(paths):
    """Recursively yield every file found under the given paths.

    Args:
        paths: Iterable of Path objects. Plain files are yielded as-is,
            directories are walked recursively; anything else is reported
            as an error and skipped.

    Yields:
        Path objects for each file found.
    """
    # Note: the original kept an unused `files = []` accumulator; this is a
    # pure generator, so the dead local has been removed.
    for path in paths:
        if path.is_file():
            yield path
        elif path.is_dir():
            for dirpath, _, filenames in os.walk(path):
                for name in filenames:
                    yield Path(dirpath) / name
        else:
            print(
                f"{Fore.RED}Error: {path} is not a file or directory.{Style.RESET_ALL}"
            )
def filter_in_path(file: Path, path) -> bool:
    """Return True if `file` lies under any of the given paths."""
    return any(file.is_relative_to(candidate) for candidate in path)
def main():
    """CLI entry point: select a file set per --ref/--path and format or check it.

    File-selection rules (mirrors the README):
      - no --ref and no --path: currently staged (git added) files;
      - only --path: all files under the given paths;
      - only --ref: files modified since that git reference;
      - both: files modified since --ref, restricted to the given paths.

    Exits with status 1 when --check finds files that need formatting.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ref", type=str, help="Git reference (commit hash) to compare against."
    )
    parser.add_argument(
        "--path", nargs="*", type=Path, help="Files to format or check."
    )
    parser.add_argument(
        "--check", action="store_true", help="Check files without modifying them."
    )
    parser.add_argument(
        "--c", default="clang-format-16", help="C formatter (default: clang-format-16)"
    )
    parser.add_argument(
        "--py", default="black", help="Python formatter (default: black)"
    )
    args = parser.parse_args()

    if args.ref is None and args.path is None:
        # Default: operate on the staged files.
        # Fix: this print was missing its f-string prefix and emitted the
        # literal "{Fore.GREEN}..." placeholder text.
        print(f"{Fore.GREEN}Formating git added files.{Style.RESET_ALL}")
        files = git_added_files()
    else:
        if args.ref is None:
            print(f"{Fore.GREEN}Formating files in {args.path}.{Style.RESET_ALL}")
            files = list_files(args.path)
        elif args.path is None:
            print(
                f"{Fore.GREEN}Formating git modified files from {args.ref}.{Style.RESET_ALL}"
            )
            files = git_modified_since_ref(args.ref)
        else:
            print(
                f"{Fore.GREEN}Formating git modified files from {args.ref} in {args.path}.{Style.RESET_ALL}"
            )
            files = (
                file
                for file in git_modified_since_ref(args.ref)
                if filter_in_path(file, args.path)
            )

    formatted = True
    for file in files:
        if not format_file(
            file,
            args.check,
            {
                "c": args.c,
                "py": args.py,
            },
        ):
            formatted = False
    if not formatted:
        exit(1)


if __name__ == "__main__":
    main()
...@@ -31,115 +31,115 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) { ...@@ -31,115 +31,115 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) {
} }
aclDataType toAclDataType(infiniDtype_t dt) { aclDataType toAclDataType(infiniDtype_t dt) {
if (dt == INFINI_DTYPE_I8) if (dt == INFINI_DTYPE_I8) {
return aclDataType::ACL_INT8; return aclDataType::ACL_INT8;
else if (dt == INFINI_DTYPE_I16) } else if (dt == INFINI_DTYPE_I16) {
return aclDataType::ACL_INT16; return aclDataType::ACL_INT16;
else if (dt == INFINI_DTYPE_I32) } else if (dt == INFINI_DTYPE_I32) {
return aclDataType::ACL_INT32; return aclDataType::ACL_INT32;
else if (dt == INFINI_DTYPE_I64) } else if (dt == INFINI_DTYPE_I64) {
return aclDataType::ACL_INT64; return aclDataType::ACL_INT64;
else if (dt == INFINI_DTYPE_U8) } else if (dt == INFINI_DTYPE_U8) {
return aclDataType::ACL_UINT8; return aclDataType::ACL_UINT8;
else if (dt == INFINI_DTYPE_U16) } else if (dt == INFINI_DTYPE_U16) {
return aclDataType::ACL_UINT16; return aclDataType::ACL_UINT16;
else if (dt == INFINI_DTYPE_U32) } else if (dt == INFINI_DTYPE_U32) {
return aclDataType::ACL_UINT32; return aclDataType::ACL_UINT32;
else if (dt == INFINI_DTYPE_U64) } else if (dt == INFINI_DTYPE_U64) {
return aclDataType::ACL_UINT64; return aclDataType::ACL_UINT64;
else if (dt == INFINI_DTYPE_F16) } else if (dt == INFINI_DTYPE_F16) {
return aclDataType::ACL_FLOAT16; return aclDataType::ACL_FLOAT16;
else if (dt == INFINI_DTYPE_BF16) } else if (dt == INFINI_DTYPE_BF16) {
return aclDataType::ACL_BF16; return aclDataType::ACL_BF16;
else if (dt == INFINI_DTYPE_F32) } else if (dt == INFINI_DTYPE_F32) {
return aclDataType::ACL_FLOAT; return aclDataType::ACL_FLOAT;
else if (dt == INFINI_DTYPE_F64) } else if (dt == INFINI_DTYPE_F64) {
return aclDataType::ACL_DOUBLE; return aclDataType::ACL_DOUBLE;
else } else {
return aclDataType::ACL_DT_UNDEFINED; return aclDataType::ACL_DT_UNDEFINED;
}
} }
const char *dataTypeToString(aclDataType dtype) { const char *dataTypeToString(aclDataType dtype) {
switch (dtype) { switch (dtype) {
case ACL_DT_UNDEFINED: case ACL_DT_UNDEFINED:
return "ACL_DT_UNDEFINED"; return "ACL_DT_UNDEFINED";
case ACL_FLOAT: case ACL_FLOAT:
return "ACL_FLOAT"; return "ACL_FLOAT";
case ACL_FLOAT16: case ACL_FLOAT16:
return "ACL_FLOAT16"; return "ACL_FLOAT16";
case ACL_INT8: case ACL_INT8:
return "ACL_INT8"; return "ACL_INT8";
case ACL_INT32: case ACL_INT32:
return "ACL_INT32"; return "ACL_INT32";
case ACL_UINT8: case ACL_UINT8:
return "ACL_UINT8"; return "ACL_UINT8";
case ACL_INT16: case ACL_INT16:
return "ACL_INT16"; return "ACL_INT16";
case ACL_UINT16: case ACL_UINT16:
return "ACL_UINT16"; return "ACL_UINT16";
case ACL_UINT32: case ACL_UINT32:
return "ACL_UINT32"; return "ACL_UINT32";
case ACL_INT64: case ACL_INT64:
return "ACL_INT64"; return "ACL_INT64";
case ACL_UINT64: case ACL_UINT64:
return "ACL_UINT64"; return "ACL_UINT64";
case ACL_DOUBLE: case ACL_DOUBLE:
return "ACL_DOUBLE"; return "ACL_DOUBLE";
case ACL_BOOL: case ACL_BOOL:
return "ACL_BOOL"; return "ACL_BOOL";
case ACL_STRING: case ACL_STRING:
return "ACL_STRING"; return "ACL_STRING";
case ACL_COMPLEX64: case ACL_COMPLEX64:
return "ACL_COMPLEX64"; return "ACL_COMPLEX64";
case ACL_COMPLEX128: case ACL_COMPLEX128:
return "ACL_COMPLEX128"; return "ACL_COMPLEX128";
case ACL_BF16: case ACL_BF16:
return "ACL_BF16"; return "ACL_BF16";
case ACL_INT4: case ACL_INT4:
return "ACL_INT4"; return "ACL_INT4";
case ACL_UINT1: case ACL_UINT1:
return "ACL_UINT1"; return "ACL_UINT1";
case ACL_COMPLEX32: case ACL_COMPLEX32:
return "ACL_COMPLEX32"; return "ACL_COMPLEX32";
default: default:
return "UNKNOWN"; return "UNKNOWN";
} }
} }
const char *formatToString(aclFormat format) { const char *formatToString(aclFormat format) {
switch (format) { switch (format) {
case ACL_FORMAT_UNDEFINED: case ACL_FORMAT_UNDEFINED:
return "ACL_FORMAT_UNDEFINED"; return "ACL_FORMAT_UNDEFINED";
case ACL_FORMAT_NCHW: case ACL_FORMAT_NCHW:
return "ACL_FORMAT_NCHW"; return "ACL_FORMAT_NCHW";
case ACL_FORMAT_NHWC: case ACL_FORMAT_NHWC:
return "ACL_FORMAT_NHWC"; return "ACL_FORMAT_NHWC";
case ACL_FORMAT_ND: case ACL_FORMAT_ND:
return "ACL_FORMAT_ND"; return "ACL_FORMAT_ND";
case ACL_FORMAT_NC1HWC0: case ACL_FORMAT_NC1HWC0:
return "ACL_FORMAT_NC1HWC0"; return "ACL_FORMAT_NC1HWC0";
case ACL_FORMAT_FRACTAL_Z: case ACL_FORMAT_FRACTAL_Z:
return "ACL_FORMAT_FRACTAL_Z"; return "ACL_FORMAT_FRACTAL_Z";
case ACL_FORMAT_NC1HWC0_C04: case ACL_FORMAT_NC1HWC0_C04:
return "ACL_FORMAT_NC1HWC0_C04"; return "ACL_FORMAT_NC1HWC0_C04";
case ACL_FORMAT_HWCN: case ACL_FORMAT_HWCN:
return "ACL_FORMAT_HWCN"; return "ACL_FORMAT_HWCN";
case ACL_FORMAT_NDHWC: case ACL_FORMAT_NDHWC:
return "ACL_FORMAT_NDHWC"; return "ACL_FORMAT_NDHWC";
case ACL_FORMAT_FRACTAL_NZ: case ACL_FORMAT_FRACTAL_NZ:
return "ACL_FORMAT_FRACTAL_NZ"; return "ACL_FORMAT_FRACTAL_NZ";
case ACL_FORMAT_NCDHW: case ACL_FORMAT_NCDHW:
return "ACL_FORMAT_NCDHW"; return "ACL_FORMAT_NCDHW";
case ACL_FORMAT_NDC1HWC0: case ACL_FORMAT_NDC1HWC0:
return "ACL_FORMAT_NDC1HWC0"; return "ACL_FORMAT_NDC1HWC0";
case ACL_FRACTAL_Z_3D: case ACL_FRACTAL_Z_3D:
return "ACL_FRACTAL_Z_3D"; return "ACL_FRACTAL_Z_3D";
case ACL_FORMAT_NC: case ACL_FORMAT_NC:
return "ACL_FORMAT_NC"; return "ACL_FORMAT_NC";
case ACL_FORMAT_NCL: case ACL_FORMAT_NCL:
return "ACL_FORMAT_NCL"; return "ACL_FORMAT_NCL";
default: default:
return "UNKNOWN"; return "UNKNOWN";
} }
} }
...@@ -34,7 +34,6 @@ extern "C" { ...@@ -34,7 +34,6 @@ extern "C" {
return INFINIOP_STATUS_INTERNAL_ERROR; \ return INFINIOP_STATUS_INTERNAL_ERROR; \
} while (0) } while (0)
#ifdef __cplusplus #ifdef __cplusplus
}; };
#endif #endif
......
...@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s ...@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s
return INFINIOP_STATUS_SUCCESS; return INFINIOP_STATUS_SUCCESS;
} }
/// @brief Infer storage shape. For now this ruturns a 1D shape of the total tensor storage size. /// @brief Infer storage shape. For now this ruturns a 1D shape of the total tensor storage size.
/// We don't see why higher dimensional storage shape is ever needed. To change if necesary. /// We don't see why higher dimensional storage shape is ever needed. To change if necesary.
infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() { infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() {
...@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() { ...@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() {
// Assume bufferSize // Assume bufferSize
size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40; size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40;
char *buffer = (char *) malloc(bufferSize); char *buffer = (char *)malloc(bufferSize);
if (!buffer) return NULL; if (!buffer) {
return NULL;
}
// Write info into buffer // Write info into buffer
char *ptr = buffer; char *ptr = buffer;
......
...@@ -35,11 +35,10 @@ float f16_to_f32(uint16_t h) { ...@@ -35,11 +35,10 @@ float f16_to_f32(uint16_t h) {
uint16_t f32_to_f16(float val) { uint16_t f32_to_f16(float val) {
uint32_t f32; uint32_t f32;
memcpy(&f32, &val, sizeof(f32)); // Read the bits of the float32 memcpy(&f32, &val, sizeof(f32)); // Read the bits of the float32
uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit
int32_t exponent = int32_t exponent = ((f32 >> 23) & 0xFF) - 127; // Extract and de-bias the exponent
((f32 >> 23) & 0xFF) - 127; // Extract and de-bias the exponent uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part)
uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part)
if (exponent >= 31) { // Special cases for Inf and NaN if (exponent >= 31) { // Special cases for Inf and NaN
// NaN // NaN
......
...@@ -19,7 +19,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broad ...@@ -19,7 +19,7 @@ size_t indexToReducedOffset(size_t flat_index, size_t ndim, int64_t const *broad
size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, int64_t const *strides); size_t indexToOffset(size_t flat_index, size_t ndim, size_t const *shape, int64_t const *strides);
/** /**
* get the total array size (element count) after applying padding for a * get the total array size (element count) after applying padding for a
* ndim-ary tensor with the given shape * ndim-ary tensor with the given shape
*/ */
size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads); size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
...@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads); ...@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
// calculate the padded shape and store the result in padded_shape // calculate the padded shape and store the result in padded_shape
std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads); std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads);
#endif// __INFINIOP__COMMON_CPU_H__ #endif // __INFINIOP__COMMON_CPU_H__
...@@ -47,18 +47,18 @@ struct InfiniopCudaHandle { ...@@ -47,18 +47,18 @@ struct InfiniopCudaHandle {
int compute_capability_minor; int compute_capability_minor;
}; };
template<typename T> template <typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) { void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cublas_handle_pool->pop(); auto handle = cublas_handle_pool->pop();
if (!handle) { if (!handle) {
cublasCreate(&(*handle)); cublasCreate(&(*handle));
} }
cublasSetStream(*handle, (cudaStream_t) stream); cublasSetStream(*handle, (cudaStream_t)stream);
f(*handle); f(*handle);
cublas_handle_pool->push(std::move(*handle)); cublas_handle_pool->push(std::move(*handle));
} }
template<typename T> template <typename T>
cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) { cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) {
auto handle = cudnn_handle_pool->pop(); auto handle = cudnn_handle_pool->pop();
if (!handle) { if (!handle) {
...@@ -72,24 +72,24 @@ cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, ...@@ -72,24 +72,24 @@ cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool,
inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) { inline cudnnDataType_t getCudnnDtype(infiniDtype_t dt) {
switch (dt) { switch (dt) {
case INFINI_DTYPE_F16: case INFINI_DTYPE_F16:
return CUDNN_DATA_HALF; return CUDNN_DATA_HALF;
case INFINI_DTYPE_F32: case INFINI_DTYPE_F32:
return CUDNN_DATA_FLOAT; return CUDNN_DATA_FLOAT;
case INFINI_DTYPE_F64: case INFINI_DTYPE_F64:
return CUDNN_DATA_DOUBLE; return CUDNN_DATA_DOUBLE;
case INFINI_DTYPE_BF16: case INFINI_DTYPE_BF16:
return CUDNN_DATA_BFLOAT16; return CUDNN_DATA_BFLOAT16;
case INFINI_DTYPE_I8: case INFINI_DTYPE_I8:
return CUDNN_DATA_INT8; return CUDNN_DATA_INT8;
case INFINI_DTYPE_I32: case INFINI_DTYPE_I32:
return CUDNN_DATA_INT32; return CUDNN_DATA_INT32;
case INFINI_DTYPE_I64: case INFINI_DTYPE_I64:
return CUDNN_DATA_INT64; return CUDNN_DATA_INT64;
case INFINI_DTYPE_U8: case INFINI_DTYPE_U8:
return CUDNN_DATA_UINT8; return CUDNN_DATA_UINT8;
default: default:
return CUDNN_DATA_FLOAT; return CUDNN_DATA_FLOAT;
} }
} }
...@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim, ...@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
return res; return res;
} }
#endif// __INFINIOP_COMMON_CUDA_H__ #endif // __INFINIOP_COMMON_CUDA_H__
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <mutex> #include <mutex>
#include <optional> #include <optional>
template<class T> template <class T>
class Pool { class Pool {
public: public:
Pool() : _head(nullptr) {} Pool() : _head(nullptr) {}
...@@ -21,7 +21,7 @@ public: ...@@ -21,7 +21,7 @@ public:
void push(T &&val) const { void push(T &&val) const {
Node<T> *new_node = new Node<T>(std::move(val)); Node<T> *new_node = new Node<T>(std::move(val));
new_node->next = _head.load(); new_node->next = _head.load();
while (!_head.compare_exchange_weak(new_node->next, new_node)); while (!_head.compare_exchange_weak(new_node->next, new_node)) {}
} }
std::optional<T> pop() const { std::optional<T> pop() const {
...@@ -37,7 +37,7 @@ public: ...@@ -37,7 +37,7 @@ public:
} }
private: private:
template<class U> template <class U>
struct Node { struct Node {
U data; U data;
Node<U> *next; Node<U> *next;
......
...@@ -6,35 +6,35 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor( ...@@ -6,35 +6,35 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
infiniopTensorDescriptor_t y_desc) { infiniopTensorDescriptor_t y_desc) {
switch (handle->device) { switch (handle->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuCreateCausalSoftmaxDescriptor(handle, (CausalSoftmaxCpuDescriptor_t *) desc_ptr, y_desc); return cpuCreateCausalSoftmaxDescriptor(handle, (CausalSoftmaxCpuDescriptor_t *)desc_ptr, y_desc);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaCreateCausalSoftmaxDescriptor((CudaHandle_t)handle, (CausalSoftmaxCudaDescriptor_t *) desc_ptr, y_desc); return cudaCreateCausalSoftmaxDescriptor((CudaHandle_t)handle, (CausalSoftmaxCudaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxBangDescriptor_t *) desc_ptr, y_desc); return bangCreateCausalSoftmaxDescriptor((BangHandle_t)handle, (CausalSoftmaxBangDescriptor_t *)desc_ptr, y_desc);
// return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc); // return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnCreateCausalSoftmaxDescriptor((AscendHandle_t) handle, (CausalSoftmaxAclnnDescriptor_t *) desc_ptr, y_desc); return aclnnCreateCausalSoftmaxDescriptor((AscendHandle_t)handle, (CausalSoftmaxAclnnDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaCreateCausalSoftmaxDescriptor((MacaHandle_t) handle, (CausalSoftmaxMacaDescriptor_t *) desc_ptr, y_desc); return macaCreateCausalSoftmaxDescriptor((MacaHandle_t)handle, (CausalSoftmaxMacaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaCreateCausalSoftmaxDescriptor((MusaHandle_t) handle, (CausalSoftmaxMusaDescriptor_t *) desc_ptr, y_desc); return musaCreateCausalSoftmaxDescriptor((MusaHandle_t)handle, (CausalSoftmaxMusaDescriptor_t *)desc_ptr, y_desc);
} }
#endif #endif
} }
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -43,36 +43,36 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor( ...@@ -43,36 +43,36 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
__C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, uint64_t *size) { __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmaxDescriptor_t desc, uint64_t *size) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCpuDescriptor_t) desc, size); return cpuGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCpuDescriptor_t)desc, size);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCudaDescriptor_t) desc, size); return cudaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCudaDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangGetCausalSoftmaxWorkspaceSize((CausalSoftmaxBangDescriptor_t) desc, size); return bangGetCausalSoftmaxWorkspaceSize((CausalSoftmaxBangDescriptor_t)desc, size);
// return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size); // return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnGetCausalSoftmaxWorkspaceSize((CausalSoftmaxAclnnDescriptor_t) desc, size); return aclnnGetCausalSoftmaxWorkspaceSize((CausalSoftmaxAclnnDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMacaDescriptor_t) desc, size); return macaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMacaDescriptor_t)desc, size);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMusaDescriptor_t) desc, size); return musaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMusaDescriptor_t)desc, size);
} }
#endif #endif
} }
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -81,35 +81,35 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax ...@@ -81,35 +81,35 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
__C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t desc, void *workspace, uint64_t workspace_size, void *data, void *stream) { __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t desc, void *workspace, uint64_t workspace_size, void *data, void *stream) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuCausalSoftmax((CausalSoftmaxCpuDescriptor_t) desc, workspace, workspace_size, data, stream); return cpuCausalSoftmax((CausalSoftmaxCpuDescriptor_t)desc, workspace, workspace_size, data, stream);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaCausalSoftmax((CausalSoftmaxCudaDescriptor_t) desc, workspace, workspace_size, data, stream); return cudaCausalSoftmax((CausalSoftmaxCudaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangCausalSoftmax((CausalSoftmaxBangDescriptor_t) desc, workspace, workspace_size, data, stream); return bangCausalSoftmax((CausalSoftmaxBangDescriptor_t)desc, workspace, workspace_size, data, stream);
// return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream); // return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnCausalSoftmax((CausalSoftmaxAclnnDescriptor_t) desc, workspace, workspace_size, data, stream); return aclnnCausalSoftmax((CausalSoftmaxAclnnDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaCausalSoftmax((CausalSoftmaxMacaDescriptor_t) desc, workspace, workspace_size, data, stream); return macaCausalSoftmax((CausalSoftmaxMacaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: { case DevMthreadsGpu: {
return musaCausalSoftmax((CausalSoftmaxMusaDescriptor_t) desc, workspace, workspace_size, data, stream); return musaCausalSoftmax((CausalSoftmaxMusaDescriptor_t)desc, workspace, workspace_size, data, stream);
} }
#endif #endif
} }
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -118,34 +118,34 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des ...@@ -118,34 +118,34 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des
__C infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) { __C infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc) {
switch (desc->device) { switch (desc->device) {
#ifdef ENABLE_CPU #ifdef ENABLE_CPU
case DevCpu: case DevCpu:
return cpuDestroyCausalSoftmaxDescriptor((CausalSoftmaxCpuDescriptor_t) desc); return cpuDestroyCausalSoftmaxDescriptor((CausalSoftmaxCpuDescriptor_t)desc);
#endif #endif
#ifdef ENABLE_NV_GPU #ifdef ENABLE_NV_GPU
case DevNvGpu: { case DevNvGpu: {
return cudaDestroyCausalSoftmaxDescriptor((CausalSoftmaxCudaDescriptor_t) desc); return cudaDestroyCausalSoftmaxDescriptor((CausalSoftmaxCudaDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_CAMBRICON_MLU #ifdef ENABLE_CAMBRICON_MLU
case DevCambriconMlu: { case DevCambriconMlu: {
return bangDestroyCausalSoftmaxDescriptor((CausalSoftmaxBangDescriptor_t) desc); return bangDestroyCausalSoftmaxDescriptor((CausalSoftmaxBangDescriptor_t)desc);
// return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc); // return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
} }
#endif #endif
#ifdef ENABLE_ASCEND_NPU #ifdef ENABLE_ASCEND_NPU
case DevAscendNpu: { case DevAscendNpu: {
return aclnnDestroyCausalSoftmaxDescriptor((CausalSoftmaxAclnnDescriptor_t) desc); return aclnnDestroyCausalSoftmaxDescriptor((CausalSoftmaxAclnnDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_METAX_GPU #ifdef ENABLE_METAX_GPU
case DevMetaxGpu: { case DevMetaxGpu: {
return macaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMacaDescriptor_t) desc); return macaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMacaDescriptor_t)desc);
} }
#endif #endif
#ifdef ENABLE_MTHREADS_GPU #ifdef ENABLE_MTHREADS_GPU
case DevMthreadsGpu: case DevMthreadsGpu:
return musaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMusaDescriptor_t) desc); return musaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMusaDescriptor_t)desc);
#endif #endif
} }
return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
...@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace, ...@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace,
for (size_t i = 0; i < batch; i++) { for (size_t i = 0; i < batch; i++) {
AclSetTensorAddr(desc->executor, 0, ta, AclSetTensorAddr(desc->executor, 0, ta,
(char *)(a) + i * desc->info->a_matrix.stride * (char *)(a) + i * desc->info->a_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 1, tb, AclSetTensorAddr(desc->executor, 1, tb,
(char *)(b) + i * desc->info->b_matrix.stride * (char *)(b) + i * desc->info->b_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 2, tc, AclSetTensorAddr(desc->executor, 2, tc,
(char *)(c) + i * desc->info->c_matrix.stride * (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
AclSetTensorAddr(desc->executor, 3, tc, AclSetTensorAddr(desc->executor, 3, tc,
(char *)(c) + i * desc->info->c_matrix.stride * (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));
infiniSizeof(desc->dtype));
ret = aclnnGemm(workspace, workspaceSize, desc->executor, stream); ret = aclnnGemm(workspace, workspaceSize, desc->executor, stream);
CHECK_RET(ret == ACL_SUCCESS, CHECK_RET(ret == ACL_SUCCESS,
LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret); LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
......
...@@ -73,8 +73,8 @@ bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) { ...@@ -73,8 +73,8 @@ bangDestroyMatmulDescriptor(infiniopMatmulBangDescriptor_t desc) {
} }
void bangMatmulCnnl(infiniopMatmulBangDescriptor_t desc, void *workspace, void *c, void bangMatmulCnnl(infiniopMatmulBangDescriptor_t desc, void *workspace, void *c,
float beta, void const *a, void const *b, float alpha, float beta, void const *a, void const *b, float alpha,
void *stream) { void *stream) {
auto info = desc->info; auto info = desc->info;
if (info.is_transed) { if (info.is_transed) {
std::swap(a, b); std::swap(a, b);
......
...@@ -88,7 +88,7 @@ struct MatmulInfo { ...@@ -88,7 +88,7 @@ struct MatmulInfo {
return; return;
} }
if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows){ if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows) {
*status = INFINIOP_STATUS_BAD_TENSOR_SHAPE; *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
return; return;
} }
...@@ -113,4 +113,4 @@ struct MatmulInfo { ...@@ -113,4 +113,4 @@ struct MatmulInfo {
} }
}; };
#endif// __BLAS_H__ #endif // __BLAS_H__
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment