jerrrrry / infinicore · Commit 3c31dc6c (unverified)

Authored Feb 18, 2025 by PanZezhong1725; committed by GitHub on Feb 18, 2025.

Merge pull request #45 from YdrMaster/main

issue/52 Code formatting: mechanism and effect

Parents: 16dad776, e5ed9fa1

Changes: 48 files in total; this page shows 20 changed files with 542 additions and 297 deletions.
Files changed on this page:

.clang-format                                     +17  -53
.github/workflows/build.yml                       +37  -0
README.md                                         +80  -31
include/infinicore.h                              +1   -2
include/infiniop/ops/causal_softmax.h             +0   -1
include/infiniop/ops/conv.h                       +0   -1
include/infiniop/ops/random_sample.h              +0   -1
include/infiniop/tensor_descriptor.h              +1   -1
scripts/format.py                                 +204 -0
src/infiniop/devices/ascend/common_ascend.cc      +88  -88
src/infiniop/devices/ascend/common_ascend.h       +0   -1
src/infiniop/devices/ascend/tensor_aclnn.cc       +4   -3
src/infiniop/devices/cpu/common_cpu.cc            +4   -5
src/infiniop/devices/cpu/common_cpu.h             +2   -2
src/infiniop/devices/cuda/common_cuda.cuh         +22  -22
src/infiniop/devices/pool.h                       +3   -3
src/infiniop/ops/causal_softmax/operator.cc       +71  -71
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc    +4   -8
src/infiniop/ops/matmul/bang/matmul_cnnl.cc       +2   -2
src/infiniop/ops/matmul/blas.h                    +2   -2
.clang-format

The CLion-generated configuration is replaced by a shorter, commented LLVM-based style:

-# Generated from CLion C/C++ Code Style settings
----
 BasedOnStyle: LLVM
-AccessModifierOffset: -4
-AlignAfterOpenBracket: Align
-# AlignConsecutiveAssignments: None
-AlignOperands: Align
-AllowAllArgumentsOnNextLine: false
-AllowAllConstructorInitializersOnNextLine: false
-AllowAllParametersOfDeclarationOnNextLine: false
-AllowShortBlocksOnASingleLine: Always
-AllowShortCaseLabelsOnASingleLine: false
-AllowShortFunctionsOnASingleLine: All
-AllowShortIfStatementsOnASingleLine: Always
-AllowShortLambdasOnASingleLine: All
-AllowShortLoopsOnASingleLine: true
-AlwaysBreakAfterReturnType: None
-AlwaysBreakTemplateDeclarations: No
-BreakBeforeBraces: Custom
+IndentWidth: 4 # indent width; the LLVM default is 2, changed to 4
+AccessModifierOffset: -4 # offset of public/protected/private access specifiers relative to members, paired with IndentWidth; the LLVM default is -2
+AlignOperands: AlignAfterOperator # alignment of binary operands across wrapped lines; the LLVM default is Align, changed so the operator wraps together with its operand
+BreakBeforeBinaryOperators: All # break before binary operators; the LLVM default is None, changed so a wrapped binary operator, including assignment (=), always starts the line
+ColumnLimit: 0 # column limit; the LLVM default is 80, changed to unlimited
+AllowShortBlocksOnASingleLine: Always # whether a short block (a single-statement block) may stay on one line; the LLVM default is Never, changed to allow it
+AllowShortLoopsOnASingleLine: true # whether a short loop may stay on one line; the LLVM default is false, changed to allow it
+InsertBraces: true # whether to insert braces after if/for/while/switch and similar statements; the LLVM default is false, changed to enable it
+BreakBeforeBraces: Custom # brace-wrapping configuration; the LLVM default is LLVM, changed to Custom so that BraceWrapping takes effect
 BraceWrapping:
   AfterCaseLabel: false
   AfterClass: false
...
@@ -23,44 +16,15 @@ BraceWrapping:
   AfterEnum: false
   AfterFunction: false
   AfterNamespace: false
-  AfterObjCDeclaration: false
-  AfterStruct: false
   AfterUnion: false
-  AfterExternBlock: false
   BeforeCatch: false
   BeforeElse: false
-  BeforeLambdaBody: false
-  BeforeWhile: false
   IndentBraces: false
-  SplitEmptyFunction: false
+  SplitEmptyFunction: true
   SplitEmptyRecord: true
-  SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: None
-BreakBeforeTernaryOperators: true
-BreakConstructorInitializers: BeforeColon
-BreakInheritanceList: BeforeColon
-ColumnLimit: 0
-CompactNamespaces: true
-ContinuationIndentWidth: 4
-IndentCaseLabels: true
-IndentPPDirectives: None
-IndentWidth: 4
-KeepEmptyLinesAtTheStartOfBlocks: true
-MaxEmptyLinesToKeep: 2
-NamespaceIndentation: All
-ObjCSpaceAfterProperty: false
-ObjCSpaceBeforeProtocolList: true
-PointerAlignment: Right
-ReflowComments: false
-SpaceAfterCStyleCast: true
-SpaceAfterLogicalNot: false
-SpaceAfterTemplateKeyword: false
-SpaceBeforeAssignmentOperators: true
-SpaceBeforeCpp11BracedList: false
-SpaceBeforeCtorInitializerColon: true
-SpaceBeforeInheritanceColon: true
-SpaceBeforeParens: ControlStatements
-SpaceBeforeRangeBasedForLoopColon: true
-SpaceInEmptyParentheses: false
-SpacesBeforeTrailingComments: 0
-SpacesInAngles: false
-SpacesInCStyleCastParentheses: false
-SpacesInContainerLiterals: false
-SpacesInParentheses: false
-SpacesInSquareBrackets: false
-TabWidth: 4
-UseTab: Never
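This configuration is what scripts/format.py (added later in this commit) applies through clang-format's `-style=file` mode. As a quick illustration — this snippet is not part of the commit, and it assumes `clang-format-16` is available on PATH; the file path is only an example taken from this page — a single file can be checked against the configuration the same way the script does, by running a dry-run pass and inspecting stderr:

```python
# Illustrative only (assumes clang-format-16 is installed); mirrors the
# -style=file / -dry-run invocation used by scripts/format.py. clang-format
# reports would-be edits on stderr, so an empty stderr means the file
# already matches the .clang-format above.
import subprocess

proc = subprocess.run(
    ["clang-format-16", "-style=file", "-dry-run", "-i", "src/infiniop/devices/pool.h"],
    capture_output=True,
    text=True,
)
print("already formatted" if not proc.stderr else proc.stderr)
```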
.github/workflows/build.yml (new file, mode 100644)

name: Build and test

on:
  pull_request:
  push:
    paths-ignore:
      - '**.md'
      - 'LICENSE'

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        type: [debug, release]

    steps:
      - name: checkout code
        uses: actions/checkout@v4

      - name: install black
        run: pip install black

      - name: check format
        run: python3 scripts/format.py --path src --check

      - name: install xmake
        uses: xmake-io/github-action-setup-xmake@v1
        with:
          xmake-version: latest

      - name: configure xmake
        run: xmake f -cv

      - name: build with xmake
        run: xmake build && xmake install
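The "check format" step is the gate this commit introduces: the job fails whenever a file under src does not match the .clang-format/black style. The sketch below is not part of the commit; it simply shells out to the same script the workflow calls and assumes the formatters it needs (clang-format-16 and black) are installed locally:

```python
# Hypothetical local equivalent of the CI "check format" step above.
# Exits non-zero when some file under src/ still needs reformatting.
import subprocess
import sys

result = subprocess.run([sys.executable, "scripts/format.py", "--path", "src", "--check"])
sys.exit(result.returncode)
```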
README.md

The introduction and usage instructions are rewritten and a development guide for the new formatting workflow is added. The updated README reads:

# InfiniCore

InfiniCore is a cross-platform unified programming toolkit that provides a single C-language interface to the capabilities of different chip platforms, including compute, runtime, and communication. The currently supported hardware and backends are:

- CPU;
- CUDA
- NVIDIA GPU (英伟达);
- Moore Threads GPU (摩尔线程);
- Iluvatar GPU (天数智芯);
- MetaX GPU (沐曦);
- Sugon DCU (曙光);
- Huawei Ascend NPU (华为昇腾);
- Cambricon MLU (寒武纪);
- Kunlunxin XPU (昆仑芯);

## Configuration and usage

### 1. Configuration

#### View the current configuration

```shell
xmake f -v
```

#### Configure for CPU (default configuration)

```shell
xmake f -cv
```

#### Configure for accelerator cards

```shell
# NVIDIA
# A CUDA path can be specified; it usually lives in the `CUDA_HOME` or `CUDA_ROOT` environment variable
xmake f --nv-gpu=true --cuda=$CUDA_HOME -cv

# Cambricon
xmake f --cambricon-mlu=true -cv

# Huawei Ascend
xmake f --ascend-npu=true -cv
```

### 2. Build and install

The default installation path is `$HOME/.infini`.

```shell
xmake build && xmake install
```

### 3. Set environment variables

Set the `INFINI_ROOT` and `LD_LIBRARY_PATH` environment variables as prompted by the install output.

### 4. Run operator tests

```shell
python test/infiniop/[operator].py [--cpu | --nvidia | --cambricon | --ascend]
```

## Development guide

### Code formatting

This project uses the [`scripts/format.py`](/scripts/format.py) script to check and apply code formatting.

Run

```shell
python scripts/format.py -h
```

to show the script's help:

```plaintext
usage: format.py [-h] [--ref REF] [--path [PATH ...]] [--check] [--c C] [--py PY]

options:
  -h, --help         show this help message and exit
  --ref REF          Git reference (commit hash) to compare against.
  --path [PATH ...]  Files to format or check.
  --check            Check files without modifying them.
  --c C              C formatter (default: clang-format-16)
  --py PY            Python formatter (default: black)
```

Among the options (examples of the three selection modes follow this list):

- `ref` and `path` control which files are formatted:
  - if both `ref` and `path` are empty, the currently staged (git added) files are formatted;
  - otherwise:
    - if `ref` is non-empty, the given commit is diffed against the current code and only the modified files are formatted;
    - if `path` is non-empty, several paths may be passed (`--path p0 p1 p2`) and only files under those paths and their subdirectories are formatted;
- if `--check` is set, the script only checks whether the code needs reformatting and does not modify any file;
- `--c` selects the C/C++ formatter, which defaults to `clang-format-16`;
- `--py` selects the Python formatter, `black`.
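The following invocations are hypothetical examples of those three modes; the ref and the paths are placeholders, not commands from this commit:

```python
# Example invocations of scripts/format.py (values are placeholders).
import subprocess
import sys

py = sys.executable
# 1. No --ref / --path: check only the files currently staged with `git add`.
subprocess.run([py, "scripts/format.py", "--check"])
# 2. --ref: format every supported file changed since the given commit.
subprocess.run([py, "scripts/format.py", "--ref", "16dad776"])
# 3. --path with several directories: format those trees in place.
subprocess.run([py, "scripts/format.py", "--path", "src", "include", "scripts"])
```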
include/infinicore.h

@@ -6,8 +6,7 @@
 #define __INFINICORE_EXPORT_C__
 #if defined(_WIN32)
 #define __export __declspec(dllexport)
-#elif defined(__GNUC__) && \
-    ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
+#elif defined(__GNUC__) && ((__GNUC__ >= 4) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3))
 #define __export __attribute__((visibility("default")))
 #else
 #define __export
 ...
include/infiniop/ops/causal_softmax.h

@@ -19,5 +19,4 @@ __C __export infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescrip
 __C __export infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftmaxDescriptor_t desc);

-
 #endif
include/infiniop/ops/conv.h

@@ -21,5 +21,4 @@ __C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *
 __C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);

-
 #endif
include/infiniop/ops/random_sample.h

@@ -22,5 +22,4 @@ __C __export infiniopStatus_t infiniopRandomSample(infiniopRandomSampleDescripto
 __C __export infiniopStatus_t infiniopDestroyRandomSampleDescriptor(infiniopRandomSampleDescriptor_t desc);

-
 #endif
include/infiniop/tensor_descriptor.h

@@ -21,4 +21,4 @@ __C __export infiniopStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescr
 __C __export infiniopStatus_t infiniopDestroyTensorDescriptor(infiniopTensorDescriptor_t desc);
-#endif// __INFINIOP_TENSOR_DESCRIPTOR__
+#endif // __INFINIOP_TENSOR_DESCRIPTOR__
scripts/format.py (new file, mode 100644)

import argparse
import subprocess
import os
from pathlib import Path
from colorama import Fore, Style

# Supported file types
SUPPORTED_FILES = {
    ".h": "c",
    ".hh": "c",
    ".hpp": "c",
    ".c": "c",
    ".cc": "c",
    ".cpp": "c",
    ".cxx": "c",
    ".cu": "c",
    ".cuh": "c",
    ".mlu": "c",
    ".cl": "c",
    ".py": "py",
}


def format_file(file: Path, check: bool, formatter) -> bool:
    formatter = formatter.get(SUPPORTED_FILES.get(file.suffix, None), None)
    if not formatter:
        return True  # Unsupported file type, skip it
    try:
        cmd = []
        if formatter.startswith("clang-format"):
            cmd = [formatter, "-style=file", "-i", file]
            if check:
                cmd.insert(2, "-dry-run")
                process = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                if process.stderr:
                    print(f"{Fore.YELLOW}{file} is not formatted.{Style.RESET_ALL}")
                    print(
                        f"Use {Fore.CYAN}{formatter} -style=file -i {file}{Style.RESET_ALL} to format it."
                    )
                    return False
            else:
                subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                print(f"{Fore.CYAN}Formatted: {file}{Style.RESET_ALL}")
        elif formatter == "black":
            cmd = [formatter, file]
            if check:
                cmd.insert(1, "--check")
                process = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                if process.stderr:
                    print(f"{Fore.YELLOW}{file} is not formatted.{Style.RESET_ALL}")
                    print(
                        f"Use {Fore.CYAN}{formatter} {file}{Style.RESET_ALL} to format it."
                    )
                    return False
            else:
                subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    check=True,
                )
                print(f"{Fore.CYAN}Formatted: {file}{Style.RESET_ALL}")
    except FileNotFoundError:
        print(f"{Fore.RED}Formatter {formatter} not found, {file} skipped.{Style.RESET_ALL}")
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Formatter {formatter} failed: {e}{Style.RESET_ALL}")
    return True


def git_added_files():
    """Collect all files with staged changes."""
    try:
        # Use `git diff --cached --name-only` to list every file added to the staging area
        result = subprocess.run(
            ["git", "diff", "--cached", "--name-only"],
            capture_output=True,
            text=True,
            check=True,
        )
        for file in result.stdout.splitlines():
            yield Path(file.strip())
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Git diff failed: {e}{Style.RESET_ALL}")


def git_modified_since_ref(ref):
    """Collect the files modified between the given Git reference and the current state."""
    try:
        result = subprocess.run(
            ["git", "diff", f"{ref}..", "--diff-filter=AMR", "--name-only"],
            capture_output=True,
            text=True,
            check=True,
        )
        for file in result.stdout.splitlines():
            yield Path(file.strip())
    except subprocess.CalledProcessError as e:
        print(f"{Fore.RED}Git diff failed: {e}{Style.RESET_ALL}")


def list_files(paths):
    """Recursively collect every file under the given paths."""
    files = []
    for path in paths:
        if path.is_file():
            yield path
        elif path.is_dir():
            for dirpath, _, filenames in os.walk(path):
                for name in filenames:
                    yield Path(dirpath) / name
        else:
            print(f"{Fore.RED}Error: {path} is not a file or directory.{Style.RESET_ALL}")


def filter_in_path(file: Path, path) -> bool:
    """Check whether a file lies under one of the given paths."""
    for p in path:
        if file.is_relative_to(p):
            return True
    return False


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--ref", type=str, help="Git reference (commit hash) to compare against.")
    parser.add_argument("--path", nargs="*", type=Path, help="Files to format or check.")
    parser.add_argument("--check", action="store_true", help="Check files without modifying them.")
    parser.add_argument("--c", default="clang-format-16", help="C formatter (default: clang-format-16)")
    parser.add_argument("--py", default="black", help="Python formatter (default: black)")
    args = parser.parse_args()

    if args.ref is None and args.path is None:
        # Last commit.
        print("{Fore.GREEN}Formating git added files.{Style.RESET_ALL}")
        files = git_added_files()
    else:
        if args.ref is None:
            print(f"{Fore.GREEN}Formating files in {args.path}.{Style.RESET_ALL}")
            files = list_files(args.path)
        elif args.path is None:
            print(f"{Fore.GREEN}Formating git modified files from {args.ref}.{Style.RESET_ALL}")
            files = git_modified_since_ref(args.ref)
        else:
            print(
                f"{Fore.GREEN}Formating git modified files from {args.ref} in {args.path}.{Style.RESET_ALL}"
            )
            files = (
                file
                for file in git_modified_since_ref(args.ref)
                if filter_in_path(file, args.path)
            )

    formatted = True
    for file in files:
        if not format_file(
            file,
            args.check,
            {
                "c": args.c,
                "py": args.py,
            },
        ):
            formatted = False
    if not formatted:
        exit(1)


if __name__ == "__main__":
    main()
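One behavioural detail of the script above: `--path` filtering goes through `Path.is_relative_to`, which exists only in Python 3.9 and later. The toy check below is not from the repository; the file names are just examples taken from this page:

```python
# Illustration (not from the repo) of the path filtering used by
# filter_in_path above: Path.is_relative_to (Python >= 3.9) decides whether
# a changed file is kept when --ref and --path are combined.
from pathlib import Path

print(Path("src/infiniop/devices/pool.h").is_relative_to("src"))  # True
print(Path("include/infinicore.h").is_relative_to("src"))         # False
```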
src/infiniop/devices/ascend/common_ascend.cc

The file is reformatted throughout (+88 -88); in the hunk below the change inserts braces around every branch of toAclDataType, matching the InsertBraces: true setting above. After formatting the hunk reads:

@@ -31,35 +31,35 @@ infiniopStatus_t freeWorkspace(void *workspaceAddr) {
}

aclDataType toAclDataType(infiniDtype_t dt) {
    if (dt == INFINI_DTYPE_I8) {
        return aclDataType::ACL_INT8;
    } else if (dt == INFINI_DTYPE_I16) {
        return aclDataType::ACL_INT16;
    } else if (dt == INFINI_DTYPE_I32) {
        return aclDataType::ACL_INT32;
    } else if (dt == INFINI_DTYPE_I64) {
        return aclDataType::ACL_INT64;
    } else if (dt == INFINI_DTYPE_U8) {
        return aclDataType::ACL_UINT8;
    } else if (dt == INFINI_DTYPE_U16) {
        return aclDataType::ACL_UINT16;
    } else if (dt == INFINI_DTYPE_U32) {
        return aclDataType::ACL_UINT32;
    } else if (dt == INFINI_DTYPE_U64) {
        return aclDataType::ACL_UINT64;
    } else if (dt == INFINI_DTYPE_F16) {
        return aclDataType::ACL_FLOAT16;
    } else if (dt == INFINI_DTYPE_BF16) {
        return aclDataType::ACL_BF16;
    } else if (dt == INFINI_DTYPE_F32) {
        return aclDataType::ACL_FLOAT;
    } else if (dt == INFINI_DTYPE_F64) {
        return aclDataType::ACL_DOUBLE;
    } else {
        return aclDataType::ACL_DT_UNDEFINED;
    }
}

const char *dataTypeToString(aclDataType dtype) {
    switch (dtype) {
    case ACL_DT_UNDEFINED:
    ...
src/infiniop/devices/ascend/common_ascend.h

@@ -34,7 +34,6 @@ extern "C" {
         return INFINIOP_STATUS_INTERNAL_ERROR; \
     } while (0)

-
 #ifdef __cplusplus
 };
 #endif
 ...
src/infiniop/devices/ascend/tensor_aclnn.cc

@@ -21,7 +21,6 @@ infiniopStatus_t aclnnTensorDescriptor::setDescriptor(aclDataType dtype, const s
     return INFINIOP_STATUS_SUCCESS;
 }

-
 /// @brief Infer storage shape. For now this ruturns a 1D shape of the total tensor storage size.
 /// We don't see why higher dimensional storage shape is ever needed. To change if necesary.
 infiniopStatus_t aclnnTensorDescriptor::inferStorageShape() {
 ...

@@ -93,8 +92,10 @@ char *aclnnTensorDescriptor::toString() {
     // Assume bufferSize
     size_t bufferSize = 1024 + this->ndim * 40 + this->storageNdim * 40;
     char *buffer = (char *)malloc(bufferSize);
-    if (!buffer) return NULL;
+    if (!buffer) {
+        return NULL;
+    }
     // Write info into buffer
     char *ptr = buffer;
 ...
src/infiniop/devices/cpu/common_cpu.cc

@@ -37,8 +37,7 @@ uint16_t f32_to_f16(float val) {
     uint32_t f32;
     memcpy(&f32, &val, sizeof(f32)); // Read the bits of the float32
     uint16_t sign = (f32 >> 16) & 0x8000; // Extract the sign bit
-    int32_t exponent =
-        ((f32 >> 23) & 0xFF) - 127;// Extract and de-bias the exponent
+    int32_t exponent = ((f32 >> 23) & 0xFF) - 127; // Extract and de-bias the exponent
     uint32_t mantissa = f32 & 0x7FFFFF; // Extract the mantissa (fraction part)
     if (exponent >= 31) { // Special cases for Inf and NaN
 ...
src/infiniop/devices/cpu/common_cpu.h

@@ -27,4 +27,4 @@ size_t getPaddedSize(size_t ndim, size_t *shape, size_t const *pads);
 // calculate the padded shape and store the result in padded_shape
 std::vector<size_t> getPaddedShape(size_t ndim, size_t const *shape, size_t const *pads);
-#endif// __INFINIOP__COMMON_CPU_H__
+#endif // __INFINIOP__COMMON_CPU_H__
src/infiniop/devices/cuda/common_cuda.cuh

The file is reformatted (+22 -22); after formatting the two hunks read:

@@ -47,18 +47,18 @@ struct InfiniopCudaHandle {
    int compute_capability_minor;
};

template <typename T>
void use_cublas(std::shared_ptr<Pool<cublasHandle_t>> cublas_handle_pool, int device_id, cudaStream_t stream, T const &f) {
    auto handle = cublas_handle_pool->pop();
    if (!handle) {
        cublasCreate(&(*handle));
    }
    cublasSetStream(*handle, (cudaStream_t)stream);
    f(*handle);
    cublas_handle_pool->push(std::move(*handle));
}

template <typename T>
cudnnStatus_t use_cudnn(std::shared_ptr<Pool<cudnnHandle_t>> cudnn_handle_pool, int device_id, cudaStream_t stream, T const &f) {
    auto handle = cudnn_handle_pool->pop();
    if (!handle) {
    ...

@@ -118,4 +118,4 @@ inline __device__ __host__ size_t indexToOffset(size_t flat_index, size_t ndim,
     return res;
 }
-#endif// __INFINIOP_COMMON_CUDA_H__
+#endif // __INFINIOP_COMMON_CUDA_H__
src/infiniop/devices/pool.h

@@ -5,7 +5,7 @@
 #include <mutex>
 #include <optional>

 template <class T>
 class Pool {
 public:
     Pool() : _head(nullptr) {}
 ...

@@ -21,7 +21,7 @@ public:
     void push(T &&val) const {
         Node<T> *new_node = new Node<T>(std::move(val));
         new_node->next = _head.load();
-        while (!_head.compare_exchange_weak(new_node->next, new_node));
+        while (!_head.compare_exchange_weak(new_node->next, new_node)) {}
     }

     std::optional<T> pop() const {
 ...

@@ -37,7 +37,7 @@ public:
     }

 private:
     template <class U>
     struct Node {
         U data;
         Node<U> *next;
 ...
src/infiniop/ops/causal_softmax/operator.cc

The file is reformatted throughout (+71 -71); the four hunks are shown once each, as they read after formatting:

@@ -7,33 +7,33 @@ __C infiniopStatus_t infiniopCreateCausalSoftmaxDescriptor(
    switch (handle->device) {
#ifdef ENABLE_CPU
    case DevCpu:
        return cpuCreateCausalSoftmaxDescriptor(handle, (CausalSoftmaxCpuDescriptor_t *)desc_ptr, y_desc);
#endif
#ifdef ENABLE_NV_GPU
    case DevNvGpu: {
        return cudaCreateCausalSoftmaxDescriptor((CudaHandle_t)handle, (CausalSoftmaxCudaDescriptor_t *)desc_ptr, y_desc);
    }
#endif
#ifdef ENABLE_CAMBRICON_MLU
    case DevCambriconMlu: {
        return bangCreateCausalSoftmaxDescriptor((BangHandle_t)handle, (CausalSoftmaxBangDescriptor_t *)desc_ptr, y_desc);
        // return cnnlCreateCausalSoftmaxDescriptor((BangHandle_t) handle, (CausalSoftmaxCnnlDescriptor_t *) desc_ptr, y_desc);
    }
#endif
#ifdef ENABLE_ASCEND_NPU
    case DevAscendNpu: {
        return aclnnCreateCausalSoftmaxDescriptor((AscendHandle_t)handle, (CausalSoftmaxAclnnDescriptor_t *)desc_ptr, y_desc);
    }
#endif
#ifdef ENABLE_METAX_GPU
    case DevMetaxGpu: {
        return macaCreateCausalSoftmaxDescriptor((MacaHandle_t)handle, (CausalSoftmaxMacaDescriptor_t *)desc_ptr, y_desc);
    }
#endif
#ifdef ENABLE_MTHREADS_GPU
    case DevMthreadsGpu: {
        return musaCreateCausalSoftmaxDescriptor((MusaHandle_t)handle, (CausalSoftmaxMusaDescriptor_t *)desc_ptr, y_desc);
    }
#endif
    }
 ...

@@ -44,34 +44,34 @@ __C infiniopStatus_t infiniopGetCausalSoftmaxWorkspaceSize(infiniopCausalSoftmax
    switch (desc->device) {
#ifdef ENABLE_CPU
    case DevCpu:
        return cpuGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCpuDescriptor_t)desc, size);
#endif
#ifdef ENABLE_NV_GPU
    case DevNvGpu: {
        return cudaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCudaDescriptor_t)desc, size);
    }
#endif
#ifdef ENABLE_CAMBRICON_MLU
    case DevCambriconMlu: {
        return bangGetCausalSoftmaxWorkspaceSize((CausalSoftmaxBangDescriptor_t)desc, size);
        // return cnnlGetCausalSoftmaxWorkspaceSize((CausalSoftmaxCnnlDescriptor_t) desc, size);
    }
#endif
#ifdef ENABLE_ASCEND_NPU
    case DevAscendNpu: {
        return aclnnGetCausalSoftmaxWorkspaceSize((CausalSoftmaxAclnnDescriptor_t)desc, size);
    }
#endif
#ifdef ENABLE_METAX_GPU
    case DevMetaxGpu: {
        return macaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMacaDescriptor_t)desc, size);
    }
#endif
#ifdef ENABLE_MTHREADS_GPU
    case DevMthreadsGpu: {
        return musaGetCausalSoftmaxWorkspaceSize((CausalSoftmaxMusaDescriptor_t)desc, size);
    }
#endif
    }
 ...

@@ -82,33 +82,33 @@ __C infiniopStatus_t infiniopCausalSoftmax(infiniopCausalSoftmaxDescriptor_t des
    switch (desc->device) {
#ifdef ENABLE_CPU
    case DevCpu:
        return cpuCausalSoftmax((CausalSoftmaxCpuDescriptor_t)desc, workspace, workspace_size, data, stream);
#endif
#ifdef ENABLE_NV_GPU
    case DevNvGpu: {
        return cudaCausalSoftmax((CausalSoftmaxCudaDescriptor_t)desc, workspace, workspace_size, data, stream);
    }
#endif
#ifdef ENABLE_CAMBRICON_MLU
    case DevCambriconMlu: {
        return bangCausalSoftmax((CausalSoftmaxBangDescriptor_t)desc, workspace, workspace_size, data, stream);
        // return cnnlCausalSoftmax((CausalSoftmaxCnnlDescriptor_t) desc, workspace, workspace_size, data, stream);
    }
#endif
#ifdef ENABLE_ASCEND_NPU
    case DevAscendNpu: {
        return aclnnCausalSoftmax((CausalSoftmaxAclnnDescriptor_t)desc, workspace, workspace_size, data, stream);
    }
#endif
#ifdef ENABLE_METAX_GPU
    case DevMetaxGpu: {
        return macaCausalSoftmax((CausalSoftmaxMacaDescriptor_t)desc, workspace, workspace_size, data, stream);
    }
#endif
#ifdef ENABLE_MTHREADS_GPU
    case DevMthreadsGpu: {
        return musaCausalSoftmax((CausalSoftmaxMusaDescriptor_t)desc, workspace, workspace_size, data, stream);
    }
#endif
    }
 ...

@@ -119,33 +119,33 @@ __C infiniopStatus_t infiniopDestroyCausalSoftmaxDescriptor(infiniopCausalSoftma
    switch (desc->device) {
#ifdef ENABLE_CPU
    case DevCpu:
        return cpuDestroyCausalSoftmaxDescriptor((CausalSoftmaxCpuDescriptor_t)desc);
#endif
#ifdef ENABLE_NV_GPU
    case DevNvGpu: {
        return cudaDestroyCausalSoftmaxDescriptor((CausalSoftmaxCudaDescriptor_t)desc);
    }
#endif
#ifdef ENABLE_CAMBRICON_MLU
    case DevCambriconMlu: {
        return bangDestroyCausalSoftmaxDescriptor((CausalSoftmaxBangDescriptor_t)desc);
        // return cnnlDestroyCausalSoftmaxDescriptor((CausalSoftmaxCnnlDescriptor_t) desc);
    }
#endif
#ifdef ENABLE_ASCEND_NPU
    case DevAscendNpu: {
        return aclnnDestroyCausalSoftmaxDescriptor((CausalSoftmaxAclnnDescriptor_t)desc);
    }
#endif
#ifdef ENABLE_METAX_GPU
    case DevMetaxGpu: {
        return macaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMacaDescriptor_t)desc);
    }
#endif
#ifdef ENABLE_MTHREADS_GPU
    case DevMthreadsGpu:
        return musaDestroyCausalSoftmaxDescriptor((CausalSoftmaxMusaDescriptor_t)desc);
#endif
    }
    return INFINIOP_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
 ...
src/infiniop/ops/matmul/ascend/matmul_aclnn.cc

The batched loop in aclnnMatmul is re-wrapped (+4 -8); after formatting the hunk reads:

@@ -123,17 +123,13 @@ infiniopStatus_t aclnnMatmul(MatmulAclnnDescriptor_t desc, void *workspace,
    for (size_t i = 0; i < batch; i++) {
        AclSetTensorAddr(desc->executor, 0, ta,
                         (char *)(a) + i * desc->info->a_matrix.stride * infiniSizeof(desc->dtype));
        AclSetTensorAddr(desc->executor, 1, tb,
                         (char *)(b) + i * desc->info->b_matrix.stride * infiniSizeof(desc->dtype));
        AclSetTensorAddr(desc->executor, 2, tc,
                         (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));
        AclSetTensorAddr(desc->executor, 3, tc,
                         (char *)(c) + i * desc->info->c_matrix.stride * infiniSizeof(desc->dtype));

        ret = aclnnGemm(workspace, workspaceSize, desc->executor, stream);
        CHECK_RET(ret == ACL_SUCCESS,
                  LOG_PRINT("aclnnGemm failed. ERROR: %d\n", ret);
 ...
src/infiniop/ops/matmul/bang/matmul_cnnl.cc (diff not expanded on this page)
src/infiniop/ops/matmul/blas.h

@@ -88,7 +88,7 @@ struct MatmulInfo {
         return;
     }
-    if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows){
+    if (c_matrix.rows != a_matrix.rows || c_matrix.cols != b_matrix.cols || a_matrix.cols != b_matrix.rows) {
         *status = INFINIOP_STATUS_BAD_TENSOR_SHAPE;
         return;
     }
 ...

@@ -113,4 +113,4 @@ struct MatmulInfo {
     }
 };
-#endif// __BLAS_H__
+#endif // __BLAS_H__