Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
09ed53f7
Commit
09ed53f7
authored
Jun 06, 2025
by
YdrMaster
Browse files
issue/158/fix: 修改天数上的其他编译问题
Signed-off-by:
YdrMaster
<
ydrml@hotmail.com
>
parent
29089d99
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
32 additions
and
34 deletions
+32
-34
README.md
README.md
+15
-12
src/infiniop/devices/cuda/cuda_kernel_common.cuh
src/infiniop/devices/cuda/cuda_kernel_common.cuh
+2
-0
src/infiniop/ops/clip/cuda/clip_cuda_internal.cuh
src/infiniop/ops/clip/cuda/clip_cuda_internal.cuh
+7
-9
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
+4
-0
src/infiniop/ops/rearrange/cuda/rearrange_kernel.cuh
src/infiniop/ops/rearrange/cuda/rearrange_kernel.cuh
+0
-9
src/infinirt/cuda/infinirt_cuda.cu
src/infinirt/cuda/infinirt_cuda.cu
+3
-3
xmake/iluvatar.lua
xmake/iluvatar.lua
+1
-1
No files found.
README.md
View file @
09ed53f7
...
...
@@ -38,18 +38,19 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
参数
`XMAKE_CONFIG_FLAGS`
是 xmake 构建配置,可配置下列可选项:
| 选项 | 功能 | 默认值
|--------------------------|-------------------------------|:-:
|
`--omp=[y\|n]`
| 是否使用 OpenMP | y
|
`--cpu=[y\|n]`
| 是否编译 CPU 接口实现 | y
|
`--nv-gpu=[y\|n]`
| 是否编译英伟达 GPU 接口实现 | n
|
`--ascend-npu=[y\|n]`
| 是否编译昇腾 NPU 接口实现 | n
|
`--cambricon-mlu=[y\|n]`
| 是否编译寒武纪 MLU 接口实现 | n
|
`--metax-gpu=[y\|n]`
| 是否编译沐曦 GPU 接口实现 | n
|
`--moore-gpu=[y\|n]`
| 是否编译摩尔线程 GPU 接口实现 | n
|
`--sugon-dcu=[y\|n]`
| 是否编译曙光 DCU 接口实现 | n
|
`--kunlun-xpu=[y\|n]`
| 是否编译昆仑 XPU 接口实现 | n
|
`--ccl=[y\|n]`
| 是否编译 InfiniCCL 通信库接口实现 | n
| 选项 | 功能 | 默认值
|--------------------------|-----------------------------------|:-:
|
`--omp=[y\|n]`
| 是否使用 OpenMP | y
|
`--cpu=[y\|n]`
| 是否编译 CPU 接口实现 | y
|
`--nv-gpu=[y\|n]`
| 是否编译英伟达 GPU 接口实现 | n
|
`--ascend-npu=[y\|n]`
| 是否编译昇腾 NPU 接口实现 | n
|
`--cambricon-mlu=[y\|n]`
| 是否编译寒武纪 MLU 接口实现 | n
|
`--metax-gpu=[y\|n]`
| 是否编译沐曦 GPU 接口实现 | n
|
`--moore-gpu=[y\|n]`
| 是否编译摩尔线程 GPU 接口实现 | n
|
`--iluvatar-gpu=[y\|n]`
| 是否编译天数智芯 GPU 接口实现 | n
|
`--sugon-dcu=[y\|n]`
| 是否编译曙光 DCU 接口实现 | n
|
`--kunlun-xpu=[y\|n]`
| 是否编译昆仑 XPU 接口实现 | n
|
`--ccl=[y\|n]`
| 是否编译 InfiniCCL 通信库接口实现 | n
### 手动安装
...
...
@@ -114,11 +115,13 @@ python scripts/python_test.py [--cpu | --nvidia | --cambricon | --ascend]
#### 通信库(InfiniCCL)测试
编译(需要先安装 InfiniCCL):
```
shell
xmake build infiniccl-test
```
在英伟达平台运行测试(会自动使用所有可见的卡):
```
shell
infiniccl-test
--nvidia
```
...
...
src/infiniop/devices/cuda/cuda_kernel_common.cuh
View file @
09ed53f7
...
...
@@ -51,10 +51,12 @@ exp_(const float val) {
return
expf
(
val
);
}
#ifndef ENABLE_ILUVATAR_CUDA_API
__forceinline__
__device__
long
double
exp_
(
const
long
double
val
)
{
return
expl
(
val
);
}
#endif
__forceinline__
__device__
double
exp_
(
const
double
val
)
{
...
...
src/infiniop/ops/clip/cuda/clip_cuda_internal.cuh
View file @
09ed53f7
...
...
@@ -2,6 +2,7 @@
#define __CLIP_CUDA_H__
#include "../../../elementwise/cuda/elementwise_cuda.cuh"
#include <cuda_bf16.h>
#include <cuda_fp16.h>
namespace
op
::
clip
::
cuda
{
...
...
@@ -12,17 +13,14 @@ public:
template
<
typename
T
>
__device__
__forceinline__
T
operator
()(
const
T
&
x
,
const
T
&
min_val
,
const
T
&
max_val
)
const
{
if
constexpr
(
std
::
is_same_v
<
T
,
half2
>
)
{
if
constexpr
(
std
::
is_same_v
<
T
,
half2
>
||
std
::
is_same_v
<
T
,
nv_bfloat162
>
)
{
#ifndef ENABLE_ILUVATAR_CUDA_API
return
__hmax2
(
__hmin2
(
x
,
max_val
),
min_val
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
half
>
)
{
return
__hmax
(
__hmin
(
x
,
max_val
),
min_val
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
float
>
)
{
return
fmaxf
(
fminf
(
x
,
max_val
),
min_val
);
}
else
if
constexpr
(
std
::
is_same_v
<
T
,
double
>
)
{
return
fmax
(
fmin
(
x
,
max_val
),
min_val
);
}
else
{
return
std
::
max
(
std
::
min
(
x
,
max_val
),
min_val
);
#else
return
{
std
::
clamp
(
x
.
x
,
min_val
.
x
,
max_val
.
x
),
std
::
clamp
(
x
.
y
,
min_val
.
y
,
max_val
.
y
)};
#endif
}
return
std
::
clamp
(
x
,
min_val
,
max_val
);
}
}
ClipOp
;
}
// namespace op::clip::cuda
...
...
src/infiniop/ops/gemm/cuda/gemm_cuda.cu
View file @
09ed53f7
...
...
@@ -60,7 +60,11 @@ infiniStatus_t Descriptor::calculate(
break
;
case
INFINI_DTYPE_BF16
:
a_type
=
b_type
=
c_type
=
CUDA_R_16BF
;
#ifdef ENABLE_ILUVATAR_CUDA_API
compute_type
=
CUDA_R_32F
;
#else
compute_type
=
CUBLAS_COMPUTE_32F
;
#endif
break
;
case
INFINI_DTYPE_F32
:
a_type
=
b_type
=
c_type
=
CUDA_R_32F
;
...
...
src/infiniop/ops/rearrange/cuda/rearrange_kernel.cuh
View file @
09ed53f7
...
...
@@ -233,17 +233,8 @@ utils::Result<void *> getRearrangeKernel(const RearrangeParams ¶ms) {
CHECK_OR_RETURN
(
grid_num
<=
MAX_GRID_ARRAY_SIZE
&&
grid_num
!=
0
,
INFINI_STATUS_BAD_PARAM
);
CHECK_OR_RETURN
(
block_num
<=
MAX_BLOCK_ARRAY_SIZE
&&
block_num
!=
0
,
INFINI_STATUS_BAD_PARAM
);
CHECK_OR_RETURN
(
constraint_num
<=
2
,
INFINI_STATUS_BAD_PARAM
);
auto
block_len
=
params
.
block_len
.
data
();
auto
src_block_stride
=
params
.
src_block_stride
.
data
();
auto
dst_block_stride
=
params
.
dst_block_stride
.
data
();
auto
grid_len
=
params
.
grid_len
.
data
();
auto
src_grid_stride
=
params
.
src_grid_stride
.
data
();
auto
dst_grid_stride
=
params
.
dst_grid_stride
.
data
();
auto
constrain
=
params
.
constraints
.
data
();
void
*
kernel_func
=
nullptr
;
#define GET_REARRANGE_KERNEL(Tmem_type, block_array_size, grid_array_size, constraint_num) \
kernel_func = (void *)rearrange_unit_##Tmem_type##_block_##block_array_size##_grid_##grid_array_size##_constrain_##constraint_num;
...
...
src/infinirt/cuda/infinirt_cuda.cu
View file @
09ed53f7
...
...
@@ -38,11 +38,11 @@ infiniStatus_t streamSynchronize(infinirtStream_t stream) {
}
infiniStatus_t
streamWaitEvent
(
infinirtStream_t
stream
,
infinirtEvent_t
event
)
{
#ifndef ENABLE_ILUVATAR_CUDA_API
#ifdef ENABLE_ILUVATAR_CUDA_API
return
INFINI_STATUS_NOT_IMPLEMENTED
;
#else
CHECK_CUDART
(
cudaStreamWaitEvent
((
cudaStream_t
)
stream
,
(
cudaEvent_t
)
event
));
return
INFINI_STATUS_SUCCESS
;
#else
return
INFINI_STATUS_NOT_IMPLEMENTED
;
#endif
}
...
...
xmake/iluvatar.lua
View file @
09ed53f7
...
...
@@ -39,7 +39,7 @@ target("infiniop-iluvatar")
add_rules
(
"iluvatar.env"
)
set_values
(
"cuda.rdc"
,
false
)
add_links
(
"cublas"
,
"cudnn"
)
add_links
(
"cudart"
,
"cublas"
,
"cudnn"
)
set_warnings
(
"all"
,
"error"
)
add_cuflags
(
"-fPIC"
,
"-x"
,
"ivcore"
,
"-std=c++17"
,
{
force
=
true
})
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment