Commit 09ed53f7 authored by YdrMaster
Browse files

issue/158/fix: 修改天数上的其他编译问题


Signed-off-by: YdrMaster <ydrml@hotmail.com>
parent 29089d99
...@@ -38,18 +38,19 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS] ...@@ -38,18 +38,19 @@ python scripts/install.py [XMAKE_CONFIG_FLAGS]
参数 `XMAKE_CONFIG_FLAGS` 是 xmake 构建配置,可配置下列可选项: 参数 `XMAKE_CONFIG_FLAGS` 是 xmake 构建配置,可配置下列可选项:
| 选项 | 功能 | 默认值 | 选项 | 功能 | 默认值
|--------------------------|-------------------------------|:-: |--------------------------|-----------------------------------|:-:
| `--omp=[y\|n]` | 是否使用 OpenMP | y | `--omp=[y\|n]` | 是否使用 OpenMP | y
| `--cpu=[y\|n]` | 是否编译 CPU 接口实现 | y | `--cpu=[y\|n]` | 是否编译 CPU 接口实现 | y
| `--nv-gpu=[y\|n]` | 是否编译英伟达 GPU 接口实现 | n | `--nv-gpu=[y\|n]` | 是否编译英伟达 GPU 接口实现 | n
| `--ascend-npu=[y\|n]` | 是否编译昇腾 NPU 接口实现 | n | `--ascend-npu=[y\|n]` | 是否编译昇腾 NPU 接口实现 | n
| `--cambricon-mlu=[y\|n]` | 是否编译寒武纪 MLU 接口实现 | n | `--cambricon-mlu=[y\|n]` | 是否编译寒武纪 MLU 接口实现 | n
| `--metax-gpu=[y\|n]` | 是否编译沐曦 GPU 接口实现 | n | `--metax-gpu=[y\|n]` | 是否编译沐曦 GPU 接口实现 | n
| `--moore-gpu=[y\|n]` | 是否编译摩尔线程 GPU 接口实现 | n | `--moore-gpu=[y\|n]` | 是否编译摩尔线程 GPU 接口实现 | n
| `--sugon-dcu=[y\|n]` | 是否编译曙光 DCU 接口实现 | n | `--iluvatar-gpu=[y\|n]` | 是否编译天数智芯 GPU 接口实现 | n
| `--kunlun-xpu=[y\|n]` | 是否编译昆仑 XPU 接口实现 | n | `--sugon-dcu=[y\|n]` | 是否编译曙光 DCU 接口实现 | n
| `--ccl=[y\|n]` | 是否编译 InfiniCCL 通信库接口实现 | n | `--kunlun-xpu=[y\|n]` | 是否编译昆仑 XPU 接口实现 | n
| `--ccl=[y\|n]` | 是否编译 InfiniCCL 通信库接口实现 | n
### 手动安装 ### 手动安装
...@@ -114,11 +115,13 @@ python scripts/python_test.py [--cpu | --nvidia | --cambricon | --ascend] ...@@ -114,11 +115,13 @@ python scripts/python_test.py [--cpu | --nvidia | --cambricon | --ascend]
#### 通信库(InfiniCCL)测试 #### 通信库(InfiniCCL)测试
编译(需要先安装InfiniCCL): 编译(需要先安装InfiniCCL):
```shell ```shell
xmake build infiniccl-test xmake build infiniccl-test
``` ```
在英伟达平台运行测试(会自动使用所有可见的卡): 在英伟达平台运行测试(会自动使用所有可见的卡):
```shell ```shell
infiniccl-test --nvidia infiniccl-test --nvidia
``` ```
......
...@@ -51,10 +51,12 @@ exp_(const float val) { ...@@ -51,10 +51,12 @@ exp_(const float val) {
return expf(val); return expf(val);
} }
#ifndef ENABLE_ILUVATAR_CUDA_API
__forceinline__ __device__ long double __forceinline__ __device__ long double
exp_(const long double val) { exp_(const long double val) {
return expl(val); return expl(val);
} }
#endif
__forceinline__ __device__ double __forceinline__ __device__ double
exp_(const double val) { exp_(const double val) {
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#define __CLIP_CUDA_H__ #define __CLIP_CUDA_H__
#include "../../../elementwise/cuda/elementwise_cuda.cuh" #include "../../../elementwise/cuda/elementwise_cuda.cuh"
#include <cuda_bf16.h>
#include <cuda_fp16.h> #include <cuda_fp16.h>
namespace op::clip::cuda { namespace op::clip::cuda {
...@@ -12,17 +13,14 @@ public: ...@@ -12,17 +13,14 @@ public:
template <typename T> template <typename T>
__device__ __forceinline__ T operator()(const T &x, const T &min_val, const T &max_val) const { __device__ __forceinline__ T operator()(const T &x, const T &min_val, const T &max_val) const {
if constexpr (std::is_same_v<T, half2>) { if constexpr (std::is_same_v<T, half2> || std::is_same_v<T, nv_bfloat162>) {
#ifndef ENABLE_ILUVATAR_CUDA_API
return __hmax2(__hmin2(x, max_val), min_val); return __hmax2(__hmin2(x, max_val), min_val);
} else if constexpr (std::is_same_v<T, half>) { #else
return __hmax(__hmin(x, max_val), min_val); return {std::clamp(x.x, min_val.x, max_val.x), std::clamp(x.y, min_val.y, max_val.y)};
} else if constexpr (std::is_same_v<T, float>) { #endif
return fmaxf(fminf(x, max_val), min_val);
} else if constexpr (std::is_same_v<T, double>) {
return fmax(fmin(x, max_val), min_val);
} else {
return std::max(std::min(x, max_val), min_val);
} }
return std::clamp(x, min_val, max_val);
} }
} ClipOp; } ClipOp;
} // namespace op::clip::cuda } // namespace op::clip::cuda
......
...@@ -60,7 +60,11 @@ infiniStatus_t Descriptor::calculate( ...@@ -60,7 +60,11 @@ infiniStatus_t Descriptor::calculate(
break; break;
case INFINI_DTYPE_BF16: case INFINI_DTYPE_BF16:
a_type = b_type = c_type = CUDA_R_16BF; a_type = b_type = c_type = CUDA_R_16BF;
#ifdef ENABLE_ILUVATAR_CUDA_API
compute_type = CUDA_R_32F;
#else
compute_type = CUBLAS_COMPUTE_32F; compute_type = CUBLAS_COMPUTE_32F;
#endif
break; break;
case INFINI_DTYPE_F32: case INFINI_DTYPE_F32:
a_type = b_type = c_type = CUDA_R_32F; a_type = b_type = c_type = CUDA_R_32F;
......
...@@ -233,17 +233,8 @@ utils::Result<void *> getRearrangeKernel(const RearrangeParams &params) { ...@@ -233,17 +233,8 @@ utils::Result<void *> getRearrangeKernel(const RearrangeParams &params) {
CHECK_OR_RETURN(grid_num <= MAX_GRID_ARRAY_SIZE && grid_num != 0, INFINI_STATUS_BAD_PARAM); CHECK_OR_RETURN(grid_num <= MAX_GRID_ARRAY_SIZE && grid_num != 0, INFINI_STATUS_BAD_PARAM);
CHECK_OR_RETURN(block_num <= MAX_BLOCK_ARRAY_SIZE && block_num != 0, INFINI_STATUS_BAD_PARAM); CHECK_OR_RETURN(block_num <= MAX_BLOCK_ARRAY_SIZE && block_num != 0, INFINI_STATUS_BAD_PARAM);
CHECK_OR_RETURN(constraint_num <= 2, INFINI_STATUS_BAD_PARAM); CHECK_OR_RETURN(constraint_num <= 2, INFINI_STATUS_BAD_PARAM);
auto block_len = params.block_len.data();
auto src_block_stride = params.src_block_stride.data();
auto dst_block_stride = params.dst_block_stride.data();
auto grid_len = params.grid_len.data();
auto src_grid_stride = params.src_grid_stride.data();
auto dst_grid_stride = params.dst_grid_stride.data();
auto constrain = params.constraints.data();
void *kernel_func = nullptr; void *kernel_func = nullptr;
#define GET_REARRANGE_KERNEL(Tmem_type, block_array_size, grid_array_size, constraint_num) \ #define GET_REARRANGE_KERNEL(Tmem_type, block_array_size, grid_array_size, constraint_num) \
kernel_func = (void *)rearrange_unit_##Tmem_type##_block_##block_array_size##_grid_##grid_array_size##_constrain_##constraint_num; kernel_func = (void *)rearrange_unit_##Tmem_type##_block_##block_array_size##_grid_##grid_array_size##_constrain_##constraint_num;
......
...@@ -38,11 +38,11 @@ infiniStatus_t streamSynchronize(infinirtStream_t stream) { ...@@ -38,11 +38,11 @@ infiniStatus_t streamSynchronize(infinirtStream_t stream) {
} }
infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) { infiniStatus_t streamWaitEvent(infinirtStream_t stream, infinirtEvent_t event) {
#ifndef ENABLE_ILUVATAR_CUDA_API #ifdef ENABLE_ILUVATAR_CUDA_API
return INFINI_STATUS_NOT_IMPLEMENTED;
#else
CHECK_CUDART(cudaStreamWaitEvent((cudaStream_t)stream, (cudaEvent_t)event)); CHECK_CUDART(cudaStreamWaitEvent((cudaStream_t)stream, (cudaEvent_t)event));
return INFINI_STATUS_SUCCESS; return INFINI_STATUS_SUCCESS;
#else
return INFINI_STATUS_NOT_IMPLEMENTED;
#endif #endif
} }
......
...@@ -39,7 +39,7 @@ target("infiniop-iluvatar") ...@@ -39,7 +39,7 @@ target("infiniop-iluvatar")
add_rules("iluvatar.env") add_rules("iluvatar.env")
set_values("cuda.rdc", false) set_values("cuda.rdc", false)
add_links("cublas", "cudnn") add_links("cudart", "cublas", "cudnn")
set_warnings("all", "error") set_warnings("all", "error")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true}) add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment