Unverified Commit c76c0645 authored by crapromer, committed by GitHub
Browse files

issue/367 - Fix compile bug on cuda 13.0



* fix compile bug on cuda 13.0

* issue/367 - clang format code on ubuntu

---------
Co-authored-by: root <root@Crapromer>
parent f796aaa8
......@@ -54,7 +54,11 @@ __device__ void logSoftmaxKernel(
}
}
}
#if CUDART_VERSION >= 12090
max_val = BlockReduce(temp_storage).Reduce(max_val, ::cuda::maximum());
#else
max_val = BlockReduce(temp_storage).Reduce(max_val, cub::Max());
#endif
if (tid == 0) {
shared_max_val = max_val;
}
......
......@@ -89,9 +89,13 @@ utils::Result<size_t> calculateWorkspace(size_t n_) {
nullptr, size_inclusive_sum,
nullptr, n,
nullptr));
#if CUDART_VERSION >= 12090
size_random += ::cuda::maximum()(size_radix_sort, size_inclusive_sum);
return utils::Result<size_t>(::cuda::maximum()(argmax, size_random));
#else
size_random += cub::Max()(size_radix_sort, size_inclusive_sum);
return utils::Result<size_t>(cub::Max()(argmax, size_random));
#endif
}
// ↑↑↑ 计算 workspace
......@@ -161,8 +165,13 @@ static __global__ void randomSampleKernel(
const Tidx *__restrict__ indices_out,
size_t n,
float random, float topp, size_t topk) {
#if CUDART_VERSION >= 12090
topk = ::cuda::minimum()(topk, n);
auto p = (Tval)(random * ::cuda::minimum()(topp * (float)sorted[n - 1], (float)sorted[topk - 1]));
#else
topk = cub::Min()(topk, n);
auto p = (Tval)(random * cub::Min()(topp * (float)sorted[n - 1], (float)sorted[topk - 1]));
#endif
for (size_t i = 0;; ++i) {
if ((sorted[i]) >= p) {
*result = indices_out[i];
......@@ -228,8 +237,11 @@ struct Algo {
workspace_ = reinterpret_cast<void *>(workspace);
workspace_size = workspace_end - workspace;
#if CUDART_VERSION >= 12090
auto block = ::cuda::minimum()((size_t)block_size, n);
#else
auto block = cub::Min()((size_t)block_size, n);
#endif
auto grid = (n + block - 1) / block;
// sort
fillIndices<<<static_cast<unsigned int>(grid), static_cast<unsigned int>(block), 0, stream>>>(indices, static_cast<int>(n));
......
......@@ -55,7 +55,11 @@ __global__ void softmax_topk_row_kernel(float *values_topk, // 输出数据, 形
{
__shared__ typename BlockReduce::TempStorage temp_storage_max;
#if CUDART_VERSION >= 12090
T value_max = BlockReduce(temp_storage_max).Reduce(thread_max, ::cuda::maximum());
#else
T value_max = BlockReduce(temp_storage_max).Reduce(thread_max, cub::Max());
#endif
if (tid == 0) {
shared_max = value_max;
}
......
......@@ -54,8 +54,12 @@ __device__ __forceinline__ Tdata max(const Tdata *data_ptr, size_t count) {
for (size_t i = threadIdx.x; i < count; i += BLOCK_SIZE) {
#ifdef ENABLE_HYGON_API
max_ = (data_ptr[i] > max_) ? data_ptr[i] : max_;
#else
#if CUDART_VERSION >= 12090
max_ = ::cuda::maximum()(max_, data_ptr[i]);
#else
max_ = cub::Max()(max_, data_ptr[i]);
#endif
#endif
}
......@@ -65,9 +69,13 @@ __device__ __forceinline__ Tdata max(const Tdata *data_ptr, size_t count) {
#ifdef ENABLE_HYGON_API
return BlockReduce(temp_storage).Reduce(
max_, [](const Tdata &a, const Tdata &b) { return (a > b) ? a : b; }, BLOCK_SIZE);
#else
#if CUDART_VERSION >= 12090
return BlockReduce(temp_storage).Reduce(max_, ::cuda::maximum(), BLOCK_SIZE);
#else
return BlockReduce(temp_storage).Reduce(max_, cub::Max(), BLOCK_SIZE);
#endif
#endif
}
} // namespace op::common_cuda::reduce_op
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment