Commit 97f6e1c3 authored by zhanggzh
Browse files

add hcu support code

parent 1661daf1
...@@ -16,9 +16,10 @@ constexpr inline __host__ __device__ bool isPo2(IntType num) { ...@@ -16,9 +16,10 @@ constexpr inline __host__ __device__ bool isPo2(IntType num) {
} }
inline __device__ int laneId() { inline __device__ int laneId() {
int id; //int id;
asm("mov.s32 %0, %%laneid;" : "=r"(id)); //asm("mov.s32 %0, %%laneid;" : "=r"(id));
return id; //return id;
return __lane_id();
} }
/** /**
* @brief Shuffle the data inside a warp * @brief Shuffle the data inside a warp
......
...@@ -12,7 +12,7 @@ namespace cu_ctc { ...@@ -12,7 +12,7 @@ namespace cu_ctc {
* @tparam IntType data type (checked only for integers) * @tparam IntType data type (checked only for integers)
*/ */
template <typename IntType> template <typename IntType>
constexpr __device__ IntType log2(IntType num, IntType ret = IntType(0)) { constexpr __host__ __device__ IntType log2(IntType num, IntType ret = IntType(0)) {
return num <= IntType(1) ? ret : log2(num >> IntType(1), ++ret); return num <= IntType(1) ? ret : log2(num >> IntType(1), ++ret);
} }
......
...@@ -313,7 +313,7 @@ class warp_sort_filtered : public warp_sort<Capacity, Ascending, T, IdxT> { ...@@ -313,7 +313,7 @@ class warp_sort_filtered : public warp_sort<Capacity, Ascending, T, IdxT> {
__device__ __forceinline__ void merge_buf_() { __device__ __forceinline__ void merge_buf_() {
topk::bitonic<kMaxBufLen>(!Ascending, kWarpWidth).sort(val_buf_, idx_buf_); topk::bitonic<kMaxBufLen>(!Ascending, kWarpWidth).sort(val_buf_, idx_buf_);
this->merge_in<kMaxBufLen>(val_buf_, idx_buf_); this->template merge_in<kMaxBufLen>(val_buf_, idx_buf_);
buf_len_ = 0; buf_len_ = 0;
set_k_th_(); // contains warp sync set_k_th_(); // contains warp sync
#pragma unroll #pragma unroll
...@@ -421,7 +421,7 @@ constexpr inline __host__ __device__ IntType ceildiv(IntType a, IntType b) { ...@@ -421,7 +421,7 @@ constexpr inline __host__ __device__ IntType ceildiv(IntType a, IntType b) {
return (a + b - 1) / b; return (a + b - 1) / b;
} }
template <typename IntType> template <typename IntType>
constexpr inline __device__ IntType roundUp256(IntType num) { constexpr inline __host__ __device__ IntType roundUp256(IntType num) {
// return (num + 255) / 256 * 256; // return (num + 255) / 256 * 256;
constexpr int MASK = 255; constexpr int MASK = 255;
return (num + MASK) & (~MASK); return (num + MASK) & (~MASK);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment