Commit 97f6e1c3 authored by zhanggzh
Browse files

add hcu support code

parent 1661daf1
......@@ -16,9 +16,10 @@ constexpr inline __host__ __device__ bool isPo2(IntType num) {
}
/**
 * @brief Get the lane index of the calling thread
 * @return the lane index as an int
 * @note NOTE(review): this span appears to contain both the pre-commit body
 *       (inline PTX reading `%laneid`) and the post-commit body
 *       (`__lane_id()`) with the diff +/- markers lost. Read literally, only
 *       the first `return` is reachable. Confirm against the repository
 *       which version is actually in the file.
 */
inline __device__ int laneId() {
int id;
// Copy the PTX special register %laneid into `id` via inline assembly.
asm("mov.s32 %0, %%laneid;" : "=r"(id));
return id;
// The same three statements again, commented out:
//int id;
//asm("mov.s32 %0, %%laneid;" : "=r"(id));
//return id;
// NOTE(review): unreachable as written because of the unconditional
// `return id;` above. Presumably this is the only statement meant to remain
// after the "add hcu support code" change -- TODO confirm.
return __lane_id();
}
/**
* @brief Shuffle the data inside a warp
......
......@@ -12,7 +12,7 @@ namespace cu_ctc {
* @tparam IntType data type (checked only for integers)
*/
template <typename IntType>
constexpr __device__ IntType log2(IntType num, IntType ret = IntType(0)) {
// NOTE(review): two signature lines appear back to back; they differ only in
// the added `__host__` qualifier on the second one. Presumably the line above
// is the removed side of the diff and the line below is its replacement --
// TODO confirm against the repository.
constexpr __host__ __device__ IntType log2(IntType num, IntType ret = IntType(0)) {
// Recursive step: once num <= 1, return the accumulated `ret`; otherwise shift
// `num` right by one bit and recurse with `ret` incremented. With the default
// ret = 0 and num >= 1 this evaluates to floor(log2(num)); any num <= 1
// returns `ret` unchanged.
return num <= IntType(1) ? ret : log2(num >> IntType(1), ++ret);
}
......
......@@ -313,7 +313,7 @@ class warp_sort_filtered : public warp_sort<Capacity, Ascending, T, IdxT> {
__device__ __forceinline__ void merge_buf_() {
topk::bitonic<kMaxBufLen>(!Ascending, kWarpWidth).sort(val_buf_, idx_buf_);
this->merge_in<kMaxBufLen>(val_buf_, idx_buf_);
this->template merge_in<kMaxBufLen>(val_buf_, idx_buf_);
buf_len_ = 0;
set_k_th_(); // contains warp sync
#pragma unroll
......@@ -421,7 +421,7 @@ constexpr inline __host__ __device__ IntType ceildiv(IntType a, IntType b) {
return (a + b - 1) / b;
}
template <typename IntType>
constexpr inline __device__ IntType roundUp256(IntType num) {
constexpr inline __host__ __device__ IntType roundUp256(IntType num) {
// return (num + 255) / 256 * 256;
constexpr int MASK = 255;
return (num + MASK) & (~MASK);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment