Commit a49479f4 authored by Your Name
Browse files

增加warp +=情况下的显式sync

parent 847585df
......@@ -248,13 +248,13 @@ __global__ void pqScanNoPrecomputedMultiPass(
#pragma unroll
for (int byte = 0; byte < kBytesPerCode32; ++byte) {
auto code = getByte(code32[word], byte * 8, 8);
// HC Debug
__syncthreads();
auto offset = codesPerSubQuantizer *
(word * kBytesPerCode32 + byte);
dist += ConvertTo<float>::to(codeDist[offset + code]);
// HC Debug explicit sync
__syncthreads();
}
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment