"vscode:/vscode.git/clone" did not exist on "6073a3bb72bb27cc5e2c5bcdbb10e06a67c3d50e"
Commit 5db397d0 authored by peastman's avatar peastman
Browse files

Merge pull request #13 from proteneer/master

Add support for long long shuffle
parents c589f1cc 370c8403
......@@ -25,6 +25,16 @@ static __inline__ __device__ double real_shfl(double var, int srcLane) {
lo = __shfl(lo, srcLane);
return __hiloint2double( hi, lo );
}
/**
 * Warp shuffle for a 64-bit integer: fetch the value of var held by another lane.
 *
 * __shfl exchanges 32-bit values, so the operand is split into its low and high
 * 32-bit halves, each half is shuffled separately, and the two halves are then
 * packed back into a single 64-bit value.
 *
 * NOTE(review): __shfl is the legacy mask-less shuffle. It is kept here to stay
 * consistent with the other real_shfl overloads; toolchains targeting Volta or
 * newer need the __shfl_sync form with an explicit participant mask.
 *
 * @param var      this lane's 64-bit value
 * @param srcLane  index of the lane to read from
 * @return the 64-bit value read from lane srcLane
 */
static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
    int hi, lo;
    // Unpack: the first element of the PTX vector receives the low 32 bits.
    asm volatile("mov.b64 { %0, %1 }, %2;" : "=r"(lo), "=r"(hi) : "l"(var));
    hi = __shfl(hi, srcLane);
    lo = __shfl(lo, srcLane);
    // There is no hi/lo-int to long long intrinsic, so repack with the inverse
    // mov.b64 instead of type-punning an int2 through a long long pointer,
    // which violates strict aliasing.
    long long result;
    asm volatile("mov.b64 %0, { %1, %2 };" : "=l"(result) : "r"(lo), "r"(hi));
    return result;
}
#endif
/**
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment