Commit 0221ee14 authored by Peter Eastman's avatar Peter Eastman
Browse files

Beginnings of volta support

parent 1ef93e47
......@@ -236,12 +236,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor = 3;
}
}
if (major == 7) {
// Don't generate Volta-specific code until we've made the changes needed
// to support it properly.
major = 6;
minor = 0;
}
gpuArchitecture = intToString(major)+intToString(minor);
computeCapability = major+0.1*minor;
......@@ -263,6 +257,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (cudaDriverVersion >= 9000) {
compilationDefines["SYNC_WARPS"] = "__syncwarp();";
compilationDefines["SHFL(var, srcLane)"] = "__shfl_sync(0xffffffff, var, srcLane);";
}
else {
compilationDefines["SYNC_WARPS"] = "";
compilationDefines["SHFL(var, srcLane)"] = "__shfl(var, srcLane);";
}
if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
velm = CudaArray::create<double4>(*this, paddedNumAtoms, "velm");
......
......@@ -15,22 +15,22 @@ typedef struct {
#ifdef ENABLE_SHUFFLE
//support for 64 bit shuffles
static __inline__ __device__ float real_shfl(float var, int srcLane) {
return __shfl(var, srcLane);
return SHFL(var, srcLane);
}
static __inline__ __device__ double real_shfl(double var, int srcLane) {
int hi, lo;
asm volatile("mov.b64 { %0, %1 }, %2;" : "=r"(lo), "=r"(hi) : "d"(var));
hi = __shfl(hi, srcLane);
lo = __shfl(lo, srcLane);
hi = SHFL(hi, srcLane);
lo = SHFL(lo, srcLane);
return __hiloint2double( hi, lo );
}
static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
int hi, lo;
asm volatile("mov.b64 { %0, %1 }, %2;" : "=r"(lo), "=r"(hi) : "l"(var));
hi = __shfl(hi, srcLane);
lo = __shfl(lo, srcLane);
hi = SHFL(hi, srcLane);
lo = SHFL(lo, srcLane);
// unforunately there isn't an __nv_hiloint2long(hi,lo) intrinsic cast
int2 fuse; fuse.x = lo; fuse.y = hi;
return *reinterpret_cast<long long*>(&fuse);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment