Merge pull request #1296 from peastman/bugs

Bug fixes

Merge pull request #1296 from peastman/bugs
Bug fixes
cab0faf8 · peastman · 844a91b4 · 80d41381 · cab0faf8 · cab0faf8
Commit cab0faf8 authored Dec 14, 2015 by peastman
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 8 additions and 5 deletions

platforms/cuda/src/CudaContext.cpp +3 -1

platforms/cuda/src/CudaKernels.cpp +5 -4

No files found.
--- a/platforms/cuda/src/CudaContext.cpp
+++ b/platforms/cuda/src/CudaContext.cpp
@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
 CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
        const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
        time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
-        pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
+        pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
    this->compiler = "\""+compiler+"\"";
    if (platformData.context != NULL) {
        try {
@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
        delete force;
    if (energyBuffer != NULL)
        delete energyBuffer;
+    if (atomIndexDevice != NULL)
+        delete atomIndexDevice;
    if (integration != NULL)
        delete integration;
    if (expression != NULL)

--- a/platforms/cuda/src/CudaKernels.cpp
+++ b/platforms/cuda/src/CudaKernels.cpp
@@ -1643,6 +1643,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
        defines["USE_EWALD"] = "1";
        if (cu.getContextIndex() == 0) {
            ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
+            char deviceName[100];
+            cuDeviceGetName(deviceName, 100, cu.getDevice());
+            usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
            pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
            pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
            pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
@@ -1654,6 +1657,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
            pmeDefines["M_PI"] = cu.doubleToString(M_PI);
            if (cu.getUseDoublePrecision())
                pmeDefines["USE_DOUBLE_PRECISION"] = "1";
+            if (usePmeStream)
+                pmeDefines["USE_PME_STREAM"] = "1";
            CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
            if (cu.getPlatformData().useCpuPme) {
                // Create the CPU PME kernel.
@@ -1713,11 +1718,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
                // Prepare for doing PME on its own stream.
-                char deviceName[100];
-                cuDeviceGetName(deviceName, 100, cu.getDevice());
-                usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
                if (usePmeStream) {
-                    pmeDefines["USE_PME_STREAM"] = "1";
                    cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
                    if (useCudaFFT) {
                        cufftSetStream(fftForward, pmeStream);