Commit cab0faf8 authored by peastman
Browse files

Merge pull request #1296 from peastman/bugs

Bug fixes
parents 844a91b4 80d41381
......@@ -76,7 +76,7 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlockingSync, const string& precision, const string& compiler,
const string& tempDir, const std::string& hostCompiler, CudaPlatform::PlatformData& platformData) : system(system), currentStream(0),
time(0.0), platformData(platformData), stepCount(0), computeForceCount(0), stepsSinceReorder(99999), contextIsValid(false), atomsWereReordered(false), hasCompilerKernel(false),
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
pinnedBuffer(NULL), posq(NULL), posqCorrection(NULL), velm(NULL), force(NULL), energyBuffer(NULL), atomIndexDevice(NULL), integration(NULL), expression(NULL), bonded(NULL), nonbonded(NULL), thread(NULL) {
this->compiler = "\""+compiler+"\"";
if (platformData.context != NULL) {
try {
......@@ -339,6 +339,8 @@ CudaContext::~CudaContext() {
delete force;
if (energyBuffer != NULL)
delete energyBuffer;
if (atomIndexDevice != NULL)
delete atomIndexDevice;
if (integration != NULL)
delete integration;
if (expression != NULL)
......
......@@ -1643,6 +1643,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["USE_EWALD"] = "1";
if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
......@@ -1654,6 +1657,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines["M_PI"] = cu.doubleToString(M_PI);
if (cu.getUseDoublePrecision())
pmeDefines["USE_DOUBLE_PRECISION"] = "1";
if (usePmeStream)
pmeDefines["USE_PME_STREAM"] = "1";
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
if (cu.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
......@@ -1713,11 +1718,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Prepare for doing PME on its own stream.
char deviceName[100];
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
if (usePmeStream) {
pmeDefines["USE_PME_STREAM"] = "1";
cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
if (useCudaFFT) {
cufftSetStream(fftForward, pmeStream);
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment