Commit dbbf7c3e authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed race condition

parent 11dc1eb6
...@@ -709,7 +709,7 @@ private: ...@@ -709,7 +709,7 @@ private:
CudaFFT3D* dispersionFft; CudaFFT3D* dispersionFft;
cufftHandle dispersionFftForward; cufftHandle dispersionFftForward;
cufftHandle dispersionFftBackward; cufftHandle dispersionFftBackward;
CUfunction computeParamsKernel; CUfunction computeParamsKernel, computeExclusionParamsKernel;
CUfunction ewaldSumsKernel; CUfunction ewaldSumsKernel;
CUfunction ewaldForcesKernel; CUfunction ewaldForcesKernel;
CUfunction pmeGridIndexKernel; CUfunction pmeGridIndexKernel;
......
...@@ -2110,6 +2110,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -2110,6 +2110,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines); CUmodule module = cu.createModule(CudaKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cu.getKernel(module, "computeParameters"); computeParamsKernel = cu.getKernel(module, "computeParameters");
computeExclusionParamsKernel = cu.getKernel(module, "computeExclusionParameters");
info = new ForceInfo(force); info = new ForceInfo(force);
cu.addForce(info); cu.addForce(info);
} }
...@@ -2136,7 +2137,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2136,7 +2137,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms, vector<void*> paramsArgs = {&cu.getEnergyBuffer().getDevicePointer(), &computeSelfEnergy, &globalParams.getDevicePointer(), &numAtoms,
&baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(), &baseParticleParams.getDevicePointer(), &cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
&particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()}; &particleParamOffsets.getDevicePointer(), &particleOffsetIndices.getDevicePointer()};
int numExceptions, numExclusions; int numExceptions;
if (exceptionParams.isInitialized()) { if (exceptionParams.isInitialized()) {
numExceptions = exceptionParams.getSize(); numExceptions = exceptionParams.getSize();
paramsArgs.push_back(&numExceptions); paramsArgs.push_back(&numExceptions);
...@@ -2145,13 +2146,13 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF ...@@ -2145,13 +2146,13 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer()); paramsArgs.push_back(&exceptionParamOffsets.getDevicePointer());
paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer()); paramsArgs.push_back(&exceptionOffsetIndices.getDevicePointer());
} }
cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms());
if (exclusionParams.isInitialized()) { if (exclusionParams.isInitialized()) {
numExclusions = exclusionParams.getSize(); int numExclusions = exclusionParams.getSize();
paramsArgs.push_back(&numExclusions); vector<void*> exclusionParamsArgs = {&cu.getPosq().getDevicePointer(), &charges.getDevicePointer(), &sigmaEpsilon.getDevicePointer(),
paramsArgs.push_back(&exclusionAtoms.getDevicePointer()); &numExclusions, &exclusionAtoms.getDevicePointer(), &exclusionParams.getDevicePointer()};
paramsArgs.push_back(&exclusionParams.getDevicePointer()); cu.executeKernel(computeExclusionParamsKernel, &exclusionParamsArgs[0], numExclusions);
} }
cu.executeKernel(computeParamsKernel, &paramsArgs[0], cu.getPaddedNumAtoms());
if (usePmeStream) { if (usePmeStream) {
cuEventRecord(paramsSyncEvent, cu.getCurrentStream()); cuEventRecord(paramsSyncEvent, cu.getCurrentStream());
cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0); cuStreamWaitEvent(pmeStream, paramsSyncEvent, 0);
......
...@@ -7,9 +7,6 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -7,9 +7,6 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
#ifdef HAS_EXCEPTIONS #ifdef HAS_EXCEPTIONS
, int numExceptions, const float4* __restrict__ baseExceptionParams, float4* __restrict__ exceptionParams, , int numExceptions, const float4* __restrict__ baseExceptionParams, float4* __restrict__ exceptionParams,
float4* __restrict__ exceptionParamOffsets, int* __restrict__ exceptionOffsetIndices float4* __restrict__ exceptionParamOffsets, int* __restrict__ exceptionOffsetIndices
#endif
#ifdef HAS_EXCLUSIONS
, int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams
#endif #endif
) { ) {
mixed energy = 0; mixed energy = 0;
...@@ -63,10 +60,15 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -63,10 +60,15 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
exceptionParams[i] = make_float4((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0); exceptionParams[i] = make_float4((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0);
} }
#endif #endif
if (includeSelfEnergy)
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
// Compute parameters for subtracting the reciprocal part of excluded interactions. /**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
#ifdef HAS_EXCLUSIONS */
extern "C" __global__ void computeExclusionParameters(real4* __restrict__ posq, real* __restrict__ charge, float2* __restrict__ sigmaEpsilon,
int numExclusions, const int2* __restrict__ exclusionAtoms, float4* __restrict__ exclusionParams) {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < numExclusions; i += blockDim.x*gridDim.x) {
int2 atoms = exclusionAtoms[i]; int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES #ifdef USE_POSQ_CHARGES
...@@ -85,7 +87,4 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b ...@@ -85,7 +87,4 @@ extern "C" __global__ void computeParameters(mixed* __restrict__ energyBuffer, b
#endif #endif
exclusionParams[i] = make_float4((float) (138.935456f*chargeProd), sigma, epsilon, 0); exclusionParams[i] = make_float4((float) (138.935456f*chargeProd), sigma, epsilon, 0);
} }
#endif }
if (includeSelfEnergy) \ No newline at end of file
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
}
...@@ -685,7 +685,7 @@ private: ...@@ -685,7 +685,7 @@ private:
Kernel cpuPme; Kernel cpuPme;
PmeIO* pmeio; PmeIO* pmeio;
SyncQueuePostComputation* syncQueue; SyncQueuePostComputation* syncQueue;
cl::Kernel computeParamsKernel; cl::Kernel computeParamsKernel, computeExclusionParamsKernel;
cl::Kernel ewaldSumsKernel; cl::Kernel ewaldSumsKernel;
cl::Kernel ewaldForcesKernel; cl::Kernel ewaldForcesKernel;
cl::Kernel pmeAtomRangeKernel; cl::Kernel pmeAtomRangeKernel;
......
...@@ -2036,6 +2036,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -2036,6 +2036,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines); cl::Program program = cl.createProgram(OpenCLKernelSources::nonbondedParameters, paramsDefines);
computeParamsKernel = cl::Kernel(program, "computeParameters"); computeParamsKernel = cl::Kernel(program, "computeParameters");
computeExclusionParamsKernel = cl::Kernel(program, "computeExclusionParameters");
info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force); info = new ForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force);
cl.addForce(info); cl.addForce(info);
} }
...@@ -2063,9 +2064,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2063,9 +2064,12 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer()); computeParamsKernel.setArg<cl::Buffer>(index++, exceptionOffsetIndices.getDeviceBuffer());
} }
if (exclusionParams.isInitialized()) { if (exclusionParams.isInitialized()) {
computeParamsKernel.setArg<cl_int>(index++, exclusionParams.getSize()); computeExclusionParamsKernel.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionAtoms.getDeviceBuffer()); computeExclusionParamsKernel.setArg<cl::Buffer>(1, charges.getDeviceBuffer());
computeParamsKernel.setArg<cl::Buffer>(index++, exclusionParams.getDeviceBuffer()); computeExclusionParamsKernel.setArg<cl::Buffer>(2, sigmaEpsilon.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl_int>(3, exclusionParams.getSize());
computeExclusionParamsKernel.setArg<cl::Buffer>(4, exclusionAtoms.getDeviceBuffer());
computeExclusionParamsKernel.setArg<cl::Buffer>(5, exclusionParams.getDeviceBuffer());
} }
if (cosSinSums.isInitialized()) { if (cosSinSums.isInitialized()) {
ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer()); ewaldSumsKernel.setArg<cl::Buffer>(0, cl.getEnergyBuffer().getDeviceBuffer());
...@@ -2215,6 +2219,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -2215,6 +2219,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
if (recomputeParams || hasOffsets) { if (recomputeParams || hasOffsets) {
computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal); computeParamsKernel.setArg<cl_int>(1, includeEnergy && includeReciprocal);
cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms()); cl.executeKernel(computeParamsKernel, cl.getPaddedNumAtoms());
if (exclusionParams.isInitialized())
cl.executeKernel(computeExclusionParamsKernel, exclusionParams.getSize());
if (usePmeQueue) { if (usePmeQueue) {
vector<cl::Event> events(1); vector<cl::Event> events(1);
cl.getQueue().enqueueMarker(&events[0]); cl.getQueue().enqueueMarker(&events[0]);
......
...@@ -7,9 +7,6 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu ...@@ -7,9 +7,6 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
#ifdef HAS_EXCEPTIONS #ifdef HAS_EXCEPTIONS
, int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams, , int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams,
__global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices __global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices
#endif
#ifdef HAS_EXCLUSIONS
, int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams
#endif #endif
) { ) {
mixed energy = 0; mixed energy = 0;
...@@ -63,10 +60,15 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu ...@@ -63,10 +60,15 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
exceptionParams[i] = (float4) ((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0); exceptionParams[i] = (float4) ((float) (138.935456f*params.x), (float) params.y, (float) (4*params.z), 0);
} }
#endif #endif
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
}
// Compute parameters for subtracting the reciprocal part of excluded interactions. /**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
#ifdef HAS_EXCLUSIONS */
__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) { for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i]; int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES #ifdef USE_POSQ_CHARGES
...@@ -85,7 +87,4 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu ...@@ -85,7 +87,4 @@ __kernel void computeParameters(__global mixed* restrict energyBuffer, int inclu
#endif #endif
exclusionParams[i] = (float4) ((float) (138.935456f*chargeProd), sigma, epsilon, 0); exclusionParams[i] = (float4) ((float) (138.935456f*chargeProd), sigma, epsilon, 0);
} }
#endif
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment