Commit 890cdb46 authored by peastman's avatar peastman
Browse files

Merge pull request #439 from peastman/master

Fixed bug using multiple devices with PME
parents 5ed9dd65 f1991b3c
...@@ -1520,7 +1520,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1520,7 +1520,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
alpha = 0; alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald && cu.getContextIndex() == 0) { if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz; int kmaxx, kmaxy, kmaxz;
...@@ -1528,26 +1528,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1528,26 +1528,28 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["EWALD_ALPHA"] = cu.doubleToString(alpha); defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI); if (cu.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
// Create the reciprocal space kernels.
// Create the reciprocal space kernels.
map<string, string> replacements;
replacements["NUM_ATOMS"] = cu.intToString(numParticles); map<string, string> replacements;
replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); replacements["NUM_ATOMS"] = cu.intToString(numParticles);
replacements["KMAX_X"] = cu.intToString(kmaxx); replacements["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
replacements["KMAX_Y"] = cu.intToString(kmaxy); replacements["KMAX_X"] = cu.intToString(kmaxx);
replacements["KMAX_Z"] = cu.intToString(kmaxz); replacements["KMAX_Y"] = cu.intToString(kmaxy);
replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha)); replacements["KMAX_Z"] = cu.intToString(kmaxz);
replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0); replacements["EXP_COEFFICIENT"] = cu.doubleToString(-1.0/(4.0*alpha*alpha));
replacements["M_PI"] = cu.doubleToString(M_PI); replacements["ONE_4PI_EPS0"] = cu.doubleToString(ONE_4PI_EPS0);
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements); replacements["M_PI"] = cu.doubleToString(M_PI);
ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums"); CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::ewald, replacements);
ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces"); ewaldSumsKernel = cu.getKernel(module, "calculateEwaldCosSinSums");
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2)); ewaldForcesKernel = cu.getKernel(module, "calculateEwaldForces");
cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums"); int elementSize = (cu.getUseDoublePrecision() ? sizeof(double2) : sizeof(float2));
} cosSinSums = new CudaArray(cu, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
else if (force.getNonbondedMethod() == NonbondedForce::PME && cu.getContextIndex() == 0) { }
}
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters. // Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
...@@ -1560,140 +1562,142 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon ...@@ -1560,140 +1562,142 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
defines["EWALD_ALPHA"] = cu.doubleToString(alpha); defines["EWALD_ALPHA"] = cu.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cu.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI); if (cu.getContextIndex() == 0) {
pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder); ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles); pmeDefines["PME_ORDER"] = cu.intToString(PmeOrder);
pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms()); pmeDefines["NUM_ATOMS"] = cu.intToString(numParticles);
pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha)); pmeDefines["PADDED_NUM_ATOMS"] = cu.intToString(cu.getPaddedNumAtoms());
pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX); pmeDefines["RECIP_EXP_FACTOR"] = cu.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY); pmeDefines["GRID_SIZE_X"] = cu.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ); pmeDefines["GRID_SIZE_Y"] = cu.intToString(gridSizeY);
pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0)); pmeDefines["GRID_SIZE_Z"] = cu.intToString(gridSizeZ);
pmeDefines["M_PI"] = cu.doubleToString(M_PI); pmeDefines["EPSILON_FACTOR"] = cu.doubleToString(sqrt(ONE_4PI_EPS0));
if (cu.getUseDoublePrecision()) pmeDefines["M_PI"] = cu.doubleToString(M_PI);
pmeDefines["USE_DOUBLE_PRECISION"] = "1"; if (cu.getUseDoublePrecision())
CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines); pmeDefines["USE_DOUBLE_PRECISION"] = "1";
if (cu.getPlatformData().useCpuPme) { CUmodule module = cu.createModule(CudaKernelSources::vectorOps+CudaKernelSources::pme, pmeDefines);
// Create the CPU PME kernel. if (cu.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context); try {
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha); cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cu.getPlatformData().context);
CUfunction addForcesKernel = cu.getKernel(module, "addForces"); cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
pmeio = new PmeIO(cu, addForcesKernel); CUfunction addForcesKernel = cu.getKernel(module, "addForces");
cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio)); pmeio = new PmeIO(cu, addForcesKernel);
cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio)); cu.addPreComputation(new PmePreComputation(cu, cpuPme, *pmeio));
} cu.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
catch (OpenMMException& ex) { }
// The CPU PME plugin isn't available. catch (OpenMMException& ex) {
} // The CPU PME plugin isn't available.
} }
if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? elementSize : sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*(gridSizeZ/2+1), 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
cufftSetCompatibilityMode(fftForward, CUFFT_COMPATIBILITY_NATIVE);
cufftSetCompatibilityMode(fftBackward, CUFFT_COMPATIBILITY_NATIVE);
hasInitializedFFT = true;
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
} }
if (pmeio == NULL) {
pmeGridIndexKernel = cu.getKernel(module, "findAtomGridIndex");
pmeSpreadChargeKernel = cu.getKernel(module, "gridSpreadCharge");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce");
pmeEvalEnergyKernel = cu.getKernel(module, "gridEvaluateEnergy");
pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge");
cuFuncSetCacheConfig(pmeSpreadChargeKernel, CU_FUNC_CACHE_PREFER_L1);
cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures.
int elementSize = (cu.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
directPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*gridSizeZ, cu.getComputeCapability() >= 2.0 ? elementSize : sizeof(long long), "originalPmeGrid");
reciprocalPmeGrid = new CudaArray(cu, gridSizeX*gridSizeY*(gridSizeZ/2+1), 2*elementSize, "reciprocalPmeGrid");
cu.addAutoclearBuffer(*directPmeGrid);
pmeBsplineModuliX = new CudaArray(cu, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new CudaArray(cu, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new CudaArray(cu, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeAtomRange = CudaArray::create<int>(cu, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = CudaArray::create<int2>(cu, numParticles, "pmeAtomGridIndex");
sort = new CudaSort(cu, new SortTrait(), cu.getNumAtoms());
cufftResult result = cufftPlan3d(&fftForward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_D2Z : CUFFT_R2C);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
result = cufftPlan3d(&fftBackward, gridSizeX, gridSizeY, gridSizeZ, cu.getUseDoublePrecision() ? CUFFT_Z2D : CUFFT_C2R);
if (result != CUFFT_SUCCESS)
throw OpenMMException("Error initializing FFT: "+cu.intToString(result));
cufftSetCompatibilityMode(fftForward, CUFFT_COMPATIBILITY_NATIVE);
cufftSetCompatibilityMode(fftBackward, CUFFT_COMPATIBILITY_NATIVE);
hasInitializedFFT = true;
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
// Differentiate. ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[0] = -data[0]; ddata[i] = data[i-1]-data[i];
for (int i = 1; i < PmeOrder; i++) double div = 1.0/(PmeOrder-1);
ddata[i] = data[i-1]-data[i]; data[PmeOrder-1] = 0.0;
double div = 1.0/(PmeOrder-1); for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-1] = 0.0; data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
for (int i = 1; i < (PmeOrder-1); i++) data[0] = div*data[0];
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]); for (int i = 0; i < maxSize; i++)
data[0] = div*data[0]; bsplines_data[i] = 0.0;
for (int i = 0; i < maxSize; i++) for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = 0.0; bsplines_data[i] = data[i-1];
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1]; // Evaluate the actual bspline moduli for X/Y/Z.
// Evaluate the actual bspline moduli for X/Y/Z. for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
for(int dim = 0; dim < 3; dim++) { vector<double> moduli(ndata);
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ); for (int i = 0; i < ndata; i++) {
vector<double> moduli(ndata); double sc = 0.0;
for (int i = 0; i < ndata; i++) { double ss = 0.0;
double sc = 0.0; for (int j = 0; j < ndata; j++) {
double ss = 0.0; double arg = (2.0*M_PI*i*j)/ndata;
for (int j = 0; j < ndata; j++) { sc += bsplines_data[j]*cos(arg);
double arg = (2.0*M_PI*i*j)/ndata; ss += bsplines_data[j]*sin(arg);
sc += bsplines_data[j]*cos(arg); }
ss += bsplines_data[j]*sin(arg); moduli[i] = sc*sc+ss*ss;
} }
moduli[i] = sc*sc+ss*ss;
}
for (int i = 0; i < ndata; i++)
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
if (cu.getUseDoublePrecision()) {
if (dim == 0)
pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++) for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i]; if (moduli[i] < 1.0e-7)
if (dim == 0) moduli[i] = (moduli[i-1]+moduli[i+1])*0.5;
pmeBsplineModuliX->upload(modulif); if (cu.getUseDoublePrecision()) {
else if (dim == 1) if (dim == 0)
pmeBsplineModuliY->upload(modulif); pmeBsplineModuliX->upload(moduli);
else else if (dim == 1)
pmeBsplineModuliZ->upload(modulif); pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
}
} }
} }
} }
......
...@@ -748,7 +748,7 @@ void testChangingParameters() { ...@@ -748,7 +748,7 @@ void testChangingParameters() {
ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(cuState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
} }
void testParallelComputation(bool useCutoff) { void testParallelComputation(NonbondedForce::NonbondedMethod method) {
System system; System system;
const int numParticles = 200; const int numParticles = 200;
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
...@@ -756,9 +756,9 @@ void testParallelComputation(bool useCutoff) { ...@@ -756,9 +756,9 @@ void testParallelComputation(bool useCutoff) {
NonbondedForce* force = new NonbondedForce(); NonbondedForce* force = new NonbondedForce();
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
force->addParticle(i%2-0.5, 0.5, 1.0); force->addParticle(i%2-0.5, 0.5, 1.0);
if (useCutoff) force->setNonbondedMethod(method);
force->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
system.addForce(force); system.addForce(force);
system.setDefaultPeriodicBoxVectors(Vec3(5,0,0), Vec3(0,5,0), Vec3(0,0,5));
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
...@@ -877,8 +877,9 @@ int main(int argc, char* argv[]) { ...@@ -877,8 +877,9 @@ int main(int argc, char* argv[]) {
//testBlockInteractions(true); //testBlockInteractions(true);
testDispersionCorrection(); testDispersionCorrection();
testChangingParameters(); testChangingParameters();
testParallelComputation(false); testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(true); testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME);
testSwitchingFunction(NonbondedForce::CutoffNonPeriodic); testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
testSwitchingFunction(NonbondedForce::PME); testSwitchingFunction(NonbondedForce::PME);
} }
......
...@@ -1492,7 +1492,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1492,7 +1492,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else else
dispersionCoefficient = 0.0; dispersionCoefficient = 0.0;
alpha = 0; alpha = 0;
if (force.getNonbondedMethod() == NonbondedForce::Ewald && cl.getContextIndex() == 0) { if (force.getNonbondedMethod() == NonbondedForce::Ewald) {
// Compute the Ewald parameters. // Compute the Ewald parameters.
int kmaxx, kmaxy, kmaxz; int kmaxx, kmaxy, kmaxz;
...@@ -1500,23 +1500,25 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1500,23 +1500,25 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = cl.doubleToString(alpha); defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI); if (cl.getContextIndex() == 0) {
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
// Create the reciprocal space kernels. // Create the reciprocal space kernels.
map<string, string> replacements; map<string, string> replacements;
replacements["NUM_ATOMS"] = cl.intToString(numParticles); replacements["NUM_ATOMS"] = cl.intToString(numParticles);
replacements["KMAX_X"] = cl.intToString(kmaxx); replacements["KMAX_X"] = cl.intToString(kmaxx);
replacements["KMAX_Y"] = cl.intToString(kmaxy); replacements["KMAX_Y"] = cl.intToString(kmaxy);
replacements["KMAX_Z"] = cl.intToString(kmaxz); replacements["KMAX_Z"] = cl.intToString(kmaxz);
replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha)); replacements["EXP_COEFFICIENT"] = cl.doubleToString(-1.0/(4.0*alpha*alpha));
cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements); cl::Program program = cl.createProgram(OpenCLKernelSources::ewald, replacements);
ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums"); ewaldSumsKernel = cl::Kernel(program, "calculateEwaldCosSinSums");
ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces"); ewaldForcesKernel = cl::Kernel(program, "calculateEwaldForces");
int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2)); int elementSize = (cl.getUseDoublePrecision() ? sizeof(mm_double2) : sizeof(mm_float2));
cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums"); cosSinSums = new OpenCLArray(cl, (2*kmaxx-1)*(2*kmaxy-1)*(2*kmaxz-1), elementSize, "cosSinSums");
} }
else if (force.getNonbondedMethod() == NonbondedForce::PME && cl.getContextIndex() == 0) { }
else if (force.getNonbondedMethod() == NonbondedForce::PME) {
// Compute the PME parameters. // Compute the PME parameters.
int gridSizeX, gridSizeY, gridSizeZ; int gridSizeX, gridSizeY, gridSizeZ;
...@@ -1527,119 +1529,121 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb ...@@ -1527,119 +1529,121 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
defines["EWALD_ALPHA"] = cl.doubleToString(alpha); defines["EWALD_ALPHA"] = cl.doubleToString(alpha);
defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI)); defines["TWO_OVER_SQRT_PI"] = cl.doubleToString(2.0/sqrt(M_PI));
defines["USE_EWALD"] = "1"; defines["USE_EWALD"] = "1";
ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI); if (cl.getContextIndex() == 0) {
pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder); ewaldSelfEnergy = -ONE_4PI_EPS0*alpha*sumSquaredCharges/sqrt(M_PI);
pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles); pmeDefines["PME_ORDER"] = cl.intToString(PmeOrder);
pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha)); pmeDefines["NUM_ATOMS"] = cl.intToString(numParticles);
pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX); pmeDefines["RECIP_EXP_FACTOR"] = cl.doubleToString(M_PI*M_PI/(alpha*alpha));
pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY); pmeDefines["GRID_SIZE_X"] = cl.intToString(gridSizeX);
pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ); pmeDefines["GRID_SIZE_Y"] = cl.intToString(gridSizeY);
pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0)); pmeDefines["GRID_SIZE_Z"] = cl.intToString(gridSizeZ);
bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU); pmeDefines["EPSILON_FACTOR"] = cl.doubleToString(sqrt(ONE_4PI_EPS0));
if (deviceIsCpu) bool deviceIsCpu = (cl.getDevice().getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_CPU);
pmeDefines["DEVICE_IS_CPU"] = "1"; if (deviceIsCpu)
if (cl.getPlatformData().useCpuPme) { pmeDefines["DEVICE_IS_CPU"] = "1";
// Create the CPU PME kernel. if (cl.getPlatformData().useCpuPme) {
// Create the CPU PME kernel.
try {
cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context); try {
cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha); cpuPme = getPlatform().createKernel(CalcPmeReciprocalForceKernel::Name(), *cl.getPlatformData().context);
cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines); cpuPme.getAs<CalcPmeReciprocalForceKernel>().initialize(gridSizeX, gridSizeY, gridSizeZ, numParticles, alpha);
cl::Kernel addForcesKernel = cl::Kernel(program, "addForces"); cl::Program program = cl.createProgram(OpenCLKernelSources::pme, pmeDefines);
pmeio = new PmeIO(cl, addForcesKernel); cl::Kernel addForcesKernel = cl::Kernel(program, "addForces");
cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio)); pmeio = new PmeIO(cl, addForcesKernel);
cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio)); cl.addPreComputation(new PmePreComputation(cl, cpuPme, *pmeio));
} cl.addPostComputation(new PmePostComputation(cpuPme, *pmeio));
catch (OpenMMException& ex) {
// The CPU PME plugin isn't available.
}
}
if (pmeio == NULL) {
// Create required data structures.
int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
cl.addAutoclearBuffer(*pmeGrid);
pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
}
// Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
} }
for (int i = 0; i < ndata; i++) catch (OpenMMException& ex) {
{ // The CPU PME plugin isn't available.
if (moduli[i] < 1.0e-7)
moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
} }
if (cl.getUseDoublePrecision()) { }
if (dim == 0) if (pmeio == NULL) {
pmeBsplineModuliX->upload(moduli); // Create required data structures.
else if (dim == 1)
pmeBsplineModuliY->upload(moduli); int elementSize = (cl.getUseDoublePrecision() ? sizeof(double) : sizeof(float));
else pmeGrid = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid");
pmeBsplineModuliZ->upload(moduli); cl.addAutoclearBuffer(*pmeGrid);
pmeGrid2 = new OpenCLArray(cl, gridSizeX*gridSizeY*gridSizeZ, 2*elementSize, "pmeGrid2");
pmeBsplineModuliX = new OpenCLArray(cl, gridSizeX, elementSize, "pmeBsplineModuliX");
pmeBsplineModuliY = new OpenCLArray(cl, gridSizeY, elementSize, "pmeBsplineModuliY");
pmeBsplineModuliZ = new OpenCLArray(cl, gridSizeZ, elementSize, "pmeBsplineModuliZ");
pmeBsplineTheta = new OpenCLArray(cl, PmeOrder*numParticles, 4*elementSize, "pmeBsplineTheta");
pmeAtomRange = OpenCLArray::create<cl_int>(cl, gridSizeX*gridSizeY*gridSizeZ+1, "pmeAtomRange");
pmeAtomGridIndex = OpenCLArray::create<mm_int2>(cl, numParticles, "pmeAtomGridIndex");
sort = new OpenCLSort(cl, new SortTrait(), cl.getNumAtoms());
fft = new OpenCLFFT3D(cl, gridSizeX, gridSizeY, gridSizeZ);
// Initialize the b-spline moduli.
int maxSize = max(max(gridSizeX, gridSizeY), gridSizeZ);
vector<double> data(PmeOrder);
vector<double> ddata(PmeOrder);
vector<double> bsplines_data(maxSize);
data[PmeOrder-1] = 0.0;
data[1] = 0.0;
data[0] = 1.0;
for (int i = 3; i < PmeOrder; i++) {
double div = 1.0/(i-1.0);
data[i-1] = 0.0;
for (int j = 1; j < (i-1); j++)
data[i-j-1] = div*(j*data[i-j-2]+(i-j)*data[i-j-1]);
data[0] = div*data[0];
} }
else {
vector<float> modulif(ndata); // Differentiate.
ddata[0] = -data[0];
for (int i = 1; i < PmeOrder; i++)
ddata[i] = data[i-1]-data[i];
double div = 1.0/(PmeOrder-1);
data[PmeOrder-1] = 0.0;
for (int i = 1; i < (PmeOrder-1); i++)
data[PmeOrder-i-1] = div*(i*data[PmeOrder-i-2]+(PmeOrder-i)*data[PmeOrder-i-1]);
data[0] = div*data[0];
for (int i = 0; i < maxSize; i++)
bsplines_data[i] = 0.0;
for (int i = 1; i <= PmeOrder; i++)
bsplines_data[i] = data[i-1];
// Evaluate the actual bspline moduli for X/Y/Z.
for(int dim = 0; dim < 3; dim++) {
int ndata = (dim == 0 ? gridSizeX : dim == 1 ? gridSizeY : gridSizeZ);
vector<cl_double> moduli(ndata);
for (int i = 0; i < ndata; i++) {
double sc = 0.0;
double ss = 0.0;
for (int j = 0; j < ndata; j++) {
double arg = (2.0*M_PI*i*j)/ndata;
sc += bsplines_data[j]*cos(arg);
ss += bsplines_data[j]*sin(arg);
}
moduli[i] = (float) (sc*sc+ss*ss);
}
for (int i = 0; i < ndata; i++) for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i]; {
if (dim == 0) if (moduli[i] < 1.0e-7)
pmeBsplineModuliX->upload(modulif); moduli[i] = (moduli[i-1]+moduli[i+1])*0.5f;
else if (dim == 1) }
pmeBsplineModuliY->upload(modulif); if (cl.getUseDoublePrecision()) {
else if (dim == 0)
pmeBsplineModuliZ->upload(modulif); pmeBsplineModuliX->upload(moduli);
else if (dim == 1)
pmeBsplineModuliY->upload(moduli);
else
pmeBsplineModuliZ->upload(moduli);
}
else {
vector<float> modulif(ndata);
for (int i = 0; i < ndata; i++)
modulif[i] = (float) moduli[i];
if (dim == 0)
pmeBsplineModuliX->upload(modulif);
else if (dim == 1)
pmeBsplineModuliY->upload(modulif);
else
pmeBsplineModuliZ->upload(modulif);
}
} }
} }
} }
......
...@@ -751,7 +751,7 @@ void testChangingParameters() { ...@@ -751,7 +751,7 @@ void testChangingParameters() {
ASSERT_EQUAL_TOL(clState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol); ASSERT_EQUAL_TOL(clState.getPotentialEnergy(), referenceState.getPotentialEnergy(), tol);
} }
void testParallelComputation(bool useCutoff) { void testParallelComputation(NonbondedForce::NonbondedMethod method) {
System system; System system;
const int numParticles = 200; const int numParticles = 200;
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
...@@ -759,9 +759,9 @@ void testParallelComputation(bool useCutoff) { ...@@ -759,9 +759,9 @@ void testParallelComputation(bool useCutoff) {
NonbondedForce* force = new NonbondedForce(); NonbondedForce* force = new NonbondedForce();
for (int i = 0; i < numParticles; i++) for (int i = 0; i < numParticles; i++)
force->addParticle(i%2-0.5, 0.5, 1.0); force->addParticle(i%2-0.5, 0.5, 1.0);
if (useCutoff) force->setNonbondedMethod(method);
force->setNonbondedMethod(NonbondedForce::CutoffNonPeriodic);
system.addForce(force); system.addForce(force);
system.setDefaultPeriodicBoxVectors(Vec3(5,0,0), Vec3(0,5,0), Vec3(0,0,5));
OpenMM_SFMT::SFMT sfmt; OpenMM_SFMT::SFMT sfmt;
init_gen_rand(0, sfmt); init_gen_rand(0, sfmt);
vector<Vec3> positions(numParticles); vector<Vec3> positions(numParticles);
...@@ -880,8 +880,9 @@ int main(int argc, char* argv[]) { ...@@ -880,8 +880,9 @@ int main(int argc, char* argv[]) {
// testBlockInteractions(true); // testBlockInteractions(true);
testDispersionCorrection(); testDispersionCorrection();
testChangingParameters(); testChangingParameters();
testParallelComputation(false); testParallelComputation(NonbondedForce::NoCutoff);
testParallelComputation(true); testParallelComputation(NonbondedForce::Ewald);
testParallelComputation(NonbondedForce::PME);
testSwitchingFunction(NonbondedForce::CutoffNonPeriodic); testSwitchingFunction(NonbondedForce::CutoffNonPeriodic);
testSwitchingFunction(NonbondedForce::PME); testSwitchingFunction(NonbondedForce::PME);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment