"vscode:/vscode.git/clone" did not exist on "f582ec34f0bf5a916f0aeba9ac6410c27f75ec8a"
Commit 227aa72a authored by Peter Eastman's avatar Peter Eastman
Browse files

Bug fixes to PME reciprocal space calculation

parent 42d5b80a
...@@ -119,9 +119,6 @@ void kComputeAmoebaBsplines_kernel() ...@@ -119,9 +119,6 @@ void kComputeAmoebaBsplines_kernel()
posq.x -= floor(posq.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX; posq.x -= floor(posq.x*cSim.invPeriodicBoxSizeX)*cSim.periodicBoxSizeX;
posq.y -= floor(posq.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY; posq.y -= floor(posq.y*cSim.invPeriodicBoxSizeY)*cSim.periodicBoxSizeY;
posq.z -= floor(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ; posq.z -= floor(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.periodicBoxSizeZ;
float3 t = make_float3((posq.x*cSim.invPeriodicBoxSizeX)*cSim.pmeGridSize.x,
(posq.y*cSim.invPeriodicBoxSizeY)*cSim.pmeGridSize.y,
(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z);
// First axis. // First axis.
...@@ -153,7 +150,14 @@ void kComputeAmoebaBsplines_kernel() ...@@ -153,7 +150,14 @@ void kComputeAmoebaBsplines_kernel()
// Record the grid point. // Record the grid point.
cAmoebaSim.pIgrid[i] = make_int4(igrid1, igrid2, igrid3, 0); cAmoebaSim.pIgrid[i] = make_int4(igrid1, igrid2, igrid3, 0);
cSim.pPmeAtomGridIndex[i] = make_int2(i, igrid1*cSim.pmeGridSize.y*cSim.pmeGridSize.z+igrid2*cSim.pmeGridSize.z+igrid3); float3 t = make_float3((posq.x*cSim.invPeriodicBoxSizeX)*cSim.pmeGridSize.x,
(posq.y*cSim.invPeriodicBoxSizeY)*cSim.pmeGridSize.y,
(posq.z*cSim.invPeriodicBoxSizeZ)*cSim.pmeGridSize.z);
float4 dr = make_float4(t.x-(int) t.x, t.y-(int) t.y, t.z-(int) t.z, 0.0f);
int3 gridIndex = make_int3(((int) t.x) % cSim.pmeGridSize.x,
((int) t.y) % cSim.pmeGridSize.y,
((int) t.z) % cSim.pmeGridSize.z);
cSim.pPmeAtomGridIndex[i] = make_int2(i, gridIndex.x*cSim.pmeGridSize.y*cSim.pmeGridSize.z+gridIndex.y*cSim.pmeGridSize.z+gridIndex.z);
} }
} }
...@@ -660,24 +664,24 @@ void kComputeInducedPotentialFromGrid_kernel() ...@@ -660,24 +664,24 @@ void kComputeInducedPotentialFromGrid_kernel()
tuv012 += tu01*v.z; tuv012 += tu01*v.z;
tuv111 += tu11*v.y; tuv111 += tu11*v.y;
} }
cAmoebaSim.pPhid[20*m+1] = tuv100_1; cAmoebaSim.pPhid[10*m+1] = tuv100_1;
cAmoebaSim.pPhid[20*m+2] = tuv010_1; cAmoebaSim.pPhid[10*m+2] = tuv010_1;
cAmoebaSim.pPhid[20*m+3] = tuv001_1; cAmoebaSim.pPhid[10*m+3] = tuv001_1;
cAmoebaSim.pPhid[20*m+4] = tuv200_1; cAmoebaSim.pPhid[10*m+4] = tuv100_1;
cAmoebaSim.pPhid[20*m+5] = tuv020_1; cAmoebaSim.pPhid[10*m+5] = tuv010_1;
cAmoebaSim.pPhid[20*m+6] = tuv002_1; cAmoebaSim.pPhid[10*m+6] = tuv002_1;
cAmoebaSim.pPhid[20*m+7] = tuv110_1; cAmoebaSim.pPhid[10*m+7] = tuv110_1;
cAmoebaSim.pPhid[20*m+8] = tuv101_1; cAmoebaSim.pPhid[10*m+8] = tuv101_1;
cAmoebaSim.pPhid[20*m+9] = tuv011_1; cAmoebaSim.pPhid[10*m+9] = tuv011_1;
cAmoebaSim.pPhip[20*m+1] = tuv100_2; cAmoebaSim.pPhip[10*m+1] = tuv100_2;
cAmoebaSim.pPhip[20*m+2] = tuv010_2; cAmoebaSim.pPhip[10*m+2] = tuv010_2;
cAmoebaSim.pPhip[20*m+3] = tuv001_2; cAmoebaSim.pPhip[10*m+3] = tuv001_2;
cAmoebaSim.pPhip[20*m+4] = tuv200_2; cAmoebaSim.pPhip[10*m+4] = tuv100_2;
cAmoebaSim.pPhip[20*m+5] = tuv020_2; cAmoebaSim.pPhip[10*m+5] = tuv010_2;
cAmoebaSim.pPhip[20*m+6] = tuv002_2; cAmoebaSim.pPhip[10*m+6] = tuv002_2;
cAmoebaSim.pPhip[20*m+7] = tuv110_2; cAmoebaSim.pPhip[10*m+7] = tuv110_2;
cAmoebaSim.pPhip[20*m+8] = tuv101_2; cAmoebaSim.pPhip[10*m+8] = tuv101_2;
cAmoebaSim.pPhip[20*m+9] = tuv011_2; cAmoebaSim.pPhip[10*m+9] = tuv011_2;
cAmoebaSim.pPhidp[20*m] = tuv000; cAmoebaSim.pPhidp[20*m] = tuv000;
cAmoebaSim.pPhidp[20*m+1] = tuv100; cAmoebaSim.pPhidp[20*m+1] = tuv100;
cAmoebaSim.pPhidp[20*m+2] = tuv010; cAmoebaSim.pPhidp[20*m+2] = tuv010;
...@@ -712,9 +716,9 @@ __launch_bounds__(192, 1) ...@@ -712,9 +716,9 @@ __launch_bounds__(192, 1)
void kComputeFixedMultipoleForceAndEnergy_kernel() void kComputeFixedMultipoleForceAndEnergy_kernel()
{ {
float multipole[10]; float multipole[10];
const int deriv1[] = {2, 5, 8, 9, 11, 16, 18, 14, 15, 20}; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {3, 8, 6, 10, 14, 12, 19, 16, 20, 17}; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {4, 9, 10, 7, 15, 17, 13, 20, 18, 19}; const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
float energy = 0.0f; float energy = 0.0f;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) {
// Compute the force and energy. // Compute the force and energy.
...@@ -777,9 +781,9 @@ void kComputeInducedDipoleForceAndEnergy_kernel() ...@@ -777,9 +781,9 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
float multipole[10]; float multipole[10];
float inducedDipole[3]; float inducedDipole[3];
float inducedDipolePolar[3]; float inducedDipolePolar[3];
const int deriv1[] = {2, 5, 8, 9, 11, 16, 18, 14, 15, 20}; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {3, 8, 6, 10, 14, 12, 19, 16, 20, 17}; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
const int deriv3[] = {4, 9, 10, 7, 15, 17, 13, 20, 18, 19}; const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
float energy = 0.0f; float energy = 0.0f;
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) {
// Compute the force and energy. // Compute the force and energy.
...@@ -801,8 +805,8 @@ void kComputeInducedDipoleForceAndEnergy_kernel() ...@@ -801,8 +805,8 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
inducedDipolePolar[1] = cAmoebaSim.pInducedDipolePolar[i*3+1]; inducedDipolePolar[1] = cAmoebaSim.pInducedDipolePolar[i*3+1];
inducedDipolePolar[2] = cAmoebaSim.pInducedDipolePolar[i*3+2]; inducedDipolePolar[2] = cAmoebaSim.pInducedDipolePolar[i*3+2];
float* phi = &cAmoebaSim.pPhi[20*i]; float* phi = &cAmoebaSim.pPhi[20*i];
float* phip = &cAmoebaSim.pPhip[20*i]; float* phip = &cAmoebaSim.pPhip[10*i];
float* phid = &cAmoebaSim.pPhid[20*i]; float* phid = &cAmoebaSim.pPhid[10*i];
float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f); float4 f = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int k = 0; k < 3; k++) { for (int k = 0; k < 3; k++) {
int j1 = deriv1[k+1]; int j1 = deriv1[k+1];
...@@ -813,10 +817,11 @@ void kComputeInducedDipoleForceAndEnergy_kernel() ...@@ -813,10 +817,11 @@ void kComputeInducedDipoleForceAndEnergy_kernel()
f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2] + inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2]; f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2] + inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2];
f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3] + inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3]; f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3] + inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3];
} }
float* phidp = &cAmoebaSim.pPhidp[20*i];
for (int k = 0; k < 10; k++) { for (int k = 0; k < 10; k++) {
f.x += multipole[k]*cAmoebaSim.pPhidp[20*i+deriv1[k]]; f.x += multipole[k]*phidp[deriv1[k]];
f.y += multipole[k]*cAmoebaSim.pPhidp[20*i+deriv2[k]]; f.y += multipole[k]*phidp[deriv2[k]];
f.z += multipole[k]*cAmoebaSim.pPhidp[20*i+deriv3[k]]; f.z += multipole[k]*phidp[deriv3[k]];
} }
f.x *= cSim.pmeGridSize.x*cSim.invPeriodicBoxSizeX; f.x *= cSim.pmeGridSize.x*cSim.invPeriodicBoxSizeX;
f.y *= cSim.pmeGridSize.y*cSim.invPeriodicBoxSizeY; f.y *= cSim.pmeGridSize.y*cSim.invPeriodicBoxSizeY;
...@@ -856,9 +861,9 @@ __launch_bounds__(192, 1) ...@@ -856,9 +861,9 @@ __launch_bounds__(192, 1)
void kRecordFixedMultipoleField_kernel(float* output) void kRecordFixedMultipoleField_kernel(float* output)
{ {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) {
output[3*i] -= cAmoebaSim.pPhi[20*i]; output[3*i] -= cAmoebaSim.pPhi[20*i+1];
output[3*i+1] -= cAmoebaSim.pPhi[20*i+1]; output[3*i+1] -= cAmoebaSim.pPhi[20*i+2];
output[3*i+2] -= cAmoebaSim.pPhi[20*i+2]; output[3*i+2] -= cAmoebaSim.pPhi[20*i+3];
} }
} }
...@@ -873,12 +878,12 @@ __launch_bounds__(192, 1) ...@@ -873,12 +878,12 @@ __launch_bounds__(192, 1)
void kRecordInducedDipoleField_kernel(float* output, float* outputPolar) void kRecordInducedDipoleField_kernel(float* output, float* outputPolar)
{ {
for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) { for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < cSim.atoms; i += blockDim.x*gridDim.x) {
output[3*i] -= cAmoebaSim.pPhid[20*i]; output[3*i] -= cAmoebaSim.pPhid[20*i+1];
output[3*i+1] -= cAmoebaSim.pPhid[20*i+1]; output[3*i+1] -= cAmoebaSim.pPhid[20*i+2];
output[3*i+2] -= cAmoebaSim.pPhid[20*i+2]; output[3*i+2] -= cAmoebaSim.pPhid[20*i+3];
outputPolar[3*i] -= cAmoebaSim.pPhip[20*i]; outputPolar[3*i] -= cAmoebaSim.pPhip[20*i+1];
outputPolar[3*i+1] -= cAmoebaSim.pPhip[20*i+1]; outputPolar[3*i+1] -= cAmoebaSim.pPhip[20*i+2];
outputPolar[3*i+2] -= cAmoebaSim.pPhip[20*i+2]; outputPolar[3*i+2] -= cAmoebaSim.pPhip[20*i+3];
} }
} }
......
...@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{ {
cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu ); cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
//kCalculateAmoebaPME( amoebaGpu ); kCalculateAmoebaPME( amoebaGpu );
} }
...@@ -521,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -521,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu ) void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{ {
cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu ); cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu );
//kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu ); kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu );
} }
...@@ -620,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -620,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply // matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1] ); cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1] );
//kCalculateAmoebaPMEInducedDipoleField( amoebaGpu ); kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n"); LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment