Commit 101f206d authored by Mark Friedrichs
Browse files

Added loop over particles for torque mapping

parent 41abd9fb
...@@ -75,15 +75,13 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1) ...@@ -75,15 +75,13 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else #else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1) __launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif #endif
void amoebaMapTorqueToForce_kernel( float* torque ){ void amoebaMapTorqueToForce_kernel( float* torque )
{
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
int ii; int ii;
int threadId = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; int particleIndex = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
int numOfAtoms = cSim.atoms;
if( threadId >= numOfAtoms )return;
float4* atomCoord = cSim.pPosq; float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType; int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
...@@ -113,28 +111,29 @@ void amoebaMapTorqueToForce_kernel( float* torque ){ ...@@ -113,28 +111,29 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
int axisAtom = multiPoleAtoms[threadId].z; while( particleIndex < cSim.atoms )
int axisType = multiPoleAtoms[threadId].w; {
int axisAtom = multiPoleAtoms[particleIndex].z;
int axisType = multiPoleAtoms[particleIndex].w;
// NoAxisType // NoAxisType
if( axisType == 5 ) if( axisType < 5 && multiPoleAtoms[particleIndex].z >= 0 )
{ {
return;
}
vector[U][0] = atomCoord[threadId].x - atomCoord[axisAtom].x; vector[U][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[U][1] = atomCoord[threadId].y - atomCoord[axisAtom].y; vector[U][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[U][2] = atomCoord[threadId].z - atomCoord[axisAtom].z; vector[U][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
norms[U] = normVector3( vector[U] ); norms[U] = normVector3( vector[U] );
if( axisType != 4 ){ if( axisType != 4 && multiPoleAtoms[particleIndex].x >= 0 ){
axisAtom = multiPoleAtoms[threadId].x; axisAtom = multiPoleAtoms[particleIndex].x;
vector[V][0] = atomCoord[threadId].x - atomCoord[axisAtom].x; vector[V][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[V][1] = atomCoord[threadId].y - atomCoord[axisAtom].y; vector[V][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[V][2] = atomCoord[threadId].z - atomCoord[axisAtom].z; vector[V][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
} else { } else {
vector[V][0] = 0.1f; vector[V][0] = 0.1f;
...@@ -149,11 +148,11 @@ void amoebaMapTorqueToForce_kernel( float* torque ){ ...@@ -149,11 +148,11 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
if( axisType < 2 || axisType > 3 ){ if( axisType < 2 || axisType > 3 ){
crossVector3( vector[U], vector[V], vector[W] ); crossVector3( vector[U], vector[V], vector[W] );
} else { } else {
axisAtom = multiPoleAtoms[threadId].y; axisAtom = multiPoleAtoms[particleIndex].y;
vector[W][0] = atomCoord[threadId].x - atomCoord[axisAtom].x; vector[W][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[W][1] = atomCoord[threadId].y - atomCoord[axisAtom].y; vector[W][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[W][2] = atomCoord[threadId].z - atomCoord[axisAtom].z; vector[W][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
} }
norms[W] = normVector3( vector[W] ); norms[W] = normVector3( vector[W] );
...@@ -175,9 +174,9 @@ void amoebaMapTorqueToForce_kernel( float* torque ){ ...@@ -175,9 +174,9 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
angles[VW][1] = sqrtf( 1.0f - angles[VW][0]*angles[VW][0]); angles[VW][1] = sqrtf( 1.0f - angles[VW][0]*angles[VW][0]);
float dphi[3]; float dphi[3];
dphi[U] = DOT3( vector[U], (torque + threadId*3) ); dphi[U] = DOT3( vector[U], (torque + particleIndex*3) );
dphi[V] = DOT3( vector[V], (torque + threadId*3) ); dphi[V] = DOT3( vector[V], (torque + particleIndex*3) );
dphi[W] = DOT3( vector[W], (torque + threadId*3) ); dphi[W] = DOT3( vector[W], (torque + particleIndex*3) );
dphi[U] *= -1.0f; dphi[U] *= -1.0f;
dphi[V] *= -1.0f; dphi[V] *= -1.0f;
...@@ -261,8 +260,8 @@ void amoebaMapTorqueToForce_kernel( float* torque ){ ...@@ -261,8 +260,8 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
float ut2cos = DOT3( vector[U], t2 ); float ut2cos = DOT3( vector[U], t2 );
float ut2sin = sqrtf( 1.0f - ut2cos*ut2cos); float ut2sin = sqrtf( 1.0f - ut2cos*ut2cos);
float dphiR = -1.0f*DOT3( vector[R], (torque + threadId*3) ); float dphiR = -1.0f*DOT3( vector[R], (torque + particleIndex*3) );
float dphiS = -1.0f*DOT3( vector[S], (torque + threadId*3) ); float dphiS = -1.0f*DOT3( vector[S], (torque + particleIndex*3) );
float factor1 = dphiR/(norms[U]*angles[UR][1]); float factor1 = dphiR/(norms[U]*angles[UR][1]);
float factor2 = dphiS/(norms[U]); float factor2 = dphiS/(norms[U]);
...@@ -330,38 +329,37 @@ void amoebaMapTorqueToForce_kernel( float* torque ){ ...@@ -330,38 +329,37 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
// Z // Z
int4 forceBufferIndices = cAmoebaSim.pMultipoleParticlesTorqueBufferIndices[threadId]; int4 forceBufferIndices = cAmoebaSim.pMultipoleParticlesTorqueBufferIndices[particleIndex];
loadMappedTorque( multiPoleAtoms[threadId].z, forceBufferIndices.z, forces[Z] ); loadMappedTorque( multiPoleAtoms[particleIndex].z, forceBufferIndices.z, forces[Z] );
// X // X
if( axisType != 4 ){ if( axisType != 4 ){
loadMappedTorque( multiPoleAtoms[threadId].x, forceBufferIndices.x, forces[X] ); loadMappedTorque( multiPoleAtoms[particleIndex].x, forceBufferIndices.x, forces[X] );
} }
// Y // Y
if( axisType == 2 || axisType == 3 ){ if( axisType == 2 || axisType == 3 ){
int particleId = multiPoleAtoms[threadId].y; int particleId = multiPoleAtoms[particleIndex].y;
if( particleId > -1 ){ if( particleId > -1 ){
loadMappedTorque( multiPoleAtoms[threadId].y, forceBufferIndices.y, forces[Y] ); loadMappedTorque( multiPoleAtoms[particleIndex].y, forceBufferIndices.y, forces[Y] );
} }
} }
// put particle force in buffer 0 // put particle force in buffer 0
loadMappedTorque( threadId, 0, forces[I] ); loadMappedTorque( particleIndex, 0, forces[I] );
}
particleIndex += gridDim.x*blockDim.x;
}
} }
// NOTE(review): this is a side-by-side diff rendering -- each line below carries
// the pre-commit text followed by the post-commit text; a line holding a single
// copy appears to exist on only one side of the diff.
//
// Host-side entry point: launches amoebaMapTorqueToForce_kernel on
// psTorque->_pDevData and then invokes LAUNCHERROR("amoebaMapTorqueToForce").
//
// The pre-commit launch sized the grid from gpu->natoms
// (numThreads = min(256, natoms), numBlocks = 1 + natoms/numThreads).
// The post-commit launch uses gpu->sim.blocks and
// gpu->sim.update_threads_per_block instead; with that fixed geometry the
// kernel's `while( particleIndex < cSim.atoms )` loop, which advances by
// gridDim.x*blockDim.x, is what covers every particle.
// TODO confirm: update_threads_per_block should not exceed the thread count
// given to the kernel's __launch_bounds__ -- not verifiable from this view.
void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDAStream<float>* psTorque ) void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDAStream<float>* psTorque )
{ {
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
int numThreads = min(256, (gpu->natoms)); amoebaMapTorqueToForce_kernel<<< gpu->sim.blocks, gpu->sim.update_threads_per_block>>> ( psTorque->_pDevData );
int numBlocks = 1 + (gpu->natoms/numThreads);
amoebaMapTorqueToForce_kernel<<< numBlocks, numThreads>>> ( psTorque->_pDevData );
LAUNCHERROR("amoebaMapTorqueToForce"); LAUNCHERROR("amoebaMapTorqueToForce");
} }
...@@ -147,7 +147,6 @@ void kCudaComputeLabFrameMoments_kernel( void ) ...@@ -147,7 +147,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
float vectorZ[3]; float vectorZ[3];
int particleIndex = __mul24(blockIdx.x,blockDim.x) + threadIdx.x; int particleIndex = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
int numberOfParticles = cSim.atoms;
float4* particleCoord = cSim.pPosq; float4* particleCoord = cSim.pPosq;
int4* multiPoleParticles = cAmoebaSim.pMultipoleParticlesIdsAndAxisType; int4* multiPoleParticles = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
...@@ -162,7 +161,7 @@ void kCudaComputeLabFrameMoments_kernel( void ) ...@@ -162,7 +161,7 @@ void kCudaComputeLabFrameMoments_kernel( void )
// code common to ZThenX and Bisector // code common to ZThenX and Bisector
while( particleIndex < numberOfParticles ) while( particleIndex < cSim.atoms )
{ {
if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 ) if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment