Commit 101f206d authored by Mark Friedrichs
Browse files

Added loop over particles for torque mapping

parent 41abd9fb
......@@ -75,15 +75,13 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void amoebaMapTorqueToForce_kernel( float* torque ){
void amoebaMapTorqueToForce_kernel( float* torque )
{
// ---------------------------------------------------------------------------------------
int ii;
int threadId = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
int numOfAtoms = cSim.atoms;
if( threadId >= numOfAtoms )return;
int particleIndex = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
......@@ -113,28 +111,29 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
// ---------------------------------------------------------------------------------------
int axisAtom = multiPoleAtoms[threadId].z;
int axisType = multiPoleAtoms[threadId].w;
while( particleIndex < cSim.atoms )
{
int axisAtom = multiPoleAtoms[particleIndex].z;
int axisType = multiPoleAtoms[particleIndex].w;
// NoAxisType
if( axisType == 5 )
if( axisType < 5 && multiPoleAtoms[particleIndex].z >= 0 )
{
return;
}
vector[U][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[U][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[U][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
vector[U][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[U][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[U][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
norms[U] = normVector3( vector[U] );
if( axisType != 4 ){
if( axisType != 4 && multiPoleAtoms[particleIndex].x >= 0 ){
axisAtom = multiPoleAtoms[threadId].x;
vector[V][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[V][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[V][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
axisAtom = multiPoleAtoms[particleIndex].x;
vector[V][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[V][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[V][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
} else {
vector[V][0] = 0.1f;
......@@ -149,11 +148,11 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
if( axisType < 2 || axisType > 3 ){
crossVector3( vector[U], vector[V], vector[W] );
} else {
axisAtom = multiPoleAtoms[threadId].y;
axisAtom = multiPoleAtoms[particleIndex].y;
vector[W][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[W][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[W][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
vector[W][0] = atomCoord[particleIndex].x - atomCoord[axisAtom].x;
vector[W][1] = atomCoord[particleIndex].y - atomCoord[axisAtom].y;
vector[W][2] = atomCoord[particleIndex].z - atomCoord[axisAtom].z;
}
norms[W] = normVector3( vector[W] );
......@@ -175,9 +174,9 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
angles[VW][1] = sqrtf( 1.0f - angles[VW][0]*angles[VW][0]);
float dphi[3];
dphi[U] = DOT3( vector[U], (torque + threadId*3) );
dphi[V] = DOT3( vector[V], (torque + threadId*3) );
dphi[W] = DOT3( vector[W], (torque + threadId*3) );
dphi[U] = DOT3( vector[U], (torque + particleIndex*3) );
dphi[V] = DOT3( vector[V], (torque + particleIndex*3) );
dphi[W] = DOT3( vector[W], (torque + particleIndex*3) );
dphi[U] *= -1.0f;
dphi[V] *= -1.0f;
......@@ -261,8 +260,8 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
float ut2cos = DOT3( vector[U], t2 );
float ut2sin = sqrtf( 1.0f - ut2cos*ut2cos);
float dphiR = -1.0f*DOT3( vector[R], (torque + threadId*3) );
float dphiS = -1.0f*DOT3( vector[S], (torque + threadId*3) );
float dphiR = -1.0f*DOT3( vector[R], (torque + particleIndex*3) );
float dphiS = -1.0f*DOT3( vector[S], (torque + particleIndex*3) );
float factor1 = dphiR/(norms[U]*angles[UR][1]);
float factor2 = dphiS/(norms[U]);
......@@ -330,38 +329,37 @@ void amoebaMapTorqueToForce_kernel( float* torque ){
// Z
int4 forceBufferIndices = cAmoebaSim.pMultipoleParticlesTorqueBufferIndices[threadId];
loadMappedTorque( multiPoleAtoms[threadId].z, forceBufferIndices.z, forces[Z] );
int4 forceBufferIndices = cAmoebaSim.pMultipoleParticlesTorqueBufferIndices[particleIndex];
loadMappedTorque( multiPoleAtoms[particleIndex].z, forceBufferIndices.z, forces[Z] );
// X
if( axisType != 4 ){
loadMappedTorque( multiPoleAtoms[threadId].x, forceBufferIndices.x, forces[X] );
loadMappedTorque( multiPoleAtoms[particleIndex].x, forceBufferIndices.x, forces[X] );
}
// Y
if( axisType == 2 || axisType == 3 ){
int particleId = multiPoleAtoms[threadId].y;
int particleId = multiPoleAtoms[particleIndex].y;
if( particleId > -1 ){
loadMappedTorque( multiPoleAtoms[threadId].y, forceBufferIndices.y, forces[Y] );
loadMappedTorque( multiPoleAtoms[particleIndex].y, forceBufferIndices.y, forces[Y] );
}
}
// put particle force in buffer 0
loadMappedTorque( threadId, 0, forces[I] );
loadMappedTorque( particleIndex, 0, forces[I] );
}
particleIndex += gridDim.x*blockDim.x;
}
}
/**
 * Host-side launcher for amoebaMapTorqueToForce_kernel.
 *
 * The kernel iterates over particles in a loop that advances particleIndex by
 * gridDim.x*blockDim.x until it reaches cSim.atoms, so one launch with the
 * simulation-wide block/thread configuration covers every particle. The kernel
 * must be launched exactly once per call: a second launch would repeat the
 * torque-to-force mapping for every particle.
 *
 * @param amoebaGpu  Amoeba GPU context; its gpuContext member supplies the
 *                   launch configuration (sim.blocks, sim.update_threads_per_block)
 * @param psTorque   stream whose device buffer (_pDevData) is handed to the kernel
 *                   as the torque array (the kernel reads it at torque + particleIndex*3)
 */
void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDAStream<float>* psTorque )
{
    gpuContext gpu = amoebaGpu->gpuContext;

    // Single launch. No per-call numThreads/numBlocks computation is needed
    // (the former 'natoms/numThreads' also divided by zero when natoms == 0).
    amoebaMapTorqueToForce_kernel<<< gpu->sim.blocks, gpu->sim.update_threads_per_block >>>( psTorque->_pDevData );
    LAUNCHERROR("amoebaMapTorqueToForce");
}
......@@ -147,7 +147,6 @@ void kCudaComputeLabFrameMoments_kernel( void )
float vectorZ[3];
int particleIndex = __mul24(blockIdx.x,blockDim.x) + threadIdx.x;
int numberOfParticles = cSim.atoms;
float4* particleCoord = cSim.pPosq;
int4* multiPoleParticles = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
......@@ -162,7 +161,7 @@ void kCudaComputeLabFrameMoments_kernel( void )
// code common to ZThenX and Bisector
while( particleIndex < numberOfParticles )
while( particleIndex < cSim.atoms )
{
if( multiPoleParticles[particleIndex].x >= 0 && multiPoleParticles[particleIndex].z >= 0 )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment