Commit 09fb3811 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added checkChiral for chiral systems (Reference/Cuda)

Added support for missing axis types (Cuda only)
Removed rotationMatrix
parent 562cfb39
......@@ -63,7 +63,7 @@ public:
PME = 1
};
enum MultipoleAxisTypes { ZThenX, Bisector };
enum MultipoleAxisTypes { ZThenX, Bisector, ZBisect, ThreeFold, ZOnly, LastAxisTypeIndex };
// Algorithm used to converge mutual induced dipoles:
// SOR: successive-over-relaxation
......
......@@ -62,12 +62,17 @@ void AmoebaMultipoleForceImpl::initialize(ContextImpl& context) {
owner.getMultipoleParameters( ii, charge, molecularDipole, molecularQuadrupole, axisType, multipoleAtomZ, multipoleAtomX, multipoleAtomY,
thole, dampingFactor, polarity );
// only 'Z-then-X' or 'Bisector' currently handled
// only 'Z-then-X', 'Bisector', Z-Bisect, ThreeFold currently handled
if( axisType != AmoebaMultipoleForce::ZThenX && axisType != AmoebaMultipoleForce::Bisector ){
if( axisType != AmoebaMultipoleForce::ZThenX && axisType != AmoebaMultipoleForce::Bisector &&
axisType != AmoebaMultipoleForce::ZBisect && axisType != AmoebaMultipoleForce::ThreeFold &&
axisType != AmoebaMultipoleForce::ZOnly ) {
std::stringstream buffer;
buffer << "AmoebaMultipoleForce: axis type=" << axisType;
buffer << " not currently handled - only axisTypes[ " << AmoebaMultipoleForce::ZThenX << ", " << AmoebaMultipoleForce::Bisector << "] (ZThenX, Bisector) currently handled .";
buffer << " not currently handled - only axisTypes[ ";
buffer << AmoebaMultipoleForce::ZThenX << ", " << AmoebaMultipoleForce::Bisector << ", ";
buffer << AmoebaMultipoleForce::ZBisect << ", " << AmoebaMultipoleForce::ThreeFold;
buffer << "] (ZThenX, Bisector, Z-Bisect, ThreeFold) currently handled .";
throw OpenMMException(buffer.str());
}
}
......
......@@ -338,11 +338,20 @@ void gpuPrintCudaAmoebaGmxSimulation(amoebaGpuContext amoebaGpu, FILE* log )
(void) fprintf( log, " pAmoebaUreyBradleyID %p\n", amoebaGpu->amoebaSim.pAmoebaUreyBradleyID );
(void) fprintf( log, " pAmoebaUreyBradleyParameter %p\n", amoebaGpu->amoebaSim.pAmoebaUreyBradleyParameter );
if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
// if( amoebaGpu->psRotationMatrix)(void) fprintf( log, "\n" );
// gpuPrintCudaStreamFloat( amoebaGpu->psRotationMatrix, log );
// (void) fprintf( log, " pRotationMatrix %p\n", amoebaGpu->amoebaSim.pRotationMatrix);
gpuPrintCudaStreamInt4( amoebaGpu->psMultipoleParticlesIdsAndAxisType, log );
(void) fprintf( log, " pMultipoleParticlesIdsAndAxisType %p\n", amoebaGpu->amoebaSim.pMultipoleParticlesIdsAndAxisType);
gpuPrintCudaStreamInt( amoebaGpu->psMultipoleAxisOffset, log );
(void) fprintf( log, " pMultipoleAxisOffset %p\n", amoebaGpu->amoebaSim.pMultipoleAxisOffset);
gpuPrintCudaStreamFloat( amoebaGpu->psMolecularDipole, log );
(void) fprintf( log, " pMolecularDipole %p\n", amoebaGpu->amoebaSim.pMolecularDipole);
gpuPrintCudaStreamFloat( amoebaGpu->psMolecularQuadrupole, log );
(void) fprintf( log, " pMolecularQuadrupole %p\n", amoebaGpu->amoebaSim.pMolecularQuadrupole );
gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameDipole, log );
gpuPrintCudaStreamFloat( amoebaGpu->psLabFrameQuadrupole, log );
......@@ -1276,7 +1285,7 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
// ---------------------------------------------------------------------------------------
if( amoebaGpu->psRotationMatrix != NULL ){
if( amoebaGpu->psMultipoleParticlesIdsAndAxisType != NULL ){
return;
}
......@@ -1289,13 +1298,22 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
// work space
amoebaGpu->psRotationMatrix = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "RotationMatrix");
// amoebaGpu->psRotationMatrix = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "RotationMatrix");
// amoebaGpu->amoebaSim.pRotationMatrix = amoebaGpu->psRotationMatrix->_pDevStream[0];
// parameters
amoebaGpu->psMultipoleParticlesIdsAndAxisType = new CUDAStream<int4>(amoebaGpu->paddedNumberOfAtoms, 1, "MultipoleParticlesIdsAndAxisType");
amoebaGpu->amoebaSim.pMultipoleParticlesIdsAndAxisType = amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0];
amoebaGpu->psMultipoleAxisOffset = new CUDAStream<int>(amoebaGpu->paddedNumberOfAtoms, 1, "psMultipoleAxisOffset");
amoebaGpu->amoebaSim.pMultipoleAxisOffset = amoebaGpu->psMultipoleAxisOffset->_pDevStream[0];
amoebaGpu->psMolecularDipole = new CUDAStream<float>(3*amoebaGpu->paddedNumberOfAtoms, 1, "MolecularDipole");
amoebaGpu->amoebaSim.pMolecularDipole = amoebaGpu->psMolecularDipole->_pDevStream[0];
amoebaGpu->psMolecularQuadrupole = new CUDAStream<float>(9*amoebaGpu->paddedNumberOfAtoms, 1, "MolecularQuadrupole");
amoebaGpu->amoebaSim.pMolecularQuadrupole = amoebaGpu->psMolecularQuadrupole->_pDevStream[0];
// output
......@@ -1306,6 +1324,7 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->amoebaSim.pLabFrameQuadrupole = amoebaGpu->psLabFrameQuadrupole->_pDevStream[0];
memset( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0], 0, sizeof(int)*4*amoebaGpu->paddedNumberOfAtoms );
memset( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0], 0, sizeof(int)*amoebaGpu->paddedNumberOfAtoms );
}
static void gpuFixedEFieldAllocate( amoebaGpuContext amoebaGpu )
......@@ -1597,7 +1616,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
std::vector<int> maxIndices;
for( unsigned int ii = 0; ii < charges.size(); ii++ ){
maxIndices.push_back(ii);
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z = ii;
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii] = ii;
}
if( nonbondedMethod == 0 ){
......@@ -1623,37 +1642,64 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->amoebaSim.dielec = 1.0f;
}
static const int maxAxisType = 5;
int axisTypeCount[maxAxisType+1] = { 0, 0, 0, 0, 0, 0 };
for( int ii = 0; ii < static_cast<int>(charges.size()); ii++ ){
// axis type & multipole particles ids
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x = multipoleParticleZ[ii];
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y = multipoleParticleX[ii];
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x = multipoleParticleX[ii];
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y = multipoleParticleY[ii];
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z = multipoleParticleZ[ii];
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w = axisType[ii];
if( axisType[ii] < (maxAxisType) && axisType[ii] > -1 ){
axisTypeCount[axisType[ii]]++;
} else {
axisTypeCount[maxAxisType]++;
}
// for z-only need to add access to random numbers
// and need test system
if( axisType[ii] == 4 ){
//fprintf( stderr, "Axis type z-only (atom=%d) not fully implemented -- aborting.\n", ii );
fprintf( stderr, "Warning: Axis type z-only (atom=%d) not fully implemented.\n", ii );
// exit(0);
}
int axisParticleIndex = multipoleParticleZ[ii];
if( maxIndices[axisParticleIndex] < ii ){
maxIndices[axisParticleIndex] = ii;
}
if( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z > ii ){
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z = ii;
if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
}
axisParticleIndex = multipoleParticleX[ii];
if( maxIndices[axisParticleIndex] < ii ){
maxIndices[axisParticleIndex] = ii;
}
if( amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z > ii ){
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][axisParticleIndex].z = ii;
if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
}
axisParticleIndex = multipoleParticleY[ii];
if( axisParticleIndex > -1 ){
if( maxIndices[axisParticleIndex] < ii ){
maxIndices[axisParticleIndex] = ii;
}
if( amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] > ii ){
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][axisParticleIndex] = ii;
}
}
if( 0 && amoebaGpu->log )
fprintf( amoebaGpu->log, "Z1 %4d [%4d %4d] %4d %4d %4d %4d %d %d\n", ii,
multipoleParticleZ[ii], multipoleParticleX[ii],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][multipoleParticleZ[ii]].z,
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii],
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][multipoleParticleZ[ii]],
maxIndices[multipoleParticleZ[ii]],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][multipoleParticleX[ii]].z,
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][multipoleParticleX[ii]],
maxIndices[multipoleParticleX[ii]],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][0].z, maxIndices[0] );
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][0], maxIndices[0] );
// charges
......@@ -1733,17 +1779,16 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
#ifdef AMOEBA_DEBUG
//if( amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (charges.size() - maxPrint) ) ) || ( ii == targetAtoms[0] || ii == targetAtoms[1]) ) ){
if( (amoebaGpu->log && ( ( ( ii < maxPrint ) || (ii >= (charges.size() - maxPrint) )) ) ) ){
// axis particles
(void) fprintf( amoebaGpu->log,"%u axis particles [%6d %6d %6d diff=%d %d] ", ii,
(void) fprintf( amoebaGpu->log,"%u axis particles [%6d %6d %6d] axis=%d max=%d diff=%d ", ii,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
maxIndices[ii], maxIndices[ii] - amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w );
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w,
maxIndices[ii], maxIndices[ii] - amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii] );
// dipole
......@@ -1886,6 +1931,13 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
if( amoebaGpu->log ){
std::string axisLabel[maxAxisType+1] = { "ZThenX", "Bisector", "ZBisect", "ThreeFold", "ZOnly", "Unknown"};
for( unsigned int kk = 0; kk < (maxAxisType+1); kk++ ){
(void) fprintf( amoebaGpu->log, "%2u %10s atom count=%d\n", kk, axisLabel[kk].c_str(), axisTypeCount[kk] );
}
}
#if 0
if( amoebaGpu->log ){
FILE* filePtr = fopen( "oldScale.txt", "w" );
......@@ -1916,7 +1968,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
// axis type & multipole particles ids
int diff = maxIndices[ii] - amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z;
int diff = maxIndices[ii] - amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii];
if( diff > amoebaGpu->maxMapTorqueDifference ){
amoebaGpu->maxMapTorqueDifference = diff;
}
......@@ -1944,6 +1996,7 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
amoebaGpu->amoebaSim.paddedNumberOfAtoms = amoebaGpu->paddedNumberOfAtoms;
amoebaGpu->psMultipoleParticlesIdsAndAxisType->Upload();
amoebaGpu->psMultipoleAxisOffset->Upload();
amoebaGpu->psMolecularDipole->Upload();
amoebaGpu->psMolecularQuadrupole->Upload();
amoebaGpu->psCovalentDegree->Upload();
......@@ -2707,8 +2760,9 @@ void amoebaGpuShutDown(amoebaGpuContext gpu)
// molecular frame multipoles
delete gpu->psRotationMatrix;
//delete gpu->psRotationMatrix;
delete gpu->psMultipoleParticlesIdsAndAxisType;
delete gpu->psMultipoleAxisOffset;
delete gpu->psMolecularDipole;
delete gpu->psMolecularQuadrupole;
delete gpu->psLabFrameDipole;
......
......@@ -139,6 +139,11 @@ struct cudaAmoebaGmxSimulation {
float scalingDistanceCutoff; // scaling cutoff
float2* pDampingFactorAndThole; // Thole & damping factors
float* pRotationMatrix;
int4* pMultipoleParticlesIdsAndAxisType;
int* pMultipoleAxisOffset;
float* pMolecularDipole;
float* pMolecularQuadrupole;
float* pLabFrameDipole;
float* pLabFrameQuadrupole;
float* pInducedDipole;
......
......@@ -129,6 +129,7 @@ struct _amoebaGpuContext {
// multipole parameters
CUDAStream<int4>* psMultipoleParticlesIdsAndAxisType;
CUDAStream<int>* psMultipoleAxisOffset;
CUDAStream<float>* psMolecularDipole;
CUDAStream<float>* psMolecularQuadrupole;
......
......@@ -42,6 +42,15 @@ __device__ static float normVector3( float* vector )
return returnNorm;
}
__device__ static void crossVector3( float* vector1, float* vector2, float* vector3 )
{
vector3[0] = vector1[1]*vector2[2] - vector1[2]*vector2[1];
vector3[1] = vector1[2]*vector2[0] - vector1[0]*vector2[2];
vector3[2] = vector1[0]*vector2[1] - vector1[1]*vector2[0];
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
......@@ -50,35 +59,327 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void amoebaMapTorqueToForce_kernel(
int numOfAtoms,
float4* atomCoord,
float* torque,
int4* multiPoleAtoms,
int maxDiff,
float* tempElecForce
){
void amoebaMapTorqueToForce_kernel( float* torque, int maxDiff, float* tempElecForce){
// ---------------------------------------------------------------------------------------
int ii;
int threadId = __mul24(blockIdx.x,blockDim.x) + threadIdx.x ;
int numOfAtoms = cSim.atoms;
if( threadId >= numOfAtoms )return;
float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
int* multiPoleAtomOffset = cAmoebaSim.pMultipoleAxisOffset;
const int U = 0;
const int V = 1;
const int W = 2;
const int R = 3;
const int S = 4;
const int UV = 5;
const int UW = 6;
const int VW = 7;
const int UR = 8;
const int US = 9;
const int VS = 10;
const int WS = 11;
const int LastVectorIndex = 12;
const int X = 0;
const int Y = 1;
const int Z = 2;
const int I = 3;
float forces[4][3];
float norms[LastVectorIndex];
float vector[LastVectorIndex][3];
float angles[LastVectorIndex][2];
// ---------------------------------------------------------------------------------------
int axisAtom = multiPoleAtoms[threadId].z;
int axisType = multiPoleAtoms[threadId].w;
vector[U][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[U][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[U][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
norms[U] = normVector3( vector[U] );
if( axisType != 4 ){
axisAtom = multiPoleAtoms[threadId].x;
vector[V][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[V][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[V][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
} else {
vector[V][0] = 0.1f;
vector[V][1] = 0.1f;
vector[V][2] = 0.1f;
}
norms[V] = normVector3( vector[V] );
// W = UxV
if( axisType < 2 || axisType > 3 ){
crossVector3( vector[U], vector[V], vector[W] );
} else {
axisAtom = multiPoleAtoms[threadId].y;
vector[W][0] = atomCoord[threadId].x - atomCoord[axisAtom].x;
vector[W][1] = atomCoord[threadId].y - atomCoord[axisAtom].y;
vector[W][2] = atomCoord[threadId].z - atomCoord[axisAtom].z;
}
norms[W] = normVector3( vector[W] );
crossVector3( vector[V], vector[U], vector[UV] );
crossVector3( vector[W], vector[U], vector[UW] );
crossVector3( vector[W], vector[V], vector[VW] );
norms[UV] = normVector3( vector[UV] );
norms[UW] = normVector3( vector[UW] );
norms[VW] = normVector3( vector[VW] );
angles[UV][0] = DOT3( vector[U], vector[V] );
angles[UV][1] = sqrtf( 1.0f - angles[UV][0]*angles[UV][0]);
angles[UW][0] = DOT3( vector[U], vector[W] );
angles[UW][1] = sqrtf( 1.0f - angles[UW][0]*angles[UW][0]);
angles[VW][0] = DOT3( vector[V], vector[W] );
angles[VW][1] = sqrtf( 1.0f - angles[VW][0]*angles[VW][0]);
float dphi[3];
dphi[U] = DOT3( vector[U], (torque + threadId*3) );
dphi[V] = DOT3( vector[V], (torque + threadId*3) );
dphi[W] = DOT3( vector[W], (torque + threadId*3) );
dphi[U] *= -1.0f;
dphi[V] *= -1.0f;
dphi[W] *= -1.0f;
// z-then-x and bisector
if( axisType == 0 || axisType == 1 ){
float factor1;
float factor2;
float factor3;
float factor4;
factor1 = dphi[V]/(norms[U]*angles[UV][1]);
factor2 = dphi[W]/(norms[U]);
factor3 = -dphi[U]/(norms[V]*angles[UV][1]);
if( axisType == 1 ){
factor2 *= 0.5f;
factor4 = 0.5f*dphi[W]/(norms[V]);
} else {
factor4 = 0.0f;
}
for( ii = 0; ii < 3; ii++ ){
forces[Z][ii] = vector[UV][ii]*factor1 + factor2*vector[UW][ii];
forces[X][ii] = vector[UV][ii]*factor3 + factor4*vector[VW][ii];
forces[I][ii] = -(forces[X][ii] + forces[Z][ii]);
forces[Y][ii] = 0.0f;
}
} else if( axisType == 2 ){
// z-bisect
vector[R][0] = vector[V][0] + vector[W][0];
vector[R][1] = vector[V][1] + vector[W][1];
vector[R][2] = vector[V][2] + vector[W][2];
crossVector3( vector[U], vector[R], vector[S] );
norms[R] = normVector3( vector[R] );
norms[S] = normVector3( vector[S] );
crossVector3( vector[R], vector[U], vector[UR] );
crossVector3( vector[S], vector[U], vector[US] );
crossVector3( vector[S], vector[V], vector[VS] );
crossVector3( vector[S], vector[W], vector[WS] );
norms[UR] = normVector3( vector[UR] );
norms[US] = normVector3( vector[US] );
norms[VS] = normVector3( vector[VS] );
norms[WS] = normVector3( vector[WS] );
angles[UR][0] = DOT3( vector[U], vector[R] );
angles[UR][1] = sqrtf( 1.0f - angles[UR][0]*angles[UR][0]);
angles[US][0] = DOT3( vector[U], vector[S] );
angles[US][1] = sqrtf( 1.0f - angles[US][0]*angles[US][0]);
angles[VS][0] = DOT3( vector[V], vector[S] );
angles[VS][1] = sqrtf( 1.0f - angles[VS][0]*angles[VS][0]);
angles[WS][0] = DOT3( vector[W], vector[S] );
angles[WS][1] = sqrtf( 1.0f - angles[WS][0]*angles[WS][0]);
float t1[3];
float t2[3];
t1[0] = vector[V][0] - vector[S][0]*angles[VS][0];
t1[1] = vector[V][1] - vector[S][1]*angles[VS][0];
t1[2] = vector[V][2] - vector[S][2]*angles[VS][0];
t2[0] = vector[W][0] - vector[S][0]*angles[WS][0];
t2[1] = vector[W][1] - vector[S][1]*angles[WS][0];
t2[2] = vector[W][2] - vector[S][2]*angles[WS][0];
float notUsed = normVector3( t1 );
notUsed = normVector3( t2 );
float ut1cos = DOT3( vector[U], t1 );
float ut1sin = sqrtf( 1.0f - ut1cos*ut1cos);
float ut2cos = DOT3( vector[U], t2 );
float ut2sin = sqrtf( 1.0f - ut2cos*ut2cos);
float dphiR = -1.0f*DOT3( vector[R], (torque + threadId*3) );
float dphiS = -1.0f*DOT3( vector[S], (torque + threadId*3) );
float factor1 = dphiR/(norms[U]*angles[UR][1]);
float factor2 = dphiS/(norms[U]);
float factor3 = dphi[U]/(norms[V]*(ut1sin+ut2sin));
float factor4 = dphi[U]/(norms[W]*(ut1sin+ut2sin));
for( ii = 0; ii < 3; ii++ ){
forces[Z][ii] = vector[UR][ii]*factor1 + factor2*vector[US][ii];
forces[X][ii] = (angles[VS][1]*vector[S][ii] - angles[VS][0]*t1[ii])*factor3;
forces[Y][ii] = (angles[WS][1]*vector[S][ii] - angles[WS][0]*t2[ii])*factor4;
forces[I][ii] = -(forces[X][ii] + forces[Y][ii] + forces[Z][ii]);
}
} else if( axisType == 3 ){
// 3-fold
for( ii = 0; ii < 3; ii++ ){
float du = vector[UW][ii]*dphi[W]/(norms[U]*angles[UW][1]) +
vector[UV][ii]*dphi[V]/(norms[U]*angles[UV][1]) -
vector[UW][ii]*dphi[U]/(norms[U]*angles[UW][1]) -
vector[UV][ii]*dphi[U]/(norms[U]*angles[UV][1]);
float dv = vector[VW][ii]*dphi[W]/(norms[V]*angles[VW][1]) -
vector[UV][ii]*dphi[U]/(norms[V]*angles[UV][1]) -
vector[VW][ii]*dphi[V]/(norms[V]*angles[VW][1]) +
vector[UV][ii]*dphi[V]/(norms[V]*angles[UV][1]);
float dw = -vector[UW][ii]*dphi[U]/(norms[W]*angles[UW][1]) -
vector[VW][ii]*dphi[V]/(norms[W]*angles[VW][1]) +
vector[UW][ii]*dphi[W]/(norms[W]*angles[UW][1]) +
vector[VW][ii]*dphi[W]/(norms[W]*angles[VW][1]);
du /= 3.0f;
dv /= 3.0f;
dw /= 3.0f;
forces[Z][ii] = du;
forces[X][ii] = dv;
forces[Y][ii] = dw;
forces[I][ii] = -(du + dv + dw);
}
} else if( axisType == 4 ){
// z-only
for( ii = 0; ii < 3; ii++ ){
float du = vector[UV][ii]*dphi[V]/(norms[U]*angles[UV][1]);
forces[Z][ii] = du;
forces[X][ii] = 0.0f;
forces[Y][ii] = 0.0f;
forces[I][ii] = -du;
}
} else {
for( ii = 0; ii < 3; ii++ ){
forces[Z][ii] = 0.0f;
forces[X][ii] = 0.0f;
forces[Y][ii] = 0.0f;
forces[I][ii] = 0.0f;
}
}
// load results
int temp = multiPoleAtoms[threadId].z;
int min = multiPoleAtomOffset[temp];
int offset = 3*(temp*maxDiff + threadId-min);
tempElecForce[offset ] = forces[Z][0];
tempElecForce[offset+1] = forces[Z][1];
tempElecForce[offset+2] = forces[Z][2];
if( axisType != 4 ){
temp = multiPoleAtoms[threadId].x;
min = multiPoleAtomOffset[temp];
offset = 3*(temp*maxDiff + threadId-min);
tempElecForce[offset ] = forces[X][0];
tempElecForce[offset+1] = forces[X][1];
tempElecForce[offset+2] = forces[X][2];
}
if( axisType == 2 || axisType == 3 ){
temp = multiPoleAtoms[threadId].y;
if( temp > -1 ){
min = multiPoleAtomOffset[temp];
offset = 3*(temp*maxDiff + threadId-min);
tempElecForce[offset ] = forces[Y][0];
tempElecForce[offset+1] = forces[Y][1];
tempElecForce[offset+2] = forces[Y][2];
}
}
min = multiPoleAtomOffset[threadId];
offset = 3*(threadId*(maxDiff + 1)-min);
tempElecForce[offset ] = forces[I][0];
tempElecForce[offset+1] = forces[I][1];
tempElecForce[offset+2] = forces[I][2];
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void amoebaMapTorqueToForceOld_kernel( float* torque, int maxDiff, float* tempElecForce){
// ---------------------------------------------------------------------------------------
int threadId = __mul24(blockIdx.x,blockDim.x) + threadIdx.x ;
int numOfAtoms = cSim.atoms;
if(threadId >= numOfAtoms)return;
int U = 0;
int V = 1;
int W = 2;
float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
int* multiPoleAtomOffset = cAmoebaSim.pMultipoleAxisOffset;
const int U = 0;
const int V = 1;
const int W = 2;
const int R = 3;
const int S = 4;
int X = 0;
int Y = 1;
int Z = 2;
const int X = 0;
const int Y = 1;
const int Z = 2;
float forces[3][3];
float norms[3];
float vector[3][3];
float norms[5];
float vector[5][3];
// ---------------------------------------------------------------------------------------
int axisAtom = multiPoleAtoms[threadId].x;
int axisAtom = multiPoleAtoms[threadId].z;
int axisType = multiPoleAtoms[threadId].w;
vector[U][0] = atomCoord[axisAtom].x - atomCoord[threadId].x;
vector[U][1] = atomCoord[axisAtom].y - atomCoord[threadId].y;
......@@ -86,22 +387,47 @@ void amoebaMapTorqueToForce_kernel(
norms[U] = normVector3( vector[U] );
axisAtom = multiPoleAtoms[threadId].y;
if( axisType != 4 ){
axisAtom = multiPoleAtoms[threadId].x;
vector[V][0] = atomCoord[axisAtom].x - atomCoord[threadId].x;
vector[V][1] = atomCoord[axisAtom].y - atomCoord[threadId].y;
vector[V][2] = atomCoord[axisAtom].z - atomCoord[threadId].z;
} else {
vector[V][0] = 0.1f;
vector[V][1] = 0.1f;
vector[V][2] = 0.1f;
}
norms[V] = normVector3( vector[V] );
// W = UxV
if( axisType < 2 || axisType > 3 ){
vector[W][0] = vector[U][1]*vector[V][2] - vector[U][2]*vector[V][1];
vector[W][1] = vector[U][2]*vector[V][0] - vector[U][0]*vector[V][2];
vector[W][2] = vector[U][0]*vector[V][1] - vector[U][1]*vector[V][0];
} else {
axisAtom = multiPoleAtoms[threadId].y;
vector[W][0] = atomCoord[axisAtom].x - atomCoord[threadId].x;
vector[W][1] = atomCoord[axisAtom].y - atomCoord[threadId].y;
vector[W][2] = atomCoord[axisAtom].z - atomCoord[threadId].z;
}
norms[W] = normVector3( vector[W] );
if( axisType == 2 ){
vector[R][0] = vector[V][0] + vector[W][0];
vector[R][1] = vector[V][1] + vector[W][1];
vector[R][2] = vector[V][2] + vector[W][2];
crossVector3( vector[U], vector[R], vector[S] );
norms[R] = normVector3( vector[R] );
norms[S] = normVector3( vector[S] );
}
float diff[3];
diff[0] = vector[V][0] - vector[U][0];
diff[1] = vector[V][1] - vector[U][1];
......@@ -141,7 +467,7 @@ void amoebaMapTorqueToForce_kernel(
float factorX;
float factorZ;
if( multiPoleAtoms[threadId].w == 1 ){
if( axisType == 1 ){
factorX = 0.5f;
factorZ = 0.5f;
} else {
......@@ -162,21 +488,21 @@ void amoebaMapTorqueToForce_kernel(
forces[Y][1] = -(forces[X][1] + forces[Z][1]);
forces[Y][2] = -(forces[X][2] + forces[Z][2]);
int temp = multiPoleAtoms[threadId].x;
int min = multiPoleAtoms[temp].z;
int temp = multiPoleAtoms[threadId].z;
int min = multiPoleAtomOffset[temp];
int offset = 3*(temp*maxDiff + threadId-min);
tempElecForce[offset ] = forces[X][0];
tempElecForce[offset+1] = forces[X][1];
tempElecForce[offset+2] = forces[X][2];
temp = multiPoleAtoms[threadId].y;
min = multiPoleAtoms[temp].z;
temp = multiPoleAtoms[threadId].x;
min = multiPoleAtomOffset[temp];
offset = 3*(temp*maxDiff + threadId-min);
tempElecForce[offset ] = forces[Z][0];
tempElecForce[offset+1] = forces[Z][1];
tempElecForce[offset+2] = forces[Z][2];
min = multiPoleAtoms[threadId].z;
min = multiPoleAtomOffset[threadId];
offset = 3*(threadId*(maxDiff + 1)-min);
tempElecForce[offset ] = forces[Y][0];
tempElecForce[offset+1] = forces[Y][1];
......@@ -426,10 +752,7 @@ void cudaComputeAmoebaMapTorques( amoebaGpuContext amoebaGpu, CUDAStream<float>*
amoebaMapTorqueToForce_kernel<<< numBlocks, numThreads>>> (
gpu->natoms,
gpu->psPosq4->_pDevStream[0],
psTorque->_pDevStream[0],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0],
amoebaGpu->maxMapTorqueDifference,
amoebaGpu->torqueMapForce->_pDevStream[0] );
LAUNCHERROR("AmoebaMapTrqKernel");
......@@ -440,18 +763,19 @@ void cudaComputeAmoebaMapTorques( amoebaGpuContext amoebaGpu, CUDAStream<float>*
//int maxPrint = 10;
(void) fprintf( amoebaGpu->log,"Post AmoebaMapTrqKernel maxMapTorqueDifference=%d\n", amoebaGpu->maxMapTorqueDifference );
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "\n%5d multi[%d %d %d %d]\n", ii,
(void) fprintf( amoebaGpu->log, "\n%5d multi[%d %d %d %d] offset=%d\n", ii,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w );
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w,
amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii] );
int indexOffset = ii*3*amoebaGpu->maxMapTorqueDifference;
float sum[3] = { 0.0f, 0.0f, 0.0f };
for( int jj = 0; jj < amoebaGpu->maxMapTorqueDifference; jj++ ){
if( amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset] != 0.0f ){
(void) fprintf( amoebaGpu->log," %4d %4d Temp[%16.9e %16.9e %16.9e] %d\n",
ii, jj + amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
ii, jj + amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+1],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+2], indexOffset );
......@@ -551,8 +875,8 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: numBlocks=%d numThreads=%d\n", methodName, numBlocks, numThreads ); (void) fflush( amoebaGpu->log );
}
amoebaGpu->psForce->Download();
amoebaGpu->psTorque->Download();
psForce->Download();
psTorque->Download();
int maxPrint = 20;
(void) fprintf( amoebaGpu->log,"Pre torqueMap\n" );
for( int ii = 0; ii < gpu->natoms; ii++ ){
......@@ -561,17 +885,17 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
int indexOffset = ii*3;
(void) fprintf( amoebaGpu->log,"E[%16.9e %16.9e %16.9e] ",
amoebaGpu->psForce->_pSysStream[0][indexOffset],
amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );
psForce->_pSysStream[0][indexOffset],
psForce->_pSysStream[0][indexOffset+1],
psForce->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"T[%16.9e %16.9e %16.9e]\n",
amoebaGpu->psTorque->_pSysStream[0][indexOffset],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+1],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+2] );
psTorque->_pSysStream[0][indexOffset],
psTorque->_pSysStream[0][indexOffset+1],
psTorque->_pSysStream[0][indexOffset+2] );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii )ii = gpu->natoms - maxPrint;
}
int nansDetected = checkForNansAndInfinities( gpu->natoms*3, amoebaGpu->psForce );
nansDetected += checkForNansAndInfinities( gpu->natoms*3, amoebaGpu->psTorque );
int nansDetected = checkForNansAndInfinities( gpu->natoms*3, psForce );
nansDetected += checkForNansAndInfinities( gpu->natoms*3, psTorque );
if( nansDetected ){
(void) fprintf( amoebaGpu->log,"WARNING: %d nans/infinities detected force/torques.\n", nansDetected );
} else {
......@@ -581,9 +905,31 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
// zero forces
#if 0
for( int ii = 0; ii < 3*gpu->natoms; ii++ ){
amoebaGpu->psForce->_pSysStream[0][ii] = 0.0f;
psForce->_pSysStream[0][ii] = 0.0f;
}
amoebaGpu->psForce->Upload();
psForce->Upload();
#endif
#if 0
(void) fprintf( amoebaGpu->log,"Setting force & torque values.\n" );
for( int ii = 0; ii < 3*gpu->natoms; ii += 3 ){
psTorque->_pSysStream[0][ii] = 1.0f;
psTorque->_pSysStream[0][ii+1] = 0.0f;
psTorque->_pSysStream[0][ii+2] = 0.0f;
psForce->_pSysStream[0][ii] = 0.0f;
psForce->_pSysStream[0][ii+1] = 0.0f;
psForce->_pSysStream[0][ii+2] = 0.0f;
}
for( int ii = 0; ii < gpu->natoms; ii++ ){
psCudaForce4->_pSysStream[0][ii].x = 0.0f;
psCudaForce4->_pSysStream[0][ii].y = 0.0f;
psCudaForce4->_pSysStream[0][ii].z = 0.0f;
}
psForce->Upload();
psTorque->Upload();
psCudaForce4->Upload();
#endif
#endif
......@@ -597,11 +943,9 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
*/
//amoebaMapTorqueToForceOld_kernel<<< numBlocks, numThreads>>> (
amoebaMapTorqueToForce_kernel<<< numBlocks, numThreads>>> (
gpu->natoms,
gpu->psPosq4->_pDevStream[0],
psTorque->_pDevStream[0],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0],
amoebaGpu->maxMapTorqueDifference,
amoebaGpu->torqueMapForce->_pDevStream[0] );
LAUNCHERROR("AmoebaMapTrqKernel");
......@@ -616,14 +960,14 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].x,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].y,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w );
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].w, amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii] );
int indexOffset = ii*3*amoebaGpu->maxMapTorqueDifference;
float sum[3] = { 0.0f, 0.0f, 0.0f };
for( int jj = 0; jj < amoebaGpu->maxMapTorqueDifference; jj++ ){
if( amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset] != 0.0f ){
(void) fprintf( amoebaGpu->log," %4d %4d Temp[%16.9e %16.9e %16.9e] %d\n",
ii, jj + amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pSysStream[0][ii].z,
ii, jj + amoebaGpu->psMultipoleAxisOffset->_pSysStream[0][ii],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+1],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+2], indexOffset );
......@@ -651,10 +995,10 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: numBlocks=%d numThreads=%d %d\n", methodName, numBlocks, numThreads, amoebaGpu->maxMapTorqueDifferencePow2); (void) fflush( amoebaGpu->log );
amoebaGpu->psForce->Download();
psForce->Download();
psCudaForce4->Download();
amoebaGpu->torqueMapForce->Download();
amoebaGpu->psTorque->Download();
psTorque->Download();
int maxPrint = 10;
(void) fprintf( amoebaGpu->log,"Post torqueMap\n" );
for( int ii = 0; ii < gpu->natoms; ii++ ){
......@@ -667,17 +1011,17 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
psCudaForce4->_pSysStream[0][ii].y,
psCudaForce4->_pSysStream[0][ii].z );
(void) fprintf( amoebaGpu->log,"F[%16.9e %16.9e %16.9e] ",
amoebaGpu->psForce->_pSysStream[0][indexOffset],
amoebaGpu->psForce->_pSysStream[0][indexOffset+1],
amoebaGpu->psForce->_pSysStream[0][indexOffset+2] );
psForce->_pSysStream[0][indexOffset],
psForce->_pSysStream[0][indexOffset+1],
psForce->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"fT[%16.9e %16.9e %16.9e] ",
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+1],
amoebaGpu->torqueMapForce->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"T[%16.9e %16.9e %16.9e]\n",
amoebaGpu->psTorque->_pSysStream[0][indexOffset],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+1],
amoebaGpu->psTorque->_pSysStream[0][indexOffset+2] );
psTorque->_pSysStream[0][indexOffset],
psTorque->_pSysStream[0][indexOffset+1],
psTorque->_pSysStream[0][indexOffset+2] );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii )ii = gpu->natoms - maxPrint;
}
(void) fflush( amoebaGpu->log );
......@@ -688,8 +1032,8 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 4, gpu->psForce4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTorque, outputVector);
cudaWriteVectorOfDoubleVectorsToFile( "CudaVacuumElecForce", fileId, outputVector );
}
#endif
......@@ -729,10 +1073,7 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce2( amoebaGpuContext amoebaGpu,
int numBlocks = 1 + (gpu->natoms/numThreads);
amoebaMapTorqueToForce_kernel<<< numBlocks, numThreads>>> (
gpu->natoms,
gpu->psPosq4->_pDevStream[0],
psTorque->_pDevStream[0],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0],
amoebaGpu->maxMapTorqueDifference,
amoebaGpu->torqueMapForce->_pDevStream[0] );
LAUNCHERROR("AmoebaMapTrqKernel");
......
......@@ -52,6 +52,97 @@ __device__ static float normVector3( float* vector )
#undef AMOEBA_DEBUG
// ZThenX == 0
// Bisector == 1
// ZBisect == 2
// ThreeFold == 3
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kCudaComputeCheckChiral_kernel( void )
{
const int AD = 0;
const int BD = 1;
const int CD = 2;
const int C = 3;
float delta[4][3];
float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
float* molecularDipole = cAmoebaSim.pMolecularDipole;
float* molecularQuadrupole = cAmoebaSim.pMolecularQuadrupole;
float* labFrameDipole = cAmoebaSim.pLabFrameDipole;
float* labFrameQuadrupole = cAmoebaSim.pLabFrameQuadrupole;
// ---------------------------------------------------------------------------------------
int atomIndex = blockIdx.x;
int axisType = multiPoleAtoms[atomIndex].w;
float* molDipole = &(molecularDipole[atomIndex*3]);
float* labDipole = &(labFrameDipole[atomIndex*3]);
labDipole[0] = molDipole[0];
labDipole[1] = molDipole[1];
labDipole[2] = molDipole[2];
float* molQuadrupole = &(molecularQuadrupole[atomIndex*9]);
float* labQuadrupole = &(labFrameQuadrupole[atomIndex*9]);
labQuadrupole[0] = molQuadrupole[0];
labQuadrupole[1] = molQuadrupole[1];
labQuadrupole[2] = molQuadrupole[2];
labQuadrupole[3] = molQuadrupole[3];
labQuadrupole[4] = molQuadrupole[4];
labQuadrupole[5] = molQuadrupole[5];
labQuadrupole[6] = molQuadrupole[6];
labQuadrupole[7] = molQuadrupole[7];
labQuadrupole[8] = molQuadrupole[8];
// skip z-then-x
if( axisType == 0 )return;
// ---------------------------------------------------------------------------------------
int atomA = atomIndex;
int atomB = multiPoleAtoms[atomIndex].z;
int atomC = multiPoleAtoms[atomIndex].x;
int atomD = multiPoleAtoms[atomIndex].y;
delta[AD][0] = atomCoord[atomA].x - atomCoord[atomD].x;
delta[AD][1] = atomCoord[atomA].y - atomCoord[atomD].y;
delta[AD][2] = atomCoord[atomA].z - atomCoord[atomD].z;
delta[BD][0] = atomCoord[atomB].x - atomCoord[atomD].x;
delta[BD][1] = atomCoord[atomB].y - atomCoord[atomD].y;
delta[BD][2] = atomCoord[atomB].z - atomCoord[atomD].z;
delta[CD][0] = atomCoord[atomC].x - atomCoord[atomD].x;
delta[CD][1] = atomCoord[atomC].y - atomCoord[atomD].y;
delta[CD][2] = atomCoord[atomC].z - atomCoord[atomD].z;
delta[C][0] = delta[BD][1]*delta[CD][2] - delta[BD][2]*delta[CD][1];
delta[C][1] = delta[CD][1]*delta[AD][2] - delta[CD][2]*delta[AD][1];
delta[C][2] = delta[AD][1]*delta[BD][2] - delta[AD][2]*delta[BD][1];
float volume = delta[C][0]*delta[AD][0] + delta[C][1]*delta[BD][0] + delta[C][2]*delta[CD][0];
if( volume < 0.0 ){
labDipole[1] *= -1.0f; // pole(3,i)
labQuadrupole[1] *= -1.0f; // pole(6,i) && pole(8,i)
labQuadrupole[3] *= -1.0f; // pole(10,i) && pole(12,i)
labQuadrupole[5] *= -1.0f; // pole(6,i) && pole(8,i)
labQuadrupole[7] *= -1.0f; // pole(10,i) && pole(12,i)
}
}
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
......@@ -60,22 +151,23 @@ __launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kCudaComputeLabFrameMoments_kernel(
int numOfAtoms,
float *rotationMatrix,
float4 *atomCoord,
int4 *multiPoleAtoms,
float *molecularDipole, float *molecularQuadrupole,
float *labFrameDipole, float *labFrameQuadrupole )
void kCudaComputeLabFrameMoments_kernel( void )
{
float* vectorX;
float* vectorY;
float* vectorZ;
float vectorX[3];
float vectorY[3];
float vectorZ[3];
int numOfAtoms = cSim.atoms;
//float* rotationMatrix = cAmoebaSim.pRotationMatrix;
float4* atomCoord = cSim.pPosq;
int4* multiPoleAtoms = cAmoebaSim.pMultipoleParticlesIdsAndAxisType;
float* labFrameDipole = cAmoebaSim.pLabFrameDipole;
float* labFrameQuadrupole = cAmoebaSim.pLabFrameQuadrupole;
// ---------------------------------------------------------------------------------------
int atomIndex = blockIdx.x;//__mul24(blockIdx.x,blockDim.x) + threadIdx.x ;
int atomIndex = blockIdx.x;
// ---------------------------------------------------------------------------------------
......@@ -87,20 +179,22 @@ void kCudaComputeLabFrameMoments_kernel(
// code common to ZThenX and Bisector
/*
vectorX = &(rotationMatrix[atomIndex*9]);
vectorY = &(rotationMatrix[atomIndex*9+ 3]);
vectorZ = &(rotationMatrix[atomIndex*9+ 6]);
*/
float4 coordinatesThisAtom = atomCoord[atomIndex];
int multipoleAtomIndex = multiPoleAtoms[atomIndex].x;
int multipoleAtomIndex = multiPoleAtoms[atomIndex].z;
float4 coordinatesAxisAtom = atomCoord[multipoleAtomIndex];
vectorZ[0] = coordinatesAxisAtom.x - coordinatesThisAtom.x;
vectorZ[1] = coordinatesAxisAtom.y - coordinatesThisAtom.y;
vectorZ[2] = coordinatesAxisAtom.z - coordinatesThisAtom.z;
multipoleAtomIndex = multiPoleAtoms[atomIndex].y;
multipoleAtomIndex = multiPoleAtoms[atomIndex].x;
coordinatesAxisAtom = atomCoord[multipoleAtomIndex];
vectorX[0] = coordinatesAxisAtom.x - coordinatesThisAtom.x;
......@@ -109,14 +203,55 @@ void kCudaComputeLabFrameMoments_kernel(
int axisType = multiPoleAtoms[atomIndex].w;
float sum = normVector3( vectorZ );
/*
z-only
(1) norm z
(2) select random x
(3) x = x - (x.z)z
(4) norm x
z-then-x
(1) norm z
(2) norm x (not needed)
(3) x = x - (x.z)z
(4) norm x
bisector
(1) norm z
(2) norm x
(3) z = x + z
(4) norm z
(5) x = x - (x.z)z
(6) norm x
z-bisect
(1) norm z
(2) norm x
(3) norm y
(3) x = x + y
(4) norm x
(5) x = x - (x.z)z
(6) norm x
3-fold
(1) norm z
(2) norm x
(3) norm y
(4) z = x + y + z
(5) norm z
(6) x = x - (x.z)z
(7) norm x
*/
// branch based on axis type
float sum = normVector3( vectorZ );
if( axisType == 1 ){
// bisector
// dx = dx1 + dx2 (in Tinker code)
sum = normVector3( vectorX );
......@@ -126,8 +261,45 @@ void kCudaComputeLabFrameMoments_kernel(
sum = normVector3( vectorZ );
} else if( axisType == 2 || axisType == 3 ){
// z-bisect
multipoleAtomIndex = multiPoleAtoms[atomIndex].y;
coordinatesAxisAtom = atomCoord[multipoleAtomIndex];
vectorY[0] = coordinatesAxisAtom.x - coordinatesThisAtom.x;
vectorY[1] = coordinatesAxisAtom.y - coordinatesThisAtom.y;
vectorY[2] = coordinatesAxisAtom.z - coordinatesThisAtom.z;
sum = normVector3( vectorY );
sum = normVector3( vectorX );
if( axisType == 2 ){
vectorX[0] += vectorY[0];
vectorX[1] += vectorY[1];
vectorX[2] += vectorY[2];
sum = normVector3( vectorX );
} else {
// 3-fold
vectorZ[0] += vectorX[0] + vectorY[0];
vectorZ[1] += vectorX[1] + vectorY[1];
vectorZ[2] += vectorX[2] + vectorY[2];
sum = normVector3( vectorZ );
}
} else if( axisType == 4 ){
vectorX[0] = 0.1f;
vectorX[1] = 0.1f;
vectorX[2] = 0.1f;
}
// x = x - (x.z)z
float dot = vectorZ[0]*vectorX[0] + vectorZ[1]*vectorX[1] + vectorZ[2]*vectorX[2];
vectorX[0] -= dot*vectorZ[0];
......@@ -140,8 +312,28 @@ void kCudaComputeLabFrameMoments_kernel(
vectorY[1] = (vectorZ[2]*vectorX[0]) - (vectorZ[0]*vectorX[2]);
vectorY[2] = (vectorZ[0]*vectorX[1]) - (vectorZ[1]*vectorX[0]);
float* molDipole = &(molecularDipole[atomIndex*3]);
// use identity rotation matrix for unrecognized axis types
if( axisType < 0 || axisType > 4 ){
vectorX[0] = 1.0f;
vectorX[1] = 0.0f;
vectorX[2] = 0.0f;
vectorY[0] = 0.0f;
vectorY[1] = 1.0f;
vectorY[2] = 0.0f;
vectorZ[0] = 0.0f;
vectorZ[1] = 0.0f;
vectorZ[2] = 1.0f;
}
float molDipole[3];
float* labDipole = &(labFrameDipole[atomIndex*3]);
molDipole[0] = labDipole[0];
molDipole[1] = labDipole[1];
molDipole[2] = labDipole[2];
// set out-of-range elements to 0.0f
......@@ -151,16 +343,20 @@ void kCudaComputeLabFrameMoments_kernel(
// ---------------------------------------------------------------------------------------
const float * mPole[3];
float* rPole[3];
float* molQuadrupole = &(molecularQuadrupole[atomIndex*9]);
float mPole[3][3];
float* labQuadrupole = &(labFrameQuadrupole[atomIndex*9]);
for( int ii = 0; ii < 3; ii++ ){
mPole[ii] = molQuadrupole + ii*3;
mPole[ii][0] = labQuadrupole[3*ii+0];
mPole[ii][1] = labQuadrupole[3*ii+1];
mPole[ii][2] = labQuadrupole[3*ii+2];
rPole[ii] = labQuadrupole + ii*3;
rPole[ii][0] = rPole[ii][1] = rPole[ii][2] = 0.0f;
rPole[ii][0] = 0.0f;
rPole[ii][1] = 0.0f;
rPole[ii][2] = 0.0f;
}
int ii = threadIdx.x;
......@@ -240,16 +436,10 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
double kernelTime = 0.0;
#endif
kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> (
gpu->natoms,
amoebaGpu->psRotationMatrix->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
amoebaGpu->psMultipoleParticlesIdsAndAxisType->_pDevStream[0],
amoebaGpu->psMolecularDipole->_pDevStream[0],
amoebaGpu->psMolecularQuadrupole->_pDevStream[0],
amoebaGpu->psLabFrameDipole->_pDevStream[0],
amoebaGpu->psLabFrameQuadrupole->_pDevStream[0]
);
kCudaComputeCheckChiral_kernel<<< numBlocks, numThreads>>> ( );
LAUNCHERROR("kCudaComputeCheckChiral");
kCudaComputeLabFrameMoments_kernel<<< numBlocks, numThreads>>> ( );
LAUNCHERROR(methodName);
#ifdef AMOEBA_DEBUG
......
......@@ -382,6 +382,46 @@ void AmoebaReferenceMultipoleForce::loadParticleData( RealOpenMM** particlePosit
}
}
void AmoebaReferenceMultipoleForce::checkChiral( MultipoleParticleData& particleI, int axisType,
MultipoleParticleData& particleZ, MultipoleParticleData& particleX,
MultipoleParticleData& particleY ) const {
// ---------------------------------------------------------------------------------------
static const RealOpenMM one = 1.0;
static const std::string methodName = "AmoebaReferenceMultipoleForce::checkChiral";
static const int AD = 0;
static const int BD = 1;
static const int CD = 2;
static const int C = 3;
double delta[4][3];
// ---------------------------------------------------------------------------------------
if( axisType == AmoebaMultipoleForce::ZThenX ){
return;
}
getDelta( particleY, particleI, delta[AD] );
getDelta( particleY, particleZ, delta[BD] );
getDelta( particleY, particleX, delta[CD] );
delta[C][0] = delta[BD][1]*delta[CD][2] - delta[BD][2]*delta[CD][1];
delta[C][1] = delta[CD][1]*delta[AD][2] - delta[CD][2]*delta[AD][1];
delta[C][2] = delta[AD][1]*delta[BD][2] - delta[AD][2]*delta[BD][1];
RealOpenMM volume = delta[C][0]*delta[AD][0] + delta[C][1]*delta[BD][0] + delta[C][2]*delta[CD][0];
if( volume < 0.0 ){
particleI.dipole[1] *= -one; // pole(3,i)
particleI.quadrupole[QXY] *= -one; // pole(6,i) && pole(8,i)
particleI.quadrupole[QYZ] *= -one; // pole(10,i) && pole(12,i)
}
return;
}
void AmoebaReferenceMultipoleForce::applyRotationMatrix( MultipoleParticleData& particleI,
const MultipoleParticleData& particleZ,
const MultipoleParticleData& particleX, int axisType ) const {
......@@ -1460,7 +1500,7 @@ RealOpenMM AmoebaReferenceMultipoleForce::calculateNoCutoffElectrostatic( std::v
// ---------------------------------------------------------------------------------------
// initialize forces/energy and scaleing factors
// initialize forces/energy and scaling factors
std::vector<Vec3> torques( particleData.size() );
for( unsigned int ii = 0; ii < particleData.size(); ii++ ){
......@@ -1580,6 +1620,16 @@ RealOpenMM AmoebaReferenceMultipoleForce::calculateNoCutoffForceAndEnergy( unsig
loadParticleData( particlePositions, charges, dipoles, quadrupoles,
tholes, dampingFactors, polarity, particleData );
// check for chiral centers that need multipoles inverted
for( unsigned int ii = 0; ii < numParticles; ii++ ){
if( multipoleAtomYs[ii] ){
checkChiral( particleData[ii], axisTypes[ii], particleData[multipoleAtomZs[ii]], particleData[multipoleAtomXs[ii]], particleData[multipoleAtomYs[ii]] );
}
}
// apply rotation matrix
for( unsigned int ii = 0; ii < numParticles; ii++ ){
if( multipoleAtomZs[ii] >= 0 && multipoleAtomXs[ii] >= 0 ){
applyRotationMatrix( particleData[ii], particleData[multipoleAtomZs[ii]], particleData[multipoleAtomXs[ii]], axisTypes[ii] );
......
......@@ -407,6 +407,24 @@ private:
void getAndScaleInverseRs( RealOpenMM dampI, RealOpenMM dampJ, RealOpenMM tholeI, RealOpenMM tholeJ,
RealOpenMM r, std::vector<RealOpenMM>& rrI ) const;
/**---------------------------------------------------------------------------------------
Check multipoles at chiral sites
inverts atomic multipole moments as necessary
at sites with chiral local reference frame definitions
@param particleI particleI data
@param axisType axis type
@param particleZ z-axis particle to particleI
@param particleX x-axis particle to particleI
@param particleY y-axis particle to particleI
--------------------------------------------------------------------------------------- */
void checkChiral( MultipoleParticleData& particleI, int axisType, MultipoleParticleData& particleZ,
MultipoleParticleData& particleX, MultipoleParticleData& particleY ) const;
/**---------------------------------------------------------------------------------------
Apply roatation matrix to molecular dipole/quadrupoles to get corresponding lab frame values
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment